#!/bin/bash
# Fetch the AWQ-quantized Mistral-7B-Instruct-32K checkpoint (only if it is not
# already checked out) and serve it through vLLM's OpenAI-compatible API server.
#
# Usage: run from the directory that contains llama2-chat-template.jinja.
# Both the model checkout and the chat template are relative paths, so they are
# resolved against the current working directory.
set -euo pipefail

readonly MODEL_REPO_URL='https://huggingface.co/Moses25/Mistral-7B-Instruct-32K-AWQ'
readonly MODEL_DIR='Mistral-7B-Instruct-32K-AWQ'

# Clone only when the checkout is missing, so re-running the script does not
# trip over git's "destination path already exists" error. A failed clone
# aborts the script instead of starting the server without a usable model.
# NOTE(review): Hugging Face repos typically store weights via Git LFS;
# presumably git-lfs must be installed for the clone to fetch real weight
# files rather than pointer stubs -- confirm on the target host.
if [[ ! -d "${MODEL_DIR}" ]]; then
  if ! git clone "${MODEL_REPO_URL}" "${MODEL_DIR}"; then
    printf 'error: failed to clone %s\n' "${MODEL_REPO_URL}" >&2
    exit 1
  fi
fi

# Server options are kept in an array so each flag sits on its own line and no
# backslash continuations are needed.
server_args=(
  --model="${MODEL_DIR}"
  # Lets the model repository supply custom Python code that vLLM will run;
  # only acceptable because the repository above is trusted.
  --trust-remote-code
  # Listen on every network interface, port 7777.
  --host 0.0.0.0
  --port 7777
  --gpu-memory-utilization 0.8
  --enforce-eager
  # Context length is capped at 8192 tokens for this deployment.
  --max-model-len 8192
  --chat-template llama2-chat-template.jinja
  --tensor-parallel-size 1
  # Name that API clients pass in the "model" field of their requests.
  --served-model-name dewu-chat
)

# exec replaces this shell with the server process, so signals sent to the
# script (e.g. SIGTERM from a supervisor) reach the server directly and the
# server's exit status becomes the script's exit status.
exec python -m vllm.entrypoints.openai.api_server "${server_args[@]}"