# GitHub地址 (GitHub repository URL)
# Download and silently install Miniconda (Linux x86_64) into ${HOME}/software/miniconda3.
# -b: batch (no prompts), -p: install prefix. Quote expansions so a $HOME with spaces works.
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh -b -p "${HOME}/software/miniconda3"
# macOS (Intel x86_64) variant of the same installer.
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
bash Miniconda3-latest-MacOSX-x86_64.sh -b -p "${HOME}/software/miniconda3"
# Put conda on PATH for the current shell session.
export PATH="${HOME}/software/miniconda3/bin:${PATH}"
# Reload the profile (only has an effect if the export above was also added to ~/.bashrc).
source ~/.bashrc
# Sanity check: the conda binary is found and reports its version.
conda --version
# Create an isolated conda environment named "vllm" with Python 3.12 (-y: no confirmation prompt).
conda create -n vllm python=3.12 -y
# Activate the environment.
# NOTE(review): in a non-interactive script, `conda activate` typically requires
# `eval "$(conda shell.bash hook)"` first — confirm for your shell setup.
conda activate vllm
# Leave the environment when done.
conda deactivate
# Point pip at the Tsinghua PyPI mirror (faster from mainland China) and trust its host.
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple/
pip config set install.trusted-host pypi.tuna.tsinghua.edu.cn
# Install vLLM — the next two lines are alternatives, run ONE of them:
# either pin a specific release ...
pip install vllm==0.4.2
# ... or install the latest published release.
pip install vllm
# Run vLLM's OpenAI-compatible server in Docker with NVIDIA GPU access.
# - mounts the host HuggingFace cache so downloaded models are reused across runs
# - HUGGING_FACE_HUB_TOKEN is needed for gated models (<secret> is a placeholder — substitute a real token)
# - publishes the API on host port 8000
# - --ipc=host shares the host IPC namespace (larger shared memory for PyTorch workers)
docker run --runtime nvidia --gpus all \
-v ~/.cache/huggingface:/root/.cache/huggingface \
--env "HUGGING_FACE_HUB_TOKEN=<secret>" \
-p 8000:8000 \
--ipc=host \
vllm/vllm-openai:latest \
--model mistralai/Mistral-7B-v0.1
# 环境变量参数说明 (environment-variable parameter reference)
# Build the vLLM CPU image from the repository's CPU Dockerfile,
# targeting the "vllm-openai" stage and tagging it "vllm-cpu-env".
docker build -f docker/Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai .
# Same build, but routed through an HTTP(S) proxy and with the layer cache disabled
# (--no-cache forces every step to re-run; use when a cached layer is stale).
docker build \
--build-arg "HTTP_PROXY=http://10.0.4.59:9090" \
--build-arg "HTTPS_PROXY=http://10.0.4.59:9090" \
--no-cache \
-f docker/Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai .
# Run the locally-built CPU image.
#   VLLM_CPU_KVCACHE_SPACE   — KV-cache budget (GiB) for the CPU backend
#   VLLM_CPU_OMP_THREADS_BIND — CPU core range to pin OpenMP worker threads to
# The original ended "--dtype=bfloat16 \" followed by a prose placeholder line,
# which the shell would parse as literal arguments — the dangling continuation
# is removed and the placeholder turned into a comment.
docker run --rm \
--privileged=true \
--shm-size=4g \
-p 8000:8000 \
-e VLLM_CPU_KVCACHE_SPACE=8 \
-e VLLM_CPU_OMP_THREADS_BIND=0-31 \
vllm-cpu-env \
--model=meta-llama/Llama-3.2-1B-Instruct \
--dtype=bfloat16
# (append other vLLM OpenAI server arguments above as needed)
# Docker installation (vllm-openai image) — proactive-use examples
# Interactive run of the pinned image on CPU (no model specified; server args follow the image name).
docker run -it --rm vllm/vllm-openai:v0.7.3 --device cpu
# Serve a model directly on the host (vllm CLI, not Docker).
vllm serve Qwen/Qwen2.5-1.5B-Instruct
# Serve with auto dtype selection and an API key clients must present.
vllm serve NousResearch/Meta-Llama-3-8B-Instruct --dtype auto --api-key token-abc123
# Detached CPU run behind a proxy.
# NOTE(review): the "hf.co/" model prefix looks like ollama syntax, not vLLM's — verify;
# also no -p port mapping here, so the API is unreachable from the host (the next line fixes both).
docker run -d -e http_proxy=http://10.0.5.93:9090 -e https_proxy=http://10.0.5.93:9090 vllm/vllm-openai:v0.7.3 --device cpu --model hf.co/sesame/csm-1b
# Corrected retry: plain HF repo id and the port published.
docker run -d -e http_proxy=http://10.0.5.93:9090 -e https_proxy=http://10.0.5.93:9090 -p 8000:8000 vllm/vllm-openai:v0.7.3 --device cpu --model sesame/csm-1b
# Gated model: pass a real HuggingFace token in place of <TOKEN>.
docker run -d -e http_proxy=http://10.0.4.59:9090 -e https_proxy=http://10.0.4.59:9090 -e HUGGING_FACE_HUB_TOKEN=<TOKEN> -p 8000:8000 vllm/vllm-openai --device cpu --model google/gemma-3-1b-it
# Create a Python 3.12 virtualenv with uv and install vLLM into it.
# Original had many typos: "vlm_env" vs the "vllm_env" activated below,
# "--python 3. 2", "--with vlm", "vlm serve", mangled model ids
# ("Qwen2. -1.5B", "BAI/", "Qwen2.5-1.5B-1.5B"), and malformed flags
# ("--enforc_eager"/"--enforce_eager" -> --enforce-eager, "-device" -> --device).
uv venv vllm_env --python 3.12 --seed
source vllm_env/bin/activate
uv pip install vllm
# Run the vllm CLI through uv without a pre-activated env.
uv run --with vllm vllm --help
# Serve a chat model.
vllm serve Qwen/Qwen2.5-1.5B-Instruct
# Serve a reranker model.
vllm serve BAAI/bge-reranker-v2-m3
# CPU-only serving.
vllm serve Qwen/Qwen2.5-1.5B-Instruct --device cpu
# Disable CUDA graph capture (eager mode).
vllm serve Qwen/Qwen2.5-1.5B-Instruct --enforce-eager
# CPU + eager mode combined.
vllm serve Qwen/Qwen2.5-1.5B-Instruct --device cpu --enforce-eager
# If startup fails with the Python error:
#   ModuleNotFoundError: No module named 'torch'
# (the raw traceback line would be a shell syntax error if pasted as-is)
# install PyTorch into the active environment:
pip install torch
# Instructions for installing GCC 5 or higher (required to build from source):
# CentOS 7: enable Software Collections, then install the GCC 11 toolchain.
# Original had package-name typos: "cents-release-scl", "devtoolset-11-gccc",
# and "devtoolset-11-gc++" (the C++ compiler package is devtoolset-11-gcc-c++).
yum install centos-release-scl
yum install devtoolset-11-gcc devtoolset-11-gcc-c++
# Afterwards activate it with: scl enable devtoolset-11 bash