# Status note carried over from the original file: "downloading".
# NOTE(review): presumably the model weights were still being fetched when
# this was written - confirm. It is kept as a comment because a bare
# `downloading` line is executed as a command; unless a program with that
# name exists, the shell reports "command not found" (exit status 127).
#
# Start the vLLM server for the PrismaQuant 4.75 build of
# Mistral-Medium-3.5-128B.
#
# Flags, in the order they are passed below:
#   --host 0.0.0.0             bind address; 0.0.0.0 accepts connections on
#                              every network interface.
#                              NOTE(review): confirm this exposure is intended.
#   --port 8000                TCP port the server listens on.
#   --served-model-name        model name clients use in API requests instead
#                              of the full repository id.
#   --config-format hf         read the model config in Hugging Face format.
#   --tokenizer                tokenizer is taken from the upstream
#                              mistralai repository, not the quantized one.
#   --tokenizer-mode mistral   use the Mistral tokenizer implementation.
#   --trust-remote-code        allow code shipped in the model repository to
#                              run. NOTE(review): only safe if the repository
#                              is trusted - confirm.
#   --quantization compressed-tensors
#                              weight quantization scheme of the checkpoint.
#   --tensor-parallel-size 1   no tensor parallelism (a single shard).
#   --max-model-len 8192       maximum context length, in tokens.
#   --gpu-memory-utilization 0.90
#                              fraction of GPU memory vLLM may use.
#   --kv-cache-dtype fp8       store the KV cache in fp8.
vllm serve rdtand/Mistral-Medium-3.5-128B-PrismaQuant-4.75-vllm \
--host 0.0.0.0 \
--port 8000 \
--served-model-name mistral-medium-3.5-prismaquant-4.75 \
--config-format hf \
--tokenizer mistralai/Mistral-Medium-3.5-128B \
--tokenizer-mode mistral \
--trust-remote-code \
--quantization compressed-tensors \
--tensor-parallel-size 1 \
--max-model-len 8192 \
--gpu-memory-utilization 0.90 \
--kv-cache-dtype fp8