>>107376279
Setup used on an RTX 3090:
# Serve Qwen3-Next-80B-A3B-Thinking (UD-Q8_K_XL GGUF, split over 2 files)
# with llama.cpp's llama-server, built at the pinned commit below.
# GPU: CUDA device 0 only; all layers offloaded (--n-gpu-layers 99) but MoE
# expert weights kept on CPU (--cpu-moe), pinned to NUMA node 1 / CPUs 8-15.
commit="ff55414c42522adbeaa1bd9c52c0e9db16942484"
model_folder="/mnt/AI/LLM/Qwen3-Next-80B-A3B-Thinking-GGUF/"
model_basename="Qwen3-Next-80B-A3B-Thinking-UD-Q8_K_XL-00001-of-00002"
# Sampling flags as an array so each flag/value stays its own word, instead
# of relying on unquoted word-splitting of a string (shellcheck SC2086).
model_parameters=(--temp 0.6 --top_p 0.95 --min_p 0 --top_k 20)
model="${model_folder}${model_basename}.gguf"
ctx_size=131072   # fixed typo: was `cxt_size`
# Physical cores per socket, via a single awk (no grep|awk pipeline).
# NOTE(review): numactl below pins exactly 8 CPUs (8-15) — confirm this
# matches the detected core count so threads aren't oversubscribed.
threads=$(lscpu | awk '/^Core\(s\) per socket/ {print $4}')
CUDA_VISIBLE_DEVICES=0 \
numactl --physcpubind=8-15 --membind=1 \
  "$HOME/LLAMA_CPP/$commit/llama.cpp/build/bin/llama-server" \
  --model "$model" "${model_parameters[@]}" \
  --threads "$threads" \
  --ctx-size "$ctx_size" \
  --n-gpu-layers 99 \
  --no-warmup \
  --batch-size 512 \
  --cpu-moe \
  --jinja \
  --port 9000