Is it just me being dense, or is the DSv4 merge really not "thinking"? It starts generating the response right away. This is the command I am running:
# Launch llama-server on port 8001 (all interfaces) for the model in "$model".
#
# Expects two variables to be set by the caller:
#   model         - path to the model file passed to --model
#   model_folder  - directory holding the chat template; it is concatenated
#                   directly with the file name, so it must end with "/"
#
# --threads is taken from the "Core(s) per socket" line of lscpu (4th field);
# "exit" stops after the first match so exactly one value is passed.
#
# NOTE(review): whether the model emits a reasoning block is presumably
# decided by the chat template given below and by llama-server's reasoning
# options (e.g. --reasoning-format / --chat-template-kwargs) — TODO confirm
# against the template and `llama-server --help`.
llama-server \
  --model "$model" \
  --threads "$(lscpu | awk '/Core\(s\) per socket/ { print $4; exit }')" \
  --n-gpu-layers 99 \
  --no-warmup \
  --port 8001 \
  --host 0.0.0.0 \
  --temp 1.0 \
  --top-p 1.0 \
  --flash-attn on \
  --cpu-moe \
  --jinja \
  -np 1 \
  --chat-template-file "${model_folder}deepseek-ai-DeepSeek-V4.jinja" \
  --ctx-size "$((1024 * 48))"