diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 73cb4c75b3e738053025786d512eb29f80f6b0ae..520abdfd5bf96ea8e8d5793efd3c70faf1c47063 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2776,6 +2776,8 @@ private:
result.text_to_send = common_token_to_piece(ctx, result.tok, accept_special_token(slot, result.tok));
result.prob = 1.0f; // TODO: set it here instead of doing inside populate_token_probs
+ printf("%s", result.text_to_send);
+
if (slot.task->params.sampling.n_probs > 0) {
populate_token_probs(slot, result, slot.task->params.post_sampling_probs, params_base.special, tok_idx);
}
Now I can see what is happening in real time when using tools like Claude Code that hide the full model output.