|
|
@@ -239,12 +239,15 @@ struct common_params_diffusion {
|
|
|
bool add_gumbel_noise = false; // add gumbel noise to the logits if temp > 0.0
|
|
|
};
|
|
|
|
|
|
+// reasoning API response format (not to be confused with the chat template's reasoning format)
|
|
|
enum common_reasoning_format {
|
|
|
COMMON_REASONING_FORMAT_NONE,
|
|
|
- COMMON_REASONING_FORMAT_AUTO,
|
|
|
+ COMMON_REASONING_FORMAT_AUTO, // Same as deepseek, using `message.reasoning_content`
|
|
|
COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // Extract thinking tag contents and return as `message.reasoning_content`, or leave inline in <think> tags in stream mode
|
|
|
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
|
|
|
- COMMON_REASONING_FORMAT_GRANITE, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
|
|
|
+ // do not extend this enum unless you absolutely have to
|
|
|
+ // in most cases, use COMMON_REASONING_FORMAT_AUTO
|
|
|
+ // see: https://github.com/ggml-org/llama.cpp/pull/15408
|
|
|
};
|
|
|
|
|
|
|