// chat.h (7.8 KB)
  // Chat support (including tool-call grammar constraining and output parsing) with generic and custom template handlers.
  #pragma once

  #include "common.h"

  #include <chrono>
  #include <functional>
  #include <memory>
  #include <string>
  #include <vector>
  // Forward declaration only: this header never defines the type, so code that
  // includes it can hold and pass pointers to it but cannot look inside.
  struct common_chat_templates;
  // One tool call carried by a chat message.
  struct common_chat_tool_call {
      std::string name;
      std::string arguments;  // NOTE(review): presumably a serialized JSON object — confirm against the parser
      std::string id;

      // Field-wise equality: all three strings must match.
      bool operator==(const common_chat_tool_call & other) const {
          if (name != other.name) {
              return false;
          }
          if (arguments != other.arguments) {
              return false;
          }
          return id == other.id;
      }
  };
  // One typed fragment of a message's content (a `type` tag plus its `text`).
  struct common_chat_msg_content_part {
      std::string type;
      std::string text;

      // Field-wise equality on both the tag and the text.
      bool operator==(const common_chat_msg_content_part & other) const {
          const bool same_type = (type == other.type);
          return same_type && text == other.text;
      }
  };
  24. struct common_chat_msg {
  25. std::string role;
  26. std::string content;
  27. std::vector<common_chat_msg_content_part> content_parts = {};
  28. std::vector<common_chat_tool_call> tool_calls = {};
  29. std::string reasoning_content;
  30. std::string tool_name;
  31. std::string tool_call_id;
  32. template <class T> T to_json_oaicompat() const;
  33. bool empty() const {
  34. return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
  35. }
  36. void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
  37. for (auto i = 0u; i < tool_calls.size(); i++) {
  38. if (ids_cache.size() <= i) {
  39. auto id = tool_calls[i].id;
  40. if (id.empty()) {
  41. id = gen_tool_call_id();
  42. }
  43. ids_cache.push_back(id);
  44. }
  45. tool_calls[i].id = ids_cache[i];
  46. }
  47. }
  48. bool operator==(const common_chat_msg & other) const {
  49. return role == other.role
  50. && content == other.content
  51. && content_parts == other.content_parts
  52. && tool_calls == other.tool_calls
  53. && reasoning_content == other.reasoning_content
  54. && tool_name == other.tool_name
  55. && tool_call_id == other.tool_call_id;
  56. }
  57. bool operator!=(const common_chat_msg & other) const {
  58. return !(*this == other);
  59. }
  60. };
  61. struct common_chat_msg_diff {
  62. // std::string reasoning_content_delta;
  63. std::string content_delta;
  64. size_t tool_call_index = std::string::npos;
  65. common_chat_tool_call tool_call_delta;
  66. static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
  67. bool operator==(const common_chat_msg_diff & other) const {
  68. return content_delta == other.content_delta
  69. && tool_call_index == other.tool_call_index
  70. && tool_call_delta == other.tool_call_delta;
  71. }
  72. };
  // Description of a tool offered to the model. Plain aggregate of strings.
  struct common_chat_tool {
      std::string name;
      std::string description;
      // NOTE(review): presumably a JSON schema serialized as text — confirm against
      // common_chat_tools_parse_oaicompat.
      std::string parameters;
  };
  // Tool-selection policy for a request.
  // NOTE(review): the names look like the OpenAI `tool_choice` values
  // "auto" / "required" / "none" — confirm against
  // common_chat_tool_choice_parse_oaicompat.
  enum common_chat_tool_choice {
      COMMON_CHAT_TOOL_CHOICE_AUTO,
      COMMON_CHAT_TOOL_CHOICE_REQUIRED,
      COMMON_CHAT_TOOL_CHOICE_NONE,
  };
  // Identifiers for the chat formats known to this header.
  // Enumerators are implicitly numbered from 0, which is what lets the trailing
  // COMMON_CHAT_FORMAT_COUNT equal the number of real formats. Add new formats
  // above COUNT.
  enum common_chat_format {
      COMMON_CHAT_FORMAT_CONTENT_ONLY,
      COMMON_CHAT_FORMAT_GENERIC,
      COMMON_CHAT_FORMAT_MISTRAL_NEMO,
      COMMON_CHAT_FORMAT_LLAMA_3_X,
      COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
      COMMON_CHAT_FORMAT_DEEPSEEK_R1,
      COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
      COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
      COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
      COMMON_CHAT_FORMAT_HERMES_2_PRO,
      COMMON_CHAT_FORMAT_COMMAND_R7B,

      COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
  };
  97. struct common_chat_templates_inputs {
  98. std::vector<common_chat_msg> messages;
  99. std::string grammar;
  100. std::string json_schema;
  101. bool add_generation_prompt = true;
  102. bool use_jinja = true;
  103. // Parameters below only supported when use_jinja is true
  104. std::vector<common_chat_tool> tools;
  105. common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
  106. bool parallel_tool_calls = false;
  107. common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
  108. std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
  109. };
  110. struct common_chat_params {
  111. common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
  112. std::string prompt;
  113. std::string grammar;
  114. bool grammar_lazy = false;
  115. bool thinking_forced_open = false;
  116. std::vector<common_grammar_trigger> grammar_triggers;
  117. std::vector<std::string> preserved_tokens;
  118. std::vector<std::string> additional_stops;
  119. };
  120. struct common_chat_syntax {
  121. common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
  122. common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
  123. // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
  124. bool reasoning_in_content = false;
  125. bool thinking_forced_open = false;
  126. };
  // Check if the template supplied via "--chat-template" is supported or not.
  // Returns true if it's valid.
  bool common_chat_verify_template(
      const std::string & tmpl,
      bool use_jinja);
  // Releases a common_chat_templates instance.
  // NOTE(review): presumably the counterpart of common_chat_templates_init — confirm in the implementation.
  void common_chat_templates_free(struct common_chat_templates * tmpls);

  // Stateless deleter that forwards to common_chat_templates_free.
  // operator() is const so the deleter can also be invoked through a const
  // reference (e.g. a const unique_ptr's get_deleter()).
  struct common_chat_templates_deleter {
      void operator()(common_chat_templates * tmpls) const { common_chat_templates_free(tmpls); }
  };

  // Owning handle for common_chat_templates. std::unique_ptr never invokes its
  // deleter on a null pointer, so an empty handle is safe to destroy.
  // Requires <memory>.
  using common_chat_templates_ptr = std::unique_ptr<common_chat_templates, common_chat_templates_deleter>;
  132. common_chat_templates_ptr common_chat_templates_init(
  133. const struct llama_model * model,
  134. const std::string & chat_template_override,
  135. const std::string & bos_token_override = "",
  136. const std::string & eos_token_override = "");
  // NOTE(review): presumably reports whether the template was supplied
  // explicitly rather than defaulted — confirm in the implementation.
  bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);

  // Returns a C string for the given template set; `variant` is optional and
  // defaults to nullptr.
  // NOTE(review): the lifetime of the returned pointer is not visible here —
  // presumably tied to `tmpls`; confirm.
  const char * common_chat_templates_source(
      const struct common_chat_templates * tmpls,
      const char * variant = nullptr);
  // Applies the template set `tmpls` to `inputs` and returns the resulting
  // common_chat_params by value.
  struct common_chat_params common_chat_templates_apply(
      const struct common_chat_templates * tmpls,
      const struct common_chat_templates_inputs & inputs);
  142. // Format single message, while taking into account the position of that message in chat history
  143. std::string common_chat_format_single(
  144. const struct common_chat_templates * tmpls,
  145. const std::vector<common_chat_msg> & past_msg,
  146. const common_chat_msg & new_msg,
  147. bool add_ass,
  148. bool use_jinja);
  // Returns an example of formatted chat, rendered with the given template set.
  std::string common_chat_format_example(
      const struct common_chat_templates * tmpls,
      bool use_jinja);
  153. std::string common_chat_format_name(common_chat_format format);
  154. common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
  155. common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
  156. // Parses a JSON array of messages in OpenAI's chat completion API format.
  157. // T can be std::string containing JSON or nlohmann::ordered_json
  158. template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
  159. template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
  160. // Parses a JSON array of tools in OpenAI's chat completion tool call API format.
  161. // T can be std::string containing JSON or nlohmann::ordered_json
  162. template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
  163. template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
  164. template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);