llama-chat.cpp

#include "llama-chat.h"

#include "llama.h"

#include <map>
#include <sstream>
#include <algorithm>
#include <cctype> // isspace, toupper are used below; do not rely on transitive includes

#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif
// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
        start += 1;
    }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
        end -= 1;
    }
    return str.substr(start, end - start);
}
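// (for reference: trim("  hello world \n") returns "hello world"; trim("\t\n") returns "")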
static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml", LLM_CHAT_TEMPLATE_CHATML },
    { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
    { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
    { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
    { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
    { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
    { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
    { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
    { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
    { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
    { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
    { "orion", LLM_CHAT_TEMPLATE_ORION },
    { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
    { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
    { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
    { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
    { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
    { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
    { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
    { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
    { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
    { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 },
    { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
    { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
    { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
    { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 },
    { "exaone-moe", LLM_CHAT_TEMPLATE_EXAONE_MOE },
    { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
    { "granite", LLM_CHAT_TEMPLATE_GRANITE },
    { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
    { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
    { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
    { "bailing", LLM_CHAT_TEMPLATE_BAILING },
    { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK },
    { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 },
    { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
    { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
    { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
    { "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE },
    { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE },
    { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
    { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
    { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
    { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED },
    { "solar-open", LLM_CHAT_TEMPLATE_SOLAR_OPEN },
};
llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}
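// Usage sketch: the short names in LLM_CHAT_TEMPLATES map directly to enum values, e.g.
//
//     llm_chat_template t = llm_chat_template_from_str("chatml"); // LLM_CHAT_TEMPLATE_CHATML
//
// Note that std::map::at() throws std::out_of_range for unknown names;
// llm_chat_detect_template() below relies on this as its fast path before falling
// back to substring heuristics over the raw Jinja template text.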
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }
    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };
    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        if (tmpl_contains("<|tool_declare|>")) {
            return LLM_CHAT_TEMPLATE_EXAONE_MOE;
        }
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
        return LLM_CHAT_TEMPLATE_BAILING2;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    } else if (tmpl_contains("<seed:bos>")) {
        return LLM_CHAT_TEMPLATE_SEED_OSS;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
    } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
        return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}
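// Usage sketch: detection accepts either a short name from LLM_CHAT_TEMPLATES or the raw
// Jinja template string (typically read from the model's tokenizer.chat_template metadata), e.g.
//
//     llm_chat_detect_template("llama3");                             // by name
//     llm_chat_detect_template("{{ ... <|start_header_id|> ... }}");  // by template content
//
// Anything that matches none of the heuristics above yields LLM_CHAT_TEMPLATE_UNKNOWN.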
// Simple version of "llama_apply_chat_template" that only works with strings
// This function uses heuristic checks to handle the commonly used templates. It is not a Jinja parser.
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt += trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt += message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(role[0]);
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "tool") {
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_MOE) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "user") {
                ss << "<|user|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "assistant") {
                ss << "<|assistant|>\n" << trim(message->content) << "<|endofturn|>\n";
            } else if (role == "tool") {
                ss << "<|tool|>\n" << trim(message->content) << "<|endofturn|>\n";
            }
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "system") {
                ss << "System: " << trim(chat[i]->content) << "\n\n";
            } else if (role == "user") {
                ss << "User: " << trim(chat[i]->content) << "\n\n";
                if (i == chat.size() - 1) {
                    ss << "Assistant:";
                }
            } else if (role == "assistant") {
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }
        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }
        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }
        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
        // Yandex template ("\n\n" is defined as EOT token)
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << " Пользователь: " << chat[i]->content << "\n\n";
            } else if (role == "assistant") {
                ss << " Ассистент: " << chat[i]->content << "\n\n";
            }
        }
        // Add generation prompt if needed
        if (add_ass) {
            ss << " Ассистент:[SEP]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
        // Bailing (Ling/Ring) template
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }
            ss << "<role>" << role << "</role>" << message->content;
        }
        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
                ss << "<think>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
        // Bailing2 (Ling 2.0) template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
        if (!has_system) {
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
        }
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }
            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
        }
        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
        // Llama 4
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
        }
        if (add_ass) {
            ss << "<|header_start|>assistant<|header_end|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
        // SmolVLM
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "<end_of_utterance>\n";
            } else {
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
        // dots.llm1.inst (DOTS1)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>" << message->content << "<|endofsystem|>";
            } else if (role == "user") {
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
            } else {
                ss << "<|response|>" << message->content << "<|endofresponse|>";
            }
        }
        if (add_ass) {
            ss << "<|response|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
        // tencent/Hunyuan-A13B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
            } else if (role == "assistant") {
                ss << message->content << "<|eos|>";
            } else {
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
        // OpenAI MoE (based on Harmony chat template)
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start|>" << role << "<|message|>" << message->content;
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
        }
        if (add_ass) {
            ss << "<|start|>assistant";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
        // tencent/Hunyuan-4B-Instruct
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (i == 0) {
                if (role == "system") {
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
                }
            }
            if (role == "assistant") {
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
            } else if (role == "user") {
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|im_system|>system<|im_middle|>";
            } else if (role == "user") {
                ss << "<|im_user|>user<|im_middle|>";
            } else if (role == "assistant") {
                ss << "<|im_assistant|>assistant<|im_middle|>";
            } else if (role == "tool") {
                ss << "<|im_system|>tool<|im_middle|>";
            }
            ss << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_assistant|>assistant<|im_middle|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
        }
        if (add_ass) {
            ss << "<seed:bos>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "user") {
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
        // Format (the Chinese role names are literal prompt tokens:
        // 系统 = system, 用户 = user, 助手 = assistant, 工具 = tool, 方法 = function):
        //   [unused9]系统:xxx[unused10]
        //   [unused9]用户:xxx[unused10]
        //   [unused9]助手:xxx[unused10]
        //   ...
        for (size_t i = 0; i < chat.size(); ++i) {
            const auto & msg = chat[i];
            const std::string & role = msg->role;
            const std::string & content = msg->content;
            if (i == 0 && role != "system") {
                ss << "[unused9]系统:[unused10]";
            }
            if (role == "system") {
                ss << "[unused9]系统:" << content << "[unused10]";
            } else if (role == "user") {
                ss << "[unused9]用户:" << content << "[unused10]";
            } else if (role == "assistant") {
                ss << "[unused9]助手:" << content << "[unused10]";
            } else if (role == "tool") {
                ss << "[unused9]工具:" << content << "[unused10]";
            } else if (role == "function") {
                ss << "[unused9]方法:" << content << "[unused10]";
            }
        }
        if (add_ass) {
            ss << "[unused9]助手:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
        }
        if (add_ass) {
            ss << "<|begin|>assistant";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}
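// Usage sketch (hypothetical messages, not part of this file): formatting a short
// conversation with the chatml template. The return value is the length of the
// formatted prompt, or -1 when the template is not supported.
//
//     llama_chat_message msgs[] = {
//         { "system", "You are a helpful assistant." },
//         { "user",   "Hello!" },
//     };
//     std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };
//     std::string prompt;
//     int32_t n = llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
//     // prompt == "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
//     //           "<|im_start|>user\nHello!<|im_end|>\n"
//     //           "<|im_start|>assistant\n"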
// public interface

int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}
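// Usage sketch: enumerating the built-in template names (e.g. to validate a
// --chat-template argument). The function returns the total number of templates,
// so a caller can query the required buffer size first:
//
//     int32_t n_tmpl = llama_chat_builtin_templates(nullptr, 0);
//     std::vector<const char *> names((size_t) n_tmpl);
//     llama_chat_builtin_templates(names.data(), names.size());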