// llama-chat.cpp

#include "llama-chat.h"

#include "llama.h"

#include <map>
#include <sstream>
#include <algorithm>
#include <cctype>   // isspace, toupper
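
// In C++20 and later, u8"" literals have type const char8_t[], so LU8() casts them back to
// const char * so they can be streamed like the other string literals in this file; before
// C++20 they are already plain char-based and no cast is needed.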
#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif

// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    // cast to unsigned char so non-ASCII bytes in UTF-8 content do not trigger
    // undefined behaviour in isspace()
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
        start += 1;
    }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
        end -= 1;
    }
    return str.substr(start, end - start);
}
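
// Registry of built-in template names. llm_chat_template_from_str() resolves a name through
// this map, and llama_chat_builtin_templates() at the bottom of the file exposes the list to callers.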
static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml", LLM_CHAT_TEMPLATE_CHATML },
    { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
    { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
    { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
    { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
    { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
    { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
    { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
    { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
    { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
    { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
    { "orion", LLM_CHAT_TEMPLATE_ORION },
    { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
    { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
    { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
    { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
    { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
    { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
    { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
    { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
    { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
    { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 },
    { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
    { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
    { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
    { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 },
    { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
    { "granite", LLM_CHAT_TEMPLATE_GRANITE },
    { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
    { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
    { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
    { "bailing", LLM_CHAT_TEMPLATE_BAILING },
    { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
    { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
    { "dots1", LLM_CHAT_TEMPLATE_DOTS1 },
    { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
    { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
};

llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}
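
// Detect which template a model uses: first try an exact match against the registered names,
// then fall back to substring heuristics applied to the Jinja template source itself.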
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };

    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}

// Simple version of "llama_chat_apply_template" that only works with strings.
// This function uses heuristic checks to handle commonly used templates. It is not a Jinja parser.
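//
// Example usage (a minimal sketch; llama_chat_message comes from llama.h):
//   llama_chat_message msgs[] = {
//       { "system", "You are a helpful assistant." },
//       { "user",   "Hello!" },
//   };
//   std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };
//   std::string prompt;
//   llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);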
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
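        // Renders each turn as "<|im_start|>{role}\n{content}<|im_end|>\n" and, when add_ass is
        // set, leaves an open "<|im_start|>assistant\n" turn for the model to complete.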
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
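        // Layout: [SYSTEM_PROMPT] {system}[/SYSTEM_PROMPT][INST] {user}[/INST] {assistant}</s>
        // The 'v7-tekken' variant uses the same layout but without the single space after
        // [SYSTEM_PROMPT] / [INST] and before the assistant reply.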
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
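        // Variant differences, as applied below:
        //   v1        - a space before [INST] and [/INST], plus a space after [INST]
        //   v3        - no leading space; a space after [INST]; assistant replies are trimmed
        //   v3-tekken - no extra spaces and no trimming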
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
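        // Each exchange is rendered as:
        //   [INST] <<SYS>>\n{system}\n<</SYS>>\n\n{user} [/INST]{assistant}</s>
        // with follow-up [INST] blocks optionally preceded by <s> (the _SYS_BOS variant) and the
        // message content optionally trimmed (the _SYS_STRIP variant).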
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
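        // Gemma has no system role: system messages are buffered and prepended to the next user
        // turn, and the "assistant" role is mapped to Gemma's "model" role.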
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt += trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt += message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106,
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(role[0]);
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "tool") {
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
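        // Turns are plain "System:" / "User:" / "Assistant:" paragraphs separated by blank lines;
        // the trailing "Assistant:" prompt is emitted right after the last user message instead of
        // via add_ass.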
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "system") {
                ss << "System: " << trim(chat[i]->content) << "\n\n";
            } else if (role == "user") {
                ss << "User: " << trim(chat[i]->content) << "\n\n";
                if (i == chat.size() - 1) {
                    ss << "Assistant:";
                }
            } else if (role == "assistant") {
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
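        // The prompt starts with <s>, optionally followed by the system message; every other turn
        // is "role<|role_sep|>{content}<|message_sep|>", and each user turn is followed by an empty
        // "available functions" block.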
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }
        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }
        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }
        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
        // Yandex template ("\n\n" is defined as EOT token)
        ss << "<s>";
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << " Пользователь: " << chat[i]->content << "\n\n";
            } else if (role == "assistant") {
                ss << " Ассистент: " << chat[i]->content << "\n\n";
            }
        }
        // Add generation prompt if needed
        if (add_ass) {
            ss << " Ассистент:[SEP]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) {
        // Bailing (Ling) template
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }
            ss << "<role>" << role << "</role>" << message->content;
        }
        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
        // Llama 4
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
        }
        if (add_ass) {
            ss << "<|header_start|>assistant<|header_end|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
        // SmolVLM
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "<end_of_utterance>\n";
            } else {
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
        // dots.llm1.inst (DOTS1)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>" << message->content << "<|endofsystem|>";
            } else if (role == "user") {
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
            } else {
                ss << "<|response|>" << message->content << "<|endofresponse|>";
            }
        }
        if (add_ass) {
            ss << "<|response|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
        // tencent/Hunyuan-A13B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
            } else if (role == "assistant") {
                ss << "<|startoftext|>" << message->content << "<|eos|>";
            } else {
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|im_system|>system<|im_middle|>";
            } else if (role == "user") {
                ss << "<|im_user|>user<|im_middle|>";
            } else if (role == "assistant") {
                ss << "<|im_assistant|>assistant<|im_middle|>";
            } else if (role == "tool") {
                ss << "<|im_system|>tool<|im_middle|>";
            }
            ss << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_assistant|>assistant<|im_middle|>";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}

// public interface
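
// Copies up to `len` built-in template names into `output` and returns the total number of
// built-in templates, so callers can pass len == 0 first to query how many entries to allocate.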
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}