#include "llama-chat.h"

#include "llama.h"

#include <algorithm> // std::min (used in llama_chat_builtin_templates)
#include <cctype>    // isspace, toupper
#include <map>
#include <sstream>

#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif
// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    // cast to unsigned char: passing a negative char (e.g. a UTF-8 byte) to isspace is undefined behavior
    while (start < end && isspace((unsigned char) str[start])) {
        start += 1;
    }
    while (end > start && isspace((unsigned char) str[end - 1])) {
        end -= 1;
    }
    return str.substr(start, end - start);
}
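// Illustrative example (not part of the original source): trim() removes only leading and
// trailing whitespace; inner whitespace is preserved.
//
//   trim("  hello world \n") -> "hello world"
//   trim(" \t\n")            -> ""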
static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
    { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGML_3         },
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGML_4         },
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
};
llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}
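// Illustrative example (not part of the original source): the map above resolves a short
// template name to its enum value. An unknown name makes std::map::at() throw
// std::out_of_range, which llm_chat_detect_template() below catches to fall back to
// content-based detection.
//
//   llm_chat_template_from_str("chatml");           // LLM_CHAT_TEMPLATE_CHATML
//   llm_chat_template_from_str("no-such-template"); // throws std::out_of_range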
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };

    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        return LLM_CHAT_TEMPLATE_FALCON_3;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGML_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGML_4;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}
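// Illustrative example (not part of the original source): detection accepts either a known
// short name or a full Jinja template string, matched by marker substrings.
//
//   llm_chat_detect_template("llama3");                                        // exact name -> LLM_CHAT_TEMPLATE_LLAMA_3
//   llm_chat_detect_template("...<|start_header_id|>...<|end_header_id|>..."); // heuristic  -> LLM_CHAT_TEMPLATE_LLAMA_3
//   llm_chat_detect_template("totally unrecognized");                          // LLM_CHAT_TEMPLATE_UNKNOWN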
// Simple version of "llama_apply_chat_template" that only works with strings
// This function uses heuristic checks to determine a commonly used template. It is not a Jinja parser.
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
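        // Illustrative output (not part of the original source) for a single user message
        // "Hello" with add_ass == true:
        //
        //   <|im_start|>user
        //   Hello<|im_end|>
        //   <|im_start|>assistant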
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST] " << content << "[/INST]";
            } else {
                ss << " " << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
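        // Illustrative output (not part of the original source) for a single user message
        // "Hello" — the three variants differ only in the spacing around [INST]:
        //
        //   v1:        " [INST] Hello [/INST]"
        //   v3:        "[INST] Hello[/INST]"
        //   v3-tekken: "[INST]Hello[/INST]"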
    } else if (
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
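        // Illustrative output (not part of the original source) for the -sys variant, given a
        // system message "Be brief" followed by a user message "Hello":
        //
        //   [INST] <<SYS>>
        //   Be brief
        //   <</SYS>>
        //
        //   Hello [/INST]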
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // Phi 4 (chatml-style, but with <|im_sep|> between role and content)
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt = trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
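        // Illustrative output (not part of the original source): gemma has no system role, so a
        // system message "Be brief" is folded into the next user turn. For user "Hello" with
        // add_ass == true:
        //
        //   <start_of_turn>user
        //   Be brief
        //
        //   Hello<end_of_turn>
        //   <start_of_turn>model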
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt = message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(role[0]);
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
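        // Illustrative output (not part of the original source) for a single user message
        // "Hello" with add_ass == true, shown with \n escaped:
        //
        //   <|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n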
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << "User: " << message->content << "\n\nAssistant:";
            } else {
                ss << message->content << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }

        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
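        // Illustrative output (not part of the original source): every user turn is followed by
        // an empty "available functions" block. For a single user message "Hello" with no system
        // message and add_ass == true:
        //
        //   <s>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>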
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }
        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}
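// Illustrative sketch (not part of the original source) of how the two helpers above fit
// together, assuming the llama_chat_message struct from llama.h (role/content C strings).
// The message contents are made up for the example; the return value is the length of the
// formatted prompt, or -1 for an unsupported template.
//
//   llama_chat_message msgs[] = {
//       { "system", "You are a helpful assistant." },
//       { "user",   "Hello"                        },
//   };
//   std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };
//   std::string prompt;
//   llm_chat_template tmpl = llm_chat_detect_template("chatml");
//   int32_t res = llm_chat_apply_template(tmpl, chat, prompt, /*add_ass=*/true);
//   // res > 0 and prompt now holds the chatml-formatted conversation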
// public interface

int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}
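// Illustrative sketch (not part of the original source): the function returns the total number
// of built-in templates and fills at most `len` entries, so a caller can size the buffer with a
// first call (len == 0, output unused) and fill it with a second call.
//
//   int32_t n = llama_chat_builtin_templates(nullptr, 0);
//   std::vector<const char *> names(n);
//   llama_chat_builtin_templates(names.data(), names.size());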