
#pragma once

#include "ggml.h" // ggml_op

#include <string>

//
// gguf constants (sync with gguf.py)
//

enum llm_arch {
    LLM_ARCH_LLAMA,
    LLM_ARCH_LLAMA4,
    LLM_ARCH_DECI,
    LLM_ARCH_FALCON,
    LLM_ARCH_BAICHUAN,
    LLM_ARCH_GROK,
    LLM_ARCH_GPT2,
    LLM_ARCH_GPTJ,
    LLM_ARCH_GPTNEOX,
    LLM_ARCH_MPT,
    LLM_ARCH_STARCODER,
    LLM_ARCH_REFACT,
    LLM_ARCH_BERT,
    LLM_ARCH_NOMIC_BERT,
    LLM_ARCH_NOMIC_BERT_MOE,
    LLM_ARCH_NEO_BERT,
    LLM_ARCH_JINA_BERT_V2,
    LLM_ARCH_JINA_BERT_V3,
    LLM_ARCH_BLOOM,
    LLM_ARCH_STABLELM,
    LLM_ARCH_QWEN,
    LLM_ARCH_QWEN2,
    LLM_ARCH_QWEN2MOE,
    LLM_ARCH_QWEN2VL,
    LLM_ARCH_QWEN3,
    LLM_ARCH_QWEN3MOE,
    LLM_ARCH_PHI2,
    LLM_ARCH_PHI3,
    LLM_ARCH_PHIMOE,
    LLM_ARCH_PLAMO,
    LLM_ARCH_PLAMO2,
    LLM_ARCH_CODESHELL,
    LLM_ARCH_ORION,
    LLM_ARCH_INTERNLM2,
    LLM_ARCH_MINICPM,
    LLM_ARCH_MINICPM3,
    LLM_ARCH_GEMMA,
    LLM_ARCH_GEMMA2,
    LLM_ARCH_GEMMA3,
    LLM_ARCH_GEMMA3N,
    LLM_ARCH_GEMMA_EMBEDDING,
    LLM_ARCH_STARCODER2,
    LLM_ARCH_MAMBA,
    LLM_ARCH_MAMBA2,
    LLM_ARCH_JAMBA,
    LLM_ARCH_FALCON_H1,
    LLM_ARCH_XVERSE,
    LLM_ARCH_COMMAND_R,
    LLM_ARCH_COHERE2,
    LLM_ARCH_DBRX,
    LLM_ARCH_OLMO,
    LLM_ARCH_OLMO2,
    LLM_ARCH_OLMOE,
    LLM_ARCH_OPENELM,
    LLM_ARCH_ARCTIC,
    LLM_ARCH_DEEPSEEK,
    LLM_ARCH_DEEPSEEK2,
    LLM_ARCH_CHATGLM,
    LLM_ARCH_GLM4,
    LLM_ARCH_GLM4_MOE,
    LLM_ARCH_BITNET,
    LLM_ARCH_T5,
    LLM_ARCH_T5ENCODER,
    LLM_ARCH_JAIS,
    LLM_ARCH_NEMOTRON,
    LLM_ARCH_NEMOTRON_H,
    LLM_ARCH_EXAONE,
    LLM_ARCH_EXAONE4,
    LLM_ARCH_RWKV6,
    LLM_ARCH_RWKV6QWEN2,
    LLM_ARCH_RWKV7,
    LLM_ARCH_ARWKV7,
    LLM_ARCH_GRANITE,
    LLM_ARCH_GRANITE_MOE,
    LLM_ARCH_GRANITE_HYBRID,
    LLM_ARCH_CHAMELEON,
    LLM_ARCH_WAVTOKENIZER_DEC,
    LLM_ARCH_PLM,
    LLM_ARCH_BAILINGMOE,
    LLM_ARCH_DOTS1,
    LLM_ARCH_ARCEE,
    LLM_ARCH_ERNIE4_5,
    LLM_ARCH_ERNIE4_5_MOE,
    LLM_ARCH_HUNYUAN_MOE,
    LLM_ARCH_HUNYUAN_DENSE,
    LLM_ARCH_SMOLLM3,
    LLM_ARCH_OPENAI_MOE,
    LLM_ARCH_LFM2,
    LLM_ARCH_DREAM,
    LLM_ARCH_SMALLTHINKER,
    LLM_ARCH_LLADA,
    LLM_ARCH_SEED_OSS,
    LLM_ARCH_UNKNOWN,
};

enum llm_kv {
    LLM_KV_GENERAL_TYPE,
    LLM_KV_GENERAL_ARCHITECTURE,
    LLM_KV_GENERAL_QUANTIZATION_VERSION,
    LLM_KV_GENERAL_ALIGNMENT,
    LLM_KV_GENERAL_FILE_TYPE,
    LLM_KV_GENERAL_NAME,
    LLM_KV_GENERAL_AUTHOR,
    LLM_KV_GENERAL_VERSION,
    LLM_KV_GENERAL_URL,
    LLM_KV_GENERAL_DESCRIPTION,
    LLM_KV_GENERAL_LICENSE,
    LLM_KV_GENERAL_SOURCE_URL,
    LLM_KV_GENERAL_SOURCE_HF_REPO,

    LLM_KV_VOCAB_SIZE,
    LLM_KV_CONTEXT_LENGTH,
    LLM_KV_EMBEDDING_LENGTH,
    LLM_KV_FEATURES_LENGTH,
    LLM_KV_BLOCK_COUNT,
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
    LLM_KV_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
    LLM_KV_USE_PARALLEL_RESIDUAL,
    LLM_KV_TENSOR_DATA_LAYOUT,
    LLM_KV_EXPERT_COUNT,
    LLM_KV_EXPERT_USED_COUNT,
    LLM_KV_EXPERT_SHARED_COUNT,
    LLM_KV_EXPERT_WEIGHTS_SCALE,
    LLM_KV_EXPERT_WEIGHTS_NORM,
    LLM_KV_EXPERT_GATING_FUNC,
    LLM_KV_MOE_EVERY_N_LAYERS,
    LLM_KV_NEXTN_PREDICT_LAYERS,
    LLM_KV_POOLING_TYPE,
    LLM_KV_LOGIT_SCALE,
    LLM_KV_DECODER_START_TOKEN_ID,
    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
    LLM_KV_SWIN_NORM,
    LLM_KV_RESCALE_EVERY_N_LAYERS,
    LLM_KV_TIME_MIX_EXTRA_DIM,
    LLM_KV_TIME_DECAY_EXTRA_DIM,
    LLM_KV_RESIDUAL_SCALE,
    LLM_KV_EMBEDDING_SCALE,
    LLM_KV_TOKEN_SHIFT_COUNT,
    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,

    LLM_KV_ATTENTION_HEAD_COUNT,
    LLM_KV_ATTENTION_HEAD_COUNT_KV,
    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
    LLM_KV_ATTENTION_CLAMP_KQV,
    LLM_KV_ATTENTION_KEY_LENGTH,
    LLM_KV_ATTENTION_VALUE_LENGTH,
    LLM_KV_ATTENTION_LAYERNORM_EPS,
    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
    LLM_KV_ATTENTION_GROUPNORM_EPS,
    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
    LLM_KV_ATTENTION_CAUSAL,
    LLM_KV_ATTENTION_Q_LORA_RANK,
    LLM_KV_ATTENTION_KV_LORA_RANK,
    LLM_KV_ATTENTION_DECAY_LORA_RANK,
    LLM_KV_ATTENTION_ICLR_LORA_RANK,
    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
    LLM_KV_ATTENTION_GATE_LORA_RANK,
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
    LLM_KV_ATTENTION_SLIDING_WINDOW,
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

    LLM_KV_ROPE_DIMENSION_COUNT,
    LLM_KV_ROPE_DIMENSION_SECTIONS,
    LLM_KV_ROPE_FREQ_BASE,
    LLM_KV_ROPE_SCALE_LINEAR,
    LLM_KV_ROPE_SCALING_TYPE,
    LLM_KV_ROPE_SCALING_FACTOR,
    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
    LLM_KV_ROPE_SCALING_FINETUNED,
    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,

    LLM_KV_SPLIT_NO,
    LLM_KV_SPLIT_COUNT,
    LLM_KV_SPLIT_TENSORS_COUNT,

    LLM_KV_SSM_INNER_SIZE,
    LLM_KV_SSM_CONV_KERNEL,
    LLM_KV_SSM_STATE_SIZE,
    LLM_KV_SSM_TIME_STEP_RANK,
    LLM_KV_SSM_GROUP_COUNT,
    LLM_KV_SSM_DT_B_C_RMS,

    LLM_KV_WKV_HEAD_SIZE,

    LLM_KV_TOKENIZER_MODEL,
    LLM_KV_TOKENIZER_PRE,
    LLM_KV_TOKENIZER_LIST,
    LLM_KV_TOKENIZER_TOKEN_TYPE,
    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
    LLM_KV_TOKENIZER_SCORES,
    LLM_KV_TOKENIZER_MERGES,
    LLM_KV_TOKENIZER_BOS_ID,
    LLM_KV_TOKENIZER_EOS_ID,
    LLM_KV_TOKENIZER_EOT_ID,
    LLM_KV_TOKENIZER_EOM_ID,
    LLM_KV_TOKENIZER_UNK_ID,
    LLM_KV_TOKENIZER_SEP_ID,
    LLM_KV_TOKENIZER_PAD_ID,
    LLM_KV_TOKENIZER_CLS_ID,
    LLM_KV_TOKENIZER_MASK_ID,
    LLM_KV_TOKENIZER_ADD_BOS,
    LLM_KV_TOKENIZER_ADD_EOS,
    LLM_KV_TOKENIZER_ADD_SEP,
    LLM_KV_TOKENIZER_ADD_PREFIX,
    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
    LLM_KV_TOKENIZER_HF_JSON,
    LLM_KV_TOKENIZER_RWKV,
    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
    LLM_KV_TOKENIZER_FIM_PRE_ID,
    LLM_KV_TOKENIZER_FIM_SUF_ID,
    LLM_KV_TOKENIZER_FIM_MID_ID,
    LLM_KV_TOKENIZER_FIM_PAD_ID,
    LLM_KV_TOKENIZER_FIM_REP_ID,
    LLM_KV_TOKENIZER_FIM_SEP_ID,

    LLM_KV_ADAPTER_TYPE,
    LLM_KV_ADAPTER_LORA_ALPHA,
    LLM_KV_ADAPTER_LORA_TASK_NAME,
    LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,

    LLM_KV_POSNET_EMBEDDING_LENGTH,
    LLM_KV_POSNET_BLOCK_COUNT,

    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
    LLM_KV_CONVNEXT_BLOCK_COUNT,

    LLM_KV_CLASSIFIER_OUTPUT_LABELS,

    LLM_KV_SHORTCONV_L_CACHE,

    // deprecated:
    LLM_KV_TOKENIZER_PREFIX_ID,
    LLM_KV_TOKENIZER_SUFFIX_ID,
    LLM_KV_TOKENIZER_MIDDLE_ID,
};

enum llm_tensor {
    LLM_TENSOR_TOKEN_EMBD,
    LLM_TENSOR_TOKEN_EMBD_NORM,
    LLM_TENSOR_TOKEN_TYPES,
    LLM_TENSOR_POS_EMBD,
    LLM_TENSOR_OUTPUT,
    LLM_TENSOR_OUTPUT_NORM,
    LLM_TENSOR_ROPE_FREQS,
    LLM_TENSOR_ROPE_FACTORS_LONG,
    LLM_TENSOR_ROPE_FACTORS_SHORT,
    LLM_TENSOR_ATTN_Q,
    LLM_TENSOR_ATTN_K,
    LLM_TENSOR_ATTN_V,
    LLM_TENSOR_ATTN_QKV,
    LLM_TENSOR_ATTN_OUT,
    LLM_TENSOR_ATTN_NORM,
    LLM_TENSOR_ATTN_NORM_2,
    LLM_TENSOR_ATTN_OUT_NORM,
    LLM_TENSOR_ATTN_POST_NORM,
    LLM_TENSOR_ATTN_ROT_EMBD,
    LLM_TENSOR_ATTN_SINKS,
    LLM_TENSOR_FFN_GATE_INP,
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
    LLM_TENSOR_FFN_NORM,
    LLM_TENSOR_FFN_POST_NORM,
    LLM_TENSOR_FFN_GATE,
    LLM_TENSOR_FFN_DOWN,
    LLM_TENSOR_FFN_UP,
    LLM_TENSOR_FFN_ACT,
    LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
    LLM_TENSOR_FFN_GATE_EXP,
    LLM_TENSOR_FFN_UP_EXP,
    LLM_TENSOR_FFN_NORM_EXPS,
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
    LLM_TENSOR_FFN_GATE_EXPS,
    LLM_TENSOR_FFN_UP_EXPS,
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
    LLM_TENSOR_POST_ATTN_NORM,
    LLM_TENSOR_POST_MLP_NORM,
    LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
    LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
    LLM_TENSOR_PER_LAYER_INP_GATE,   // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ,       // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ_NORM,  // gemma3n
    LLM_TENSOR_PER_LAYER_POST_NORM,  // gemma3n
    LLM_TENSOR_ALTUP_PROJ,           // gemma3n
    LLM_TENSOR_ALTUP_UNEMBD_PROJ,    // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_COEF,   // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_SCALE,  // gemma3n
    LLM_TENSOR_ALTUP_PREDICT_COEF,   // gemma3n
    LLM_TENSOR_ALTUP_ROUTER,         // gemma3n
    LLM_TENSOR_ALTUP_ROUTER_NORM,    // gemma3n
    LLM_TENSOR_LAUREL_L,             // gemma3n
    LLM_TENSOR_LAUREL_R,             // gemma3n
    LLM_TENSOR_LAUREL_POST_NORM,     // gemma3n
    LLM_TENSOR_SSM_IN,
    LLM_TENSOR_SSM_CONV1D,
    LLM_TENSOR_SSM_X,
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_DT_NORM,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_B_NORM,
    LLM_TENSOR_SSM_C_NORM,
    LLM_TENSOR_SSM_D,
    LLM_TENSOR_SSM_NORM,
    LLM_TENSOR_SSM_OUT,
    LLM_TENSOR_TIME_MIX_W0,
    LLM_TENSOR_TIME_MIX_W1,
    LLM_TENSOR_TIME_MIX_W2,
    LLM_TENSOR_TIME_MIX_A0,
    LLM_TENSOR_TIME_MIX_A1,
    LLM_TENSOR_TIME_MIX_A2,
    LLM_TENSOR_TIME_MIX_V0,
    LLM_TENSOR_TIME_MIX_V1,
    LLM_TENSOR_TIME_MIX_V2,
    LLM_TENSOR_TIME_MIX_G1,
    LLM_TENSOR_TIME_MIX_G2,
    LLM_TENSOR_TIME_MIX_K_K,
    LLM_TENSOR_TIME_MIX_K_A,
    LLM_TENSOR_TIME_MIX_R_K,
    LLM_TENSOR_TIME_MIX_LERP_X,
    LLM_TENSOR_TIME_MIX_LERP_W,
    LLM_TENSOR_TIME_MIX_LERP_K,
    LLM_TENSOR_TIME_MIX_LERP_V,
    LLM_TENSOR_TIME_MIX_LERP_R,
    LLM_TENSOR_TIME_MIX_LERP_G,
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
    LLM_TENSOR_TIME_MIX_FIRST,
    LLM_TENSOR_TIME_MIX_DECAY,
    LLM_TENSOR_TIME_MIX_DECAY_W1,
    LLM_TENSOR_TIME_MIX_DECAY_W2,
    LLM_TENSOR_TIME_MIX_KEY,
    LLM_TENSOR_TIME_MIX_VALUE,
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
    LLM_TENSOR_TIME_MIX_GATE,
    LLM_TENSOR_TIME_MIX_LN,
    LLM_TENSOR_TIME_MIX_OUTPUT,
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
    LLM_TENSOR_CHANNEL_MIX_KEY,
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
    LLM_TENSOR_CHANNEL_MIX_VALUE,
    LLM_TENSOR_ATTN_Q_A,
    LLM_TENSOR_ATTN_Q_B,
    LLM_TENSOR_ATTN_KV_A_MQA,
    LLM_TENSOR_ATTN_KV_B,
    LLM_TENSOR_ATTN_K_B,
    LLM_TENSOR_ATTN_V_B,
    LLM_TENSOR_ATTN_Q_A_NORM,
    LLM_TENSOR_ATTN_KV_A_NORM,
    LLM_TENSOR_ATTN_SUB_NORM,
    LLM_TENSOR_FFN_SUB_NORM,
    LLM_TENSOR_DEC_ATTN_NORM,
    LLM_TENSOR_DEC_ATTN_Q,
    LLM_TENSOR_DEC_ATTN_K,
    LLM_TENSOR_DEC_ATTN_V,
    LLM_TENSOR_DEC_ATTN_OUT,
    LLM_TENSOR_DEC_ATTN_REL_B,
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
    LLM_TENSOR_DEC_CROSS_ATTN_K,
    LLM_TENSOR_DEC_CROSS_ATTN_V,
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
    LLM_TENSOR_DEC_FFN_NORM,
    LLM_TENSOR_DEC_FFN_GATE,
    LLM_TENSOR_DEC_FFN_DOWN,
    LLM_TENSOR_DEC_FFN_UP,
    LLM_TENSOR_DEC_OUTPUT_NORM,
    LLM_TENSOR_ENC_ATTN_NORM,
    LLM_TENSOR_ENC_ATTN_Q,
    LLM_TENSOR_ENC_ATTN_K,
    LLM_TENSOR_ENC_ATTN_V,
    LLM_TENSOR_ENC_ATTN_OUT,
    LLM_TENSOR_ENC_ATTN_REL_B,
    LLM_TENSOR_ENC_FFN_NORM,
    LLM_TENSOR_ENC_FFN_GATE,
    LLM_TENSOR_ENC_FFN_DOWN,
    LLM_TENSOR_ENC_FFN_UP,
    LLM_TENSOR_ENC_OUTPUT_NORM,
    LLM_TENSOR_CLS,
    LLM_TENSOR_CLS_OUT,
    LLM_TENSOR_CONV1D,
    LLM_TENSOR_CONVNEXT_DW,
    LLM_TENSOR_CONVNEXT_NORM,
    LLM_TENSOR_CONVNEXT_PW1,
    LLM_TENSOR_CONVNEXT_PW2,
    LLM_TENSOR_CONVNEXT_GAMMA,
    LLM_TENSOR_POS_NET_CONV1,
    LLM_TENSOR_POS_NET_CONV2,
    LLM_TENSOR_POS_NET_NORM,
    LLM_TENSOR_POS_NET_NORM1,
    LLM_TENSOR_POS_NET_NORM2,
    LLM_TENSOR_POS_NET_ATTN_NORM,
    LLM_TENSOR_POS_NET_ATTN_Q,
    LLM_TENSOR_POS_NET_ATTN_K,
    LLM_TENSOR_POS_NET_ATTN_V,
    LLM_TENSOR_POS_NET_ATTN_OUT,
    LLM_TENSOR_SHORTCONV_CONV,
    LLM_TENSOR_SHORTCONV_INPROJ,
    LLM_TENSOR_SHORTCONV_OUTPROJ,
    LLM_TENSOR_NEXTN_EH_PROJ,
    LLM_TENSOR_NEXTN_EMBED_TOKENS,
    LLM_TENSOR_NEXTN_ENORM,
    LLM_TENSOR_NEXTN_HNORM,
    LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
    LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
};

enum llm_tensor_layer {
    LLM_TENSOR_LAYER_INPUT,     // input-side tensors (e.g. token embeddings), loaded once
    LLM_TENSOR_LAYER_REPEATING, // per-block tensors, repeated for every transformer block
    LLM_TENSOR_LAYER_OUTPUT,    // output-side tensors (e.g. final norm, output head)
};

struct LLM_KV {
    LLM_KV(llm_arch arch, const char * suffix = nullptr);

    llm_arch arch;
    const char * suffix;

    std::string operator()(llm_kv kv) const;
};
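
// usage sketch (illustrative; the actual key strings are defined in llama-arch.cpp,
// with the architecture name substituted into the key):
//
//   const auto kv = LLM_KV(LLM_ARCH_LLAMA);
//
//   std::string key = kv(LLM_KV_CONTEXT_LENGTH); // -> "llama.context_length"
//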

// helper to handle gguf constants
// usage:
//
//   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
//
//   std::string name = tn(LLM_TENSOR_OUTPUT);                  -> "output"
//   std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias");      -> "token_embd.bias"
//   std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3);  -> "blk.3.attn_norm.weight"
//
struct LLM_TN_IMPL {
    const llm_arch arch;
    const llm_tensor tensor;
    const char * const suffix;
    const int bid;
    const int xid;

    std::string str() const;

    operator std::string() const {
        return str();
    }

    friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
        return str == tn.str();
    }

    friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
        return str != tn.str();
    }
};

struct LLM_TN {
    LLM_TN(llm_arch arch) : arch(arch) {}

    llm_arch arch;

    LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
        return { arch, tensor, suffix, bid, xid };
    }

    LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
        return { arch, tensor, nullptr, bid, xid };
    }
};

struct llm_tensor_info {
    llm_tensor_layer layer;
    ggml_op op;
};

const char * llm_arch_name(llm_arch arch);

llm_arch llm_arch_from_string(const std::string & name);

const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);

bool llm_arch_is_recurrent(const llm_arch & arch);
bool llm_arch_is_hybrid   (const llm_arch & arch);
bool llm_arch_is_diffusion(const llm_arch & arch);
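
// usage sketch (illustrative; exact name <-> enum mappings live in llama-arch.cpp):
//
//   llm_arch arch = llm_arch_from_string("llama"); // unrecognized names -> LLM_ARCH_UNKNOWN
//   const char * name = llm_arch_name(arch);       // -> "llama"
//
//   if (llm_arch_is_recurrent(arch)) {
//       // recurrent architectures (e.g. Mamba, RWKV) carry per-sequence state
//       // instead of a standard KV cache
//   }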