#pragma once

#include "ggml.h" // ggml_op

#include <string>

//
// gguf constants (sync with gguf.py)
//

enum llm_arch {
    LLM_ARCH_CLIP,
    LLM_ARCH_LLAMA,
    LLM_ARCH_LLAMA4,
    LLM_ARCH_DECI,
    LLM_ARCH_FALCON,
    LLM_ARCH_BAICHUAN,
    LLM_ARCH_GROK,
    LLM_ARCH_GPT2,
    LLM_ARCH_GPTJ,
    LLM_ARCH_GPTNEOX,
    LLM_ARCH_MPT,
    LLM_ARCH_STARCODER,
    LLM_ARCH_REFACT,
    LLM_ARCH_BERT,
    LLM_ARCH_NOMIC_BERT,
    LLM_ARCH_NOMIC_BERT_MOE,
    LLM_ARCH_NEO_BERT,
    LLM_ARCH_JINA_BERT_V2,
    LLM_ARCH_JINA_BERT_V3,
    LLM_ARCH_BLOOM,
    LLM_ARCH_STABLELM,
    LLM_ARCH_QWEN,
    LLM_ARCH_QWEN2,
    LLM_ARCH_QWEN2MOE,
    LLM_ARCH_QWEN2VL,
    LLM_ARCH_QWEN3,
    LLM_ARCH_QWEN3MOE,
    LLM_ARCH_PHI2,
    LLM_ARCH_PHI3,
    LLM_ARCH_PHIMOE,
    LLM_ARCH_PLAMO,
    LLM_ARCH_PLAMO2,
    LLM_ARCH_CODESHELL,
    LLM_ARCH_ORION,
    LLM_ARCH_INTERNLM2,
    LLM_ARCH_MINICPM,
    LLM_ARCH_MINICPM3,
    LLM_ARCH_GEMMA,
    LLM_ARCH_GEMMA2,
    LLM_ARCH_GEMMA3,
    LLM_ARCH_GEMMA3N,
    LLM_ARCH_GEMMA_EMBEDDING,
    LLM_ARCH_STARCODER2,
    LLM_ARCH_MAMBA,
    LLM_ARCH_MAMBA2,
    LLM_ARCH_JAMBA,
    LLM_ARCH_FALCON_H1,
    LLM_ARCH_XVERSE,
    LLM_ARCH_COMMAND_R,
    LLM_ARCH_COHERE2,
    LLM_ARCH_DBRX,
    LLM_ARCH_OLMO,
    LLM_ARCH_OLMO2,
    LLM_ARCH_OLMOE,
    LLM_ARCH_OPENELM,
    LLM_ARCH_ARCTIC,
    LLM_ARCH_DEEPSEEK,
    LLM_ARCH_DEEPSEEK2,
    LLM_ARCH_CHATGLM,
    LLM_ARCH_GLM4,
    LLM_ARCH_GLM4_MOE,
    LLM_ARCH_BITNET,
    LLM_ARCH_T5,
    LLM_ARCH_T5ENCODER,
    LLM_ARCH_JAIS,
    LLM_ARCH_NEMOTRON,
    LLM_ARCH_NEMOTRON_H,
    LLM_ARCH_EXAONE,
    LLM_ARCH_EXAONE4,
    LLM_ARCH_RWKV6,
    LLM_ARCH_RWKV6QWEN2,
    LLM_ARCH_RWKV7,
    LLM_ARCH_ARWKV7,
    LLM_ARCH_GRANITE,
    LLM_ARCH_GRANITE_MOE,
    LLM_ARCH_GRANITE_HYBRID,
    LLM_ARCH_CHAMELEON,
    LLM_ARCH_WAVTOKENIZER_DEC,
    LLM_ARCH_PLM,
    LLM_ARCH_BAILINGMOE,
    LLM_ARCH_BAILINGMOE2,
    LLM_ARCH_DOTS1,
    LLM_ARCH_ARCEE,
    LLM_ARCH_ERNIE4_5,
    LLM_ARCH_ERNIE4_5_MOE,
    LLM_ARCH_HUNYUAN_MOE,
    LLM_ARCH_HUNYUAN_DENSE,
    LLM_ARCH_SMOLLM3,
    LLM_ARCH_OPENAI_MOE,
    LLM_ARCH_LFM2,
    LLM_ARCH_LFM2MOE,
    LLM_ARCH_DREAM,
    LLM_ARCH_SMALLTHINKER,
    LLM_ARCH_LLADA,
    LLM_ARCH_LLADA_MOE,
    LLM_ARCH_SEED_OSS,
    LLM_ARCH_GROVEMOE,
    LLM_ARCH_APERTUS,
    LLM_ARCH_UNKNOWN,
};

enum llm_kv {
    LLM_KV_GENERAL_TYPE,
    LLM_KV_GENERAL_ARCHITECTURE,
    LLM_KV_GENERAL_QUANTIZATION_VERSION,
    LLM_KV_GENERAL_ALIGNMENT,
    LLM_KV_GENERAL_FILE_TYPE,
    LLM_KV_GENERAL_NAME,
    LLM_KV_GENERAL_AUTHOR,
    LLM_KV_GENERAL_VERSION,
    LLM_KV_GENERAL_URL,
    LLM_KV_GENERAL_DESCRIPTION,
    LLM_KV_GENERAL_LICENSE,
    LLM_KV_GENERAL_SOURCE_URL,
    LLM_KV_GENERAL_SOURCE_HF_REPO,

    LLM_KV_VOCAB_SIZE,
    LLM_KV_CONTEXT_LENGTH,
    LLM_KV_EMBEDDING_LENGTH,
    LLM_KV_FEATURES_LENGTH,
    LLM_KV_BLOCK_COUNT,
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
    LLM_KV_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,
    LLM_KV_USE_PARALLEL_RESIDUAL,
    LLM_KV_TENSOR_DATA_LAYOUT,
    LLM_KV_EXPERT_COUNT,
    LLM_KV_EXPERT_USED_COUNT,
    LLM_KV_EXPERT_SHARED_COUNT,
    LLM_KV_EXPERT_GROUP_COUNT,
    LLM_KV_EXPERT_GROUP_USED_COUNT,
    LLM_KV_EXPERT_WEIGHTS_SCALE,
    LLM_KV_EXPERT_WEIGHTS_NORM,
    LLM_KV_EXPERT_GATING_FUNC,
    LLM_KV_EXPERT_GROUP_SCALE,
    LLM_KV_EXPERTS_PER_GROUP,
    LLM_KV_MOE_EVERY_N_LAYERS,
    LLM_KV_NEXTN_PREDICT_LAYERS,
    LLM_KV_POOLING_TYPE,
    LLM_KV_LOGIT_SCALE,
    LLM_KV_DECODER_START_TOKEN_ID,
    LLM_KV_DECODER_BLOCK_COUNT,
    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
    LLM_KV_ROUTER_LOGIT_SOFTCAPPING,
    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
    LLM_KV_SWIN_NORM,
    LLM_KV_RESCALE_EVERY_N_LAYERS,
    LLM_KV_TIME_MIX_EXTRA_DIM,
    LLM_KV_TIME_DECAY_EXTRA_DIM,
    LLM_KV_RESIDUAL_SCALE,
    LLM_KV_EMBEDDING_SCALE,
    LLM_KV_TOKEN_SHIFT_COUNT,
    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,

    LLM_KV_ATTENTION_HEAD_COUNT,
    LLM_KV_ATTENTION_HEAD_COUNT_KV,
    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
    LLM_KV_ATTENTION_CLAMP_KQV,
    LLM_KV_ATTENTION_KEY_LENGTH,
    LLM_KV_ATTENTION_VALUE_LENGTH,
    LLM_KV_ATTENTION_LAYERNORM_EPS,
    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
    LLM_KV_ATTENTION_GROUPNORM_EPS,
    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
    LLM_KV_ATTENTION_CAUSAL,
    LLM_KV_ATTENTION_Q_LORA_RANK,
    LLM_KV_ATTENTION_KV_LORA_RANK,
    LLM_KV_ATTENTION_DECAY_LORA_RANK,
    LLM_KV_ATTENTION_ICLR_LORA_RANK,
    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
    LLM_KV_ATTENTION_GATE_LORA_RANK,
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
    LLM_KV_ATTENTION_SLIDING_WINDOW,
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_OUTPUT_SCALE,
    LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

    LLM_KV_ROPE_DIMENSION_COUNT,
    LLM_KV_ROPE_DIMENSION_SECTIONS,
    LLM_KV_ROPE_FREQ_BASE,
    LLM_KV_ROPE_SCALE_LINEAR,
    LLM_KV_ROPE_SCALING_TYPE,
    LLM_KV_ROPE_SCALING_FACTOR,
    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
    LLM_KV_ROPE_SCALING_FINETUNED,
    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
    LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_BETA_FAST,
    LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,

    LLM_KV_SPLIT_NO,
    LLM_KV_SPLIT_COUNT,
    LLM_KV_SPLIT_TENSORS_COUNT,

    LLM_KV_SSM_INNER_SIZE,
    LLM_KV_SSM_CONV_KERNEL,
    LLM_KV_SSM_STATE_SIZE,
    LLM_KV_SSM_TIME_STEP_RANK,
    LLM_KV_SSM_GROUP_COUNT,
    LLM_KV_SSM_DT_B_C_RMS,

    LLM_KV_WKV_HEAD_SIZE,

    LLM_KV_TOKENIZER_MODEL,
    LLM_KV_TOKENIZER_PRE,
    LLM_KV_TOKENIZER_LIST,
    LLM_KV_TOKENIZER_TOKEN_TYPE,
    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
    LLM_KV_TOKENIZER_SCORES,
    LLM_KV_TOKENIZER_MERGES,
    LLM_KV_TOKENIZER_BOS_ID,
    LLM_KV_TOKENIZER_EOS_ID,
    LLM_KV_TOKENIZER_EOT_ID,
    LLM_KV_TOKENIZER_EOM_ID,
    LLM_KV_TOKENIZER_UNK_ID,
    LLM_KV_TOKENIZER_SEP_ID,
    LLM_KV_TOKENIZER_PAD_ID,
    LLM_KV_TOKENIZER_CLS_ID,
    LLM_KV_TOKENIZER_MASK_ID,
    LLM_KV_TOKENIZER_ADD_BOS,
    LLM_KV_TOKENIZER_ADD_EOS,
    LLM_KV_TOKENIZER_ADD_SEP,
    LLM_KV_TOKENIZER_ADD_PREFIX,
    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
    LLM_KV_TOKENIZER_HF_JSON,
    LLM_KV_TOKENIZER_RWKV,
    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
    LLM_KV_TOKENIZER_FIM_PRE_ID,
    LLM_KV_TOKENIZER_FIM_SUF_ID,
    LLM_KV_TOKENIZER_FIM_MID_ID,
    LLM_KV_TOKENIZER_FIM_PAD_ID,
    LLM_KV_TOKENIZER_FIM_REP_ID,
    LLM_KV_TOKENIZER_FIM_SEP_ID,

    LLM_KV_ADAPTER_TYPE,
    LLM_KV_ADAPTER_LORA_ALPHA,
    LLM_KV_ADAPTER_LORA_TASK_NAME,
    LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,
    LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS,

    LLM_KV_POSNET_EMBEDDING_LENGTH,
    LLM_KV_POSNET_BLOCK_COUNT,

    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
    LLM_KV_CONVNEXT_BLOCK_COUNT,

    LLM_KV_CLASSIFIER_OUTPUT_LABELS,

    LLM_KV_SHORTCONV_L_CACHE,

    LLM_KV_XIELU_ALPHA_N,
    LLM_KV_XIELU_ALPHA_P,
    LLM_KV_XIELU_BETA,
    LLM_KV_XIELU_EPS,

    // deprecated:
    LLM_KV_TOKENIZER_PREFIX_ID,
    LLM_KV_TOKENIZER_SUFFIX_ID,
    LLM_KV_TOKENIZER_MIDDLE_ID,

    // in/out feature sizes of the sentence-transformers dense layers
    LLM_KV_DENSE_2_FEAT_IN,
    LLM_KV_DENSE_2_FEAT_OUT,
    LLM_KV_DENSE_3_FEAT_IN,
    LLM_KV_DENSE_3_FEAT_OUT,
};

enum llm_tensor {
    LLM_TENSOR_TOKEN_EMBD,
    LLM_TENSOR_TOKEN_EMBD_NORM,
    LLM_TENSOR_TOKEN_TYPES,
    LLM_TENSOR_POS_EMBD,
    LLM_TENSOR_DENSE_2_OUT,
    LLM_TENSOR_DENSE_3_OUT,
    LLM_TENSOR_OUTPUT,
    LLM_TENSOR_OUTPUT_NORM,
    LLM_TENSOR_ROPE_FREQS,
    LLM_TENSOR_ROPE_FACTORS_LONG,
    LLM_TENSOR_ROPE_FACTORS_SHORT,
    LLM_TENSOR_ATTN_Q,
    LLM_TENSOR_ATTN_K,
    LLM_TENSOR_ATTN_V,
    LLM_TENSOR_ATTN_QKV,
    LLM_TENSOR_ATTN_OUT,
    LLM_TENSOR_ATTN_NORM,
    LLM_TENSOR_ATTN_NORM_2,
    LLM_TENSOR_ATTN_OUT_NORM,
    LLM_TENSOR_ATTN_POST_NORM,
    LLM_TENSOR_ATTN_ROT_EMBD,
    LLM_TENSOR_ATTN_SINKS,
    LLM_TENSOR_FFN_GATE_INP,
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
    LLM_TENSOR_FFN_NORM,
    LLM_TENSOR_FFN_POST_NORM,
    LLM_TENSOR_FFN_GATE,
    LLM_TENSOR_FFN_DOWN,
    LLM_TENSOR_FFN_UP,
    LLM_TENSOR_FFN_ACT,
    LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
    LLM_TENSOR_FFN_GATE_EXP,
    LLM_TENSOR_FFN_UP_EXP,
    LLM_TENSOR_FFN_NORM_EXPS,
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
    LLM_TENSOR_FFN_GATE_EXPS,
    LLM_TENSOR_FFN_UP_EXPS,
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
    LLM_TENSOR_FFN_DOWN_CHEXPS,
    LLM_TENSOR_FFN_GATE_CHEXPS,
    LLM_TENSOR_FFN_UP_CHEXPS,
    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
    LLM_TENSOR_POST_ATTN_NORM,
    LLM_TENSOR_POST_MLP_NORM,
    LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
    LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
    LLM_TENSOR_PER_LAYER_INP_GATE,   // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ,       // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ_NORM,  // gemma3n
    LLM_TENSOR_PER_LAYER_POST_NORM,  // gemma3n
    LLM_TENSOR_ALTUP_PROJ,           // gemma3n
    LLM_TENSOR_ALTUP_UNEMBD_PROJ,    // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_COEF,   // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_SCALE,  // gemma3n
    LLM_TENSOR_ALTUP_PREDICT_COEF,   // gemma3n
    LLM_TENSOR_ALTUP_ROUTER,         // gemma3n
    LLM_TENSOR_ALTUP_ROUTER_NORM,    // gemma3n
    LLM_TENSOR_LAUREL_L,             // gemma3n
    LLM_TENSOR_LAUREL_R,             // gemma3n
    LLM_TENSOR_LAUREL_POST_NORM,     // gemma3n
    LLM_TENSOR_SSM_IN,
    LLM_TENSOR_SSM_CONV1D,
    LLM_TENSOR_SSM_X,
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_DT_NORM,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_B_NORM,
    LLM_TENSOR_SSM_C_NORM,
    LLM_TENSOR_SSM_D,
    LLM_TENSOR_SSM_NORM,
    LLM_TENSOR_SSM_OUT,
    LLM_TENSOR_TIME_MIX_W0,
    LLM_TENSOR_TIME_MIX_W1,
    LLM_TENSOR_TIME_MIX_W2,
    LLM_TENSOR_TIME_MIX_A0,
    LLM_TENSOR_TIME_MIX_A1,
    LLM_TENSOR_TIME_MIX_A2,
    LLM_TENSOR_TIME_MIX_V0,
    LLM_TENSOR_TIME_MIX_V1,
    LLM_TENSOR_TIME_MIX_V2,
    LLM_TENSOR_TIME_MIX_G1,
    LLM_TENSOR_TIME_MIX_G2,
    LLM_TENSOR_TIME_MIX_K_K,
    LLM_TENSOR_TIME_MIX_K_A,
    LLM_TENSOR_TIME_MIX_R_K,
    LLM_TENSOR_TIME_MIX_LERP_X,
    LLM_TENSOR_TIME_MIX_LERP_W,
    LLM_TENSOR_TIME_MIX_LERP_K,
    LLM_TENSOR_TIME_MIX_LERP_V,
    LLM_TENSOR_TIME_MIX_LERP_R,
    LLM_TENSOR_TIME_MIX_LERP_G,
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
    LLM_TENSOR_TIME_MIX_FIRST,
    LLM_TENSOR_TIME_MIX_DECAY,
    LLM_TENSOR_TIME_MIX_DECAY_W1,
    LLM_TENSOR_TIME_MIX_DECAY_W2,
    LLM_TENSOR_TIME_MIX_KEY,
    LLM_TENSOR_TIME_MIX_VALUE,
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
    LLM_TENSOR_TIME_MIX_GATE,
    LLM_TENSOR_TIME_MIX_LN,
    LLM_TENSOR_TIME_MIX_OUTPUT,
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
    LLM_TENSOR_CHANNEL_MIX_KEY,
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
    LLM_TENSOR_CHANNEL_MIX_VALUE,
    LLM_TENSOR_ATTN_Q_A,
    LLM_TENSOR_ATTN_Q_B,
    LLM_TENSOR_ATTN_KV_A_MQA,
    LLM_TENSOR_ATTN_KV_B,
    LLM_TENSOR_ATTN_K_B,
    LLM_TENSOR_ATTN_V_B,
    LLM_TENSOR_ATTN_Q_A_NORM,
    LLM_TENSOR_ATTN_KV_A_NORM,
    LLM_TENSOR_ATTN_SUB_NORM,
    LLM_TENSOR_FFN_SUB_NORM,
    LLM_TENSOR_DEC_ATTN_NORM,
    LLM_TENSOR_DEC_ATTN_Q,
    LLM_TENSOR_DEC_ATTN_K,
    LLM_TENSOR_DEC_ATTN_V,
    LLM_TENSOR_DEC_ATTN_OUT,
    LLM_TENSOR_DEC_ATTN_REL_B,
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
    LLM_TENSOR_DEC_CROSS_ATTN_K,
    LLM_TENSOR_DEC_CROSS_ATTN_V,
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
    LLM_TENSOR_DEC_FFN_NORM,
    LLM_TENSOR_DEC_FFN_GATE,
    LLM_TENSOR_DEC_FFN_DOWN,
    LLM_TENSOR_DEC_FFN_UP,
    LLM_TENSOR_DEC_OUTPUT_NORM,
    LLM_TENSOR_ENC_ATTN_NORM,
    LLM_TENSOR_ENC_ATTN_Q,
    LLM_TENSOR_ENC_ATTN_K,
    LLM_TENSOR_ENC_ATTN_V,
    LLM_TENSOR_ENC_ATTN_OUT,
    LLM_TENSOR_ENC_ATTN_REL_B,
    LLM_TENSOR_ENC_FFN_NORM,
    LLM_TENSOR_ENC_FFN_GATE,
    LLM_TENSOR_ENC_FFN_DOWN,
    LLM_TENSOR_ENC_FFN_UP,
    LLM_TENSOR_ENC_OUTPUT_NORM,
    LLM_TENSOR_CLS,
    LLM_TENSOR_CLS_OUT,
    LLM_TENSOR_CONV1D,
    LLM_TENSOR_CONVNEXT_DW,
    LLM_TENSOR_CONVNEXT_NORM,
    LLM_TENSOR_CONVNEXT_PW1,
    LLM_TENSOR_CONVNEXT_PW2,
    LLM_TENSOR_CONVNEXT_GAMMA,
    LLM_TENSOR_POS_NET_CONV1,
    LLM_TENSOR_POS_NET_CONV2,
    LLM_TENSOR_POS_NET_NORM,
    LLM_TENSOR_POS_NET_NORM1,
    LLM_TENSOR_POS_NET_NORM2,
    LLM_TENSOR_POS_NET_ATTN_NORM,
    LLM_TENSOR_POS_NET_ATTN_Q,
    LLM_TENSOR_POS_NET_ATTN_K,
    LLM_TENSOR_POS_NET_ATTN_V,
    LLM_TENSOR_POS_NET_ATTN_OUT,
    LLM_TENSOR_SHORTCONV_CONV,
    LLM_TENSOR_SHORTCONV_INPROJ,
    LLM_TENSOR_SHORTCONV_OUTPROJ,
    LLM_TENSOR_NEXTN_EH_PROJ,
    LLM_TENSOR_NEXTN_EMBED_TOKENS,
    LLM_TENSOR_NEXTN_ENORM,
    LLM_TENSOR_NEXTN_HNORM,
    LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
    LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
};
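
// coarse location of a tensor within the model: the input embedding stage,
// the repeating transformer blocks, or the final output stage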
enum llm_tensor_layer {
    LLM_TENSOR_LAYER_INPUT,
    LLM_TENSOR_LAYER_REPEATING,
    LLM_TENSOR_LAYER_OUTPUT,
};

struct LLM_KV {
    LLM_KV(llm_arch arch, const char * suffix = nullptr);

    llm_arch arch;
    const char * suffix;

    std::string operator()(llm_kv kv) const;
};
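
// usage of LLM_KV (illustrative sketch; the exact key strings are defined in
// the .cpp implementation and kept in sync with gguf.py):
//
//   const auto kv = LLM_KV(LLM_ARCH_LLAMA);
//
//   std::string key = kv(LLM_KV_CONTEXT_LENGTH); -> "llama.context_length"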

// helper to handle gguf constants
// usage:
//
//   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
//
//   std::string name = tn(LLM_TENSOR_OUTPUT);                  -> "output"
//   std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias");      -> "token_embd.bias"
//   std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3);  -> "blk.3.attn_norm.weight"
//
struct LLM_TN_IMPL {
    const llm_arch arch;
    const llm_tensor tensor;
    const char * const suffix;
    const int bid;
    const int xid;

    std::string str() const;

    operator std::string() const {
        return str();
    }

    friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
        return str == tn.str();
    }

    friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
        return str != tn.str();
    }
};

struct LLM_TN {
    LLM_TN(llm_arch arch) : arch(arch) {}

    llm_arch arch;

    LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
        return { arch, tensor, suffix, bid, xid };
    }

    LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
        return { arch, tensor, nullptr, bid, xid };
    }
};
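
// the comparison operators above make tensor-name checks read naturally
// (illustrative; `name` is a hypothetical gguf tensor name, `il` a block index):
//
//   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
//
//   if (name == tn(LLM_TENSOR_ATTN_Q, "weight", il)) {
//       // matched "blk.<il>.attn_q.weight"
//   }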

struct llm_tensor_info {
    llm_tensor_layer layer;
    ggml_op op;
};

const char * llm_arch_name(llm_arch arch);

llm_arch llm_arch_from_string(const std::string & name);

const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);

bool llm_arch_is_recurrent(const llm_arch & arch);
bool llm_arch_is_hybrid   (const llm_arch & arch);
bool llm_arch_is_diffusion(const llm_arch & arch);
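
// illustrative round-trip between the enum and string forms (assumes the
// lowercase architecture names used in gguf metadata, e.g. "llama"):
//
//   llm_arch arch = llm_arch_from_string("llama"); // -> LLM_ARCH_LLAMA
//   const char * name = llm_arch_name(arch);       // -> "llama"
//
// unrecognized names are expected to map to LLM_ARCH_UNKNOWN rather than fail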