cogvlm.cpp 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. #include "models.h"
  2. ggml_cgraph * clip_graph_cogvlm::build() {
  3. GGML_ASSERT(model.class_embedding != nullptr);
  4. GGML_ASSERT(model.position_embeddings != nullptr);
  5. const int n_pos = n_patches + 1; // +1 for [CLS]
  6. // build input and concatenate class embedding
  7. ggml_tensor * inp = build_inp();
  8. inp = ggml_concat(ctx0, inp, model.class_embedding, 1);
  9. inp = ggml_add(ctx0, inp, model.position_embeddings);
  10. cb(inp, "inp_pos", -1);
  11. ggml_tensor * inpL = inp;
  12. for (int il = 0; il < n_layer; il++) {
  13. auto & layer = model.layers[il];
  14. ggml_tensor * cur = inpL;
  15. cur = ggml_mul_mat(ctx0, layer.qkv_w, cur);
  16. cur = ggml_add(ctx0, cur, layer.qkv_b);
  17. ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
  18. cur->nb[1], 0);
  19. ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
  20. cur->nb[1], n_embd * sizeof(float));
  21. ggml_tensor * Vcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
  22. cur->nb[1], 2 * n_embd * sizeof(float));
  23. cb(Qcur, "Qcur", il);
  24. cb(Kcur, "Kcur", il);
  25. cb(Vcur, "Vcur", il);
  26. cur = build_attn(layer.o_w, layer.o_b,
  27. Qcur, Kcur, Vcur, nullptr, kq_scale, il);
  28. cb(cur, "attn_out", il);
  29. cur = build_norm(cur, layer.ln_1_w, layer.ln_1_b, NORM_TYPE_NORMAL, eps, il);
  30. cb(cur, "attn_post_norm", il);
  31. cur = ggml_add(ctx0, cur, inpL);
  32. inpL = cur;
  33. cur = build_ffn(cur,
  34. layer.ff_up_w, layer.ff_up_b,
  35. layer.ff_gate_w, layer.ff_gate_b,
  36. layer.ff_down_w, layer.ff_down_b,
  37. hparams.ffn_op, il);
  38. cb(cur, "ffn_out", il);
  39. cur = build_norm(cur, layer.ln_2_w, layer.ln_2_b, NORM_TYPE_NORMAL, eps, il);
  40. cb(cur, "ffn_post_norm", il);
  41. cur = ggml_add(ctx0, cur, inpL);
  42. cb(cur, "layer_out", il);
  43. inpL = cur;
  44. }
  45. // remove CLS token (like build_llama4 does)
  46. ggml_tensor * cur = ggml_view_2d(ctx0, inpL,
  47. n_embd, n_patches,
  48. ggml_row_size(inpL->type, n_embd), 0);
  49. // Multiply with mm_model_proj
  50. cur = ggml_mul_mat(ctx0, model.mm_model_proj, cur);
  51. // Apply layernorm, weight, bias
  52. cur = build_norm(cur, model.mm_post_fc_norm_w, model.mm_post_fc_norm_b, NORM_TYPE_NORMAL, 1e-5, -1);
  53. // Apply GELU
  54. cur = ggml_gelu_inplace(ctx0, cur);
  55. // Branch 1: multiply with mm_h_to_4h_w
  56. ggml_tensor * h_to_4h = ggml_mul_mat(ctx0, model.mm_h_to_4h_w, cur);
  57. // Branch 2: multiply with mm_gate_w
  58. ggml_tensor * gate = ggml_mul_mat(ctx0, model.mm_gate_w, cur);
  59. // Apply silu
  60. gate = ggml_swiglu_split(ctx0, gate, h_to_4h);
  61. // Apply mm_4h_to_h_w
  62. cur = ggml_mul_mat(ctx0, model.mm_4h_to_h_w, gate);
  63. // Concatenate with boi and eoi
  64. cur = ggml_concat(ctx0, model.mm_boi, cur, 1);
  65. cur = ggml_concat(ctx0, cur, model.mm_eoi, 1);
  66. // build the graph
  67. ggml_build_forward_expand(gf, cur);
  68. return gf;
  69. }