// models.h
  1. #pragma once
  2. #include "../clip-graph.h"
  /*
   * IMPORTANT: The mtmd module does NOT accept pull requests that are fully or predominantly AI-generated.
   * We encourage human contributors to ensure the quality and reliability of the codebase.
   */
  7. struct clip_graph_siglip : clip_graph {
  8. clip_graph_siglip(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  9. ggml_cgraph * build() override;
  10. };
  11. struct clip_graph_pixtral : clip_graph {
  12. clip_graph_pixtral(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  13. ggml_cgraph * build() override;
  14. };
  15. struct clip_graph_qwen2vl : clip_graph {
  16. clip_graph_qwen2vl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  17. ggml_cgraph * build() override;
  18. };
  19. struct clip_graph_qwen3vl : clip_graph {
  20. clip_graph_qwen3vl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  21. ggml_cgraph * build() override;
  22. };
  23. struct clip_graph_youtuvl : clip_graph {
  24. clip_graph_youtuvl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  25. ggml_cgraph * build() override;
  26. };
  27. struct clip_graph_minicpmv : clip_graph {
  28. clip_graph_minicpmv(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  29. ggml_cgraph * build() override;
  30. };
  31. struct clip_graph_internvl : clip_graph {
  32. clip_graph_internvl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  33. ggml_cgraph * build() override;
  34. };
  35. struct clip_graph_llama4 : clip_graph {
  36. clip_graph_llama4(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  37. ggml_cgraph * build() override;
  38. };
  39. struct clip_graph_kimivl : clip_graph {
  40. clip_graph_kimivl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  41. ggml_cgraph * build() override;
  42. };
  43. struct clip_graph_cogvlm : clip_graph {
  44. clip_graph_cogvlm(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  45. ggml_cgraph * build() override;
  46. };
  47. struct clip_graph_llava : clip_graph {
  48. clip_graph_llava(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  49. ggml_cgraph * build() override;
  50. };
  51. struct clip_graph_whisper_enc : clip_graph {
  52. clip_graph_whisper_enc(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  53. ggml_cgraph * build() override;
  54. };
  55. struct clip_graph_conformer : clip_graph {
  56. clip_graph_conformer(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  57. ggml_cgraph * build() override;
  58. };
  59. struct clip_graph_glm4v : clip_graph {
  60. clip_graph_glm4v(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  61. ggml_cgraph * build() override;
  62. };
  63. struct clip_graph_mobilenetv5 : clip_graph {
  64. clip_graph_mobilenetv5(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
  65. ggml_cgraph * build() override;
  66. ggml_tensor * rms_norm_2d(
  67. ggml_tensor * inp,
  68. ggml_tensor * weight,
  69. float eps = 1e-6f);
  70. ggml_tensor* pad_same_2d(
  71. ggml_tensor* inp,
  72. int kernel_h,
  73. int kernel_w,
  74. int stride_h,
  75. int stride_w,
  76. int dilation_h = 1,
  77. int dilation_w = 1);
  78. ggml_tensor * build_edge_residual(
  79. ggml_tensor * inp,
  80. const mobilenetv5_block & block,
  81. int stride);
  82. ggml_tensor * build_inverted_residual(
  83. ggml_tensor * inp,
  84. const mobilenetv5_block & block,
  85. int stride);
  86. ggml_tensor * build_mobilenet_attn(
  87. ggml_tensor * inp,
  88. const mobilenetv5_block & block);
  89. };