// ggml-cpu.h — public C API of the ggml CPU backend
  1. #pragma once
  2. #include "ggml.h"
  3. #include "ggml-backend.h"
  4. #ifdef __cplusplus
  5. extern "C" {
  6. #endif
  7. // the compute plan that needs to be prepared for ggml_graph_compute()
  8. // since https://github.com/ggml-org/ggml/issues/287
  9. struct ggml_cplan {
  10. size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
  11. uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
  12. int n_threads;
  13. struct ggml_threadpool * threadpool;
  14. // abort ggml_graph_compute when true
  15. ggml_abort_callback abort_callback;
  16. void * abort_callback_data;
  17. };
  18. // numa strategies
  19. enum ggml_numa_strategy {
  20. GGML_NUMA_STRATEGY_DISABLED = 0,
  21. GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
  22. GGML_NUMA_STRATEGY_ISOLATE = 2,
  23. GGML_NUMA_STRATEGY_NUMACTL = 3,
  24. GGML_NUMA_STRATEGY_MIRROR = 4,
  25. GGML_NUMA_STRATEGY_COUNT
  26. };
  27. GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
  28. GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
  29. GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
  30. GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
  31. GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
  32. GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
  33. GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
  34. GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
  35. GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
  36. GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
  37. GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
  38. GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
  39. GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
  40. GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
  41. GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
  42. GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
  43. GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
  44. GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
  45. GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
  46. // ggml_graph_plan() has to be called before ggml_graph_compute()
  47. // when plan.work_size > 0, caller must allocate memory for plan.work_data
  48. GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
  49. const struct ggml_cgraph * cgraph,
  50. int n_threads, /* = GGML_DEFAULT_N_THREADS */
  51. struct ggml_threadpool * threadpool /* = NULL */ );
  52. GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
  53. // same as ggml_graph_compute() but the work data is allocated as a part of the context
  54. // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
  55. GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
  56. //
  57. // system info
  58. //
  59. // x86
  60. GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
  61. GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
  62. GGML_BACKEND_API int ggml_cpu_has_avx (void);
  63. GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
  64. GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
  65. GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
  66. GGML_BACKEND_API int ggml_cpu_has_f16c (void);
  67. GGML_BACKEND_API int ggml_cpu_has_fma (void);
  68. GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
  69. GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
  70. GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
  71. GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
  72. GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
  73. // ARM
  74. GGML_BACKEND_API int ggml_cpu_has_neon (void);
  75. GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
  76. GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
  77. GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
  78. GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
  79. GGML_BACKEND_API int ggml_cpu_has_sve (void);
  80. GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
  81. GGML_BACKEND_API int ggml_cpu_has_sme (void);
  82. // other
  83. GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
  84. GGML_BACKEND_API int ggml_cpu_has_vsx (void);
  85. GGML_BACKEND_API int ggml_cpu_has_vxe (void);
  86. GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
  87. GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
  88. // Internal types and functions exposed for tests and benchmarks
  89. typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
  90. const void * GGML_RESTRICT y, size_t by, int nrc);
  91. struct ggml_type_traits_cpu {
  92. ggml_from_float_t from_float;
  93. ggml_vec_dot_t vec_dot;
  94. enum ggml_type vec_dot_type;
  95. int64_t nrows; // number of rows to process simultaneously
  96. };
  97. GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
  98. GGML_BACKEND_API void ggml_cpu_init(void);
  99. //
  100. // CPU backend
  101. //
  102. GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
  103. GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
  104. GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
  105. GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
  106. GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
  107. GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
  108. GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
  109. GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t);
  110. GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
  111. GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
  112. GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
  113. GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t *, float *, int64_t);
  114. #ifdef __cplusplus
  115. }
  116. #endif