common.h 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #pragma once
  2. #include "ggml.h"
  3. #include "traits.h"
  4. #include "ggml-cpu-impl.h"
  5. #include "ggml-impl.h"
  6. #include "simd-mappings.h"
  7. #ifdef __cplusplus
  8. #include <utility>
  9. // convenience functions/macros for use in template calls
  10. // note: these won't be required after the 'traits' lookup table is used.
  11. static inline ggml_fp16_t f32_to_f16(float x) {
  12. return GGML_CPU_FP32_TO_FP16(x);
  13. }
  14. static inline float f16_to_f32(ggml_fp16_t x) {
  15. return GGML_CPU_FP16_TO_FP32(x);
  16. }
  17. static inline ggml_bf16_t f32_to_bf16(float x) {
  18. return GGML_FP32_TO_BF16(x);
  19. }
  20. static inline float bf16_to_f32(ggml_bf16_t x) {
  21. return GGML_BF16_TO_FP32(x);
  22. }
  // Converts a 32-bit signed integer to float using the language's standard
  // integer-to-floating conversion (made explicit here with static_cast).
  static inline float i32_to_f32(int32_t x) {
      return static_cast<float>(x);
  }
  // Converts a float to a 32-bit signed integer using the language's standard
  // floating-to-integer conversion, which discards the fractional part
  // (truncation toward zero). No range or NaN check is performed here.
  static inline int32_t f32_to_i32(float x) {
      return static_cast<int32_t>(x);
  }
  // Identity conversion: returns its argument unchanged. Lets float be used
  // through the same to/from-f32 function-pointer slots as the other types.
  static inline float f32_to_f32(float x) { return x; }
  // TODO - merge this into the traits table, after using row-based conversions
  //
  // Primary template: declared only, never defined. Each supported element
  // type T supplies an explicit specialization exposing `to_f32` and
  // `from_f32` function pointers.
  template <typename T> struct type_conversion_table;
  35. template <>
  36. struct type_conversion_table<ggml_fp16_t> {
  37. static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
  38. static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
  39. };
  40. template <>
  41. struct type_conversion_table<float> {
  42. static constexpr float (*to_f32)(float) = f32_to_f32;
  43. static constexpr float (*from_f32)(float) = f32_to_f32;
  44. };
  45. template <>
  46. struct type_conversion_table<ggml_bf16_t> {
  47. static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
  48. static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
  49. };
  50. template <>
  51. struct type_conversion_table<int32_t> {
  52. static constexpr float (*to_f32)(int32_t) = i32_to_f32;
  53. static constexpr int32_t (*from_f32)(float) = f32_to_i32;
  54. };
  55. static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
  56. const int64_t ith = params->ith;
  57. const int64_t nth = params->nth;
  58. const int64_t nr = ggml_nrows(src0);
  59. // rows per thread
  60. const int64_t dr = (nr + nth - 1)/nth;
  61. // row range for this thread
  62. const int64_t ir0 = dr*ith;
  63. const int64_t ir1 = MIN(ir0 + dr, nr);
  64. return {ir0, ir1};
  65. }
  66. #endif