acl_tensor.cpp
/*
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "acl_tensor.h"

#include <algorithm>
#include <cstring>
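
// Maps a ggml data type to the corresponding ACL data type. Only the types
// this backend handles appear in the switch; anything else (e.g. quantized
// types) falls through to ACL_DT_UNDEFINED.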
aclDataType ggml_cann_type_mapping(ggml_type type) {
    switch (type) {
        case GGML_TYPE_F32:
            return ACL_FLOAT;
        case GGML_TYPE_F16:
            return ACL_FLOAT16;
        case GGML_TYPE_I8:
            return ACL_INT8;
        case GGML_TYPE_I16:
            return ACL_INT16;
        case GGML_TYPE_I32:
            return ACL_INT32;
        default:
            return ACL_DT_UNDEFINED;
    }
    return ACL_DT_UNDEFINED;
}
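
// Wraps a ggml_tensor in an aclTensor. When ne/nb are null, the tensor's own
// shape and strides are used; otherwise the caller supplies a (possibly
// broadcast-expanded) view of up to GGML_MAX_DIMS * 2 dimensions. Note that
// ggml lists dimensions innermost-first while ACL expects them
// outermost-first, hence the std::reverse below, and that ACL strides are
// counted in elements rather than bytes.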
aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
                                   size_t* nb, int64_t dims, aclFormat format,
                                   size_t offset) {
    // If the tensor is bcasted, up to GGML_MAX_DIMS additional dimensions may
    // be added.
    int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];

    int64_t acl_storage_len = 0;
    if (ne == nullptr) {
        acl_storage_len = ggml_nbytes(tensor);
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            acl_ne[i] = tensor->ne[i];
            // ACL strides are counted in elements, ggml strides in bytes.
            acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
        }
    } else {
        // With bcast
        for (int i = 0; i < dims; i++) {
            acl_storage_len += (ne[i] - 1) * nb[i];
            acl_ne[i] = ne[i];
            acl_stride[i] = nb[i] / ggml_element_size(tensor);
        }
    }

    // Reverse ne and stride.
    int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
    std::reverse(acl_ne, acl_ne + final_dims);
    std::reverse(acl_stride, acl_stride + final_dims);

    aclTensor* acl_tensor = aclCreateTensor(
        acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
        offset / ggml_element_size(tensor), format, &acl_storage_len, 1,
        tensor->data);

    return acl_tensor;
}
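
// Returns true when some dimension of t1 differs from the matching dimension
// of t0 and is not 1. Size-1 dims are excluded, presumably because the ACL
// operators broadcast those natively; the remaining cases need the expanded
// shapes produced by ggml_cann_get_bcast_shape.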
bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
            return true;
        }
    }
    return false;
}
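
// Overload used when there is no ggml_tensor to wrap: the caller passes a raw
// device pointer together with an explicit dtype, element size, shape and
// byte strides. Dimensions and strides are reversed for ACL just as in the
// ggml_tensor overload, and the storage length is computed as the byte offset
// of the last element, i.e. the sum of (ne[i] - 1) * nb[i] over all dims.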
aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
                                   size_t type_size, int64_t* ne, size_t* nb,
                                   int64_t dims, aclFormat format,
                                   size_t offset) {
    int64_t tmp_ne[GGML_MAX_DIMS * 2];
    int64_t tmp_stride[GGML_MAX_DIMS * 2];

    memcpy(tmp_ne, ne, dims * sizeof(int64_t));
    for (int i = 0; i < dims; i++) {
        tmp_stride[i] = nb[i] / type_size;
    }

    std::reverse(tmp_ne, tmp_ne + dims);
    std::reverse(tmp_stride, tmp_stride + dims);

    int64_t acl_storage_len = 0;
    for (int i = 0; i < dims; i++) {
        acl_storage_len += (ne[i] - 1) * nb[i];
    }

    aclTensor* acl_tensor =
        aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
                        format, &acl_storage_len, 1, data_ptr);

    return acl_tensor;
}
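
// Expands src0/src1 into broadcast-compatible shapes for element-wise ops.
// Whenever src0->ne[i] is an nr-fold multiple of src1->ne[i] (nr > 1), that
// dimension is split in two: src0 becomes (src0->ne[i] / nr, nr) while src1
// keeps its size and gets an extra dim of 1. A hypothetical example:
//   src0->ne = {16, 6, 1, 1}, src1->ne = {16, 3, 1, 1}
//   -> bcast_src0_ne = {16, 3, 2, 1, 1}, bcast_src1_ne = {16, 3, 1, 1, 1}
// and the function returns 5, so the ACL operator can broadcast over the
// size-1 dim. The result has at most GGML_MAX_DIMS * 2 dimensions.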
int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
                                  const ggml_tensor* src1,
                                  int64_t* bcast_src0_ne,
                                  int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
                                  size_t* bcast_src1_nb) {
    GGML_ASSERT(ggml_can_repeat(src1, src0));
    int bcast_dim_cnt = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        int64_t nr = src0->ne[i] / src1->ne[i];
        bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
        bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
        bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
        bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
        bcast_dim_cnt++;
        if (nr != 1) {
            // Need to add an extra dim.
            bcast_src0_ne[bcast_dim_cnt] = nr;
            bcast_src1_ne[bcast_dim_cnt] = 1;
            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
                                           bcast_src0_ne[bcast_dim_cnt - 1];
            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
                                           bcast_src1_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}
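
// Mul_mat variant of the bcast-shape expansion. The first two dims (the
// matrix dims themselves) are always copied as-is; broadcasting only applies
// to the batch dims. Where input_ne[i] is nr times weight_ne[i] (nr > 1),
// the dim is split into (nr, input_ne[i] / nr) on input and dst while weight
// gets (1, weight_ne[i]), which satisfies the bcast rule of batched matrix
// multiplication. Returns the new dimension count.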
int64_t ggml_cann_get_mulmat_bcast_shape(
    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
    // input and dst should have the same shape, except for the first two dims.
    GGML_ASSERT(input_ne[2] == dst_ne[2]);
    GGML_ASSERT(input_ne[3] == dst_ne[3]);

    int bcast_dim_cnt = 0;

    // For mul_mat, an extra dimension is inserted before the dimension in
    // which the weight needs to be expanded, to satisfy the bcast rule of
    // matrix multiplication.
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        int64_t nr = input_ne[i] / weight_ne[i];
        // Do not use bcast in the first two dimensions because we only support
        // the bcast batch dimension. Just copy them.
        if (i < 2 || nr == 1) {
            bcast_input_ne[bcast_dim_cnt] = input_ne[i];
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];

            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_dim_cnt++;
        } else {
            // Need to add an extra dim.
            bcast_input_ne[bcast_dim_cnt] = nr;
            bcast_dst_ne[bcast_dim_cnt] = nr;
            bcast_weight_ne[bcast_dim_cnt] = 1;
            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dim_cnt++;

            bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
                                            bcast_input_ne[bcast_dim_cnt - 1];
            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
                                          bcast_dst_ne[bcast_dim_cnt - 1];
            bcast_weight_nb[bcast_dim_cnt] =
                bcast_weight_nb[bcast_dim_cnt - 1] *
                bcast_weight_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}