@@ -62,7 +62,7 @@
 * @param dst The ggml tensor representing the destination, whose op is
 *            GGML_OP_REPEAT and which specifies the desired dimensions.
 */
-void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
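
Every kernel in this header follows the same calling convention: the whole computation is described by `dst` (operands in `dst->src[]`, scalar parameters in `dst->op_params`). As a sketch, a backend dispatch loop routes nodes along these lines (the `dispatch_node` helper is illustrative, not part of this header):

    // Illustrative dispatcher sketch: route a graph node to the CANN kernel
    // matching its op; operands and parameters all travel inside `dst`.
    static bool dispatch_node(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
        switch (dst->op) {
            case GGML_OP_REPEAT:     ggml_cann_repeat(ctx, dst);     return true;  // target shape read from dst->ne
            case GGML_OP_LEAKY_RELU: ggml_cann_leaky_relu(ctx, dst); return true;
            default:                 return false;  // op handled elsewhere
        }
    }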

 /**
 * @brief Applies the Leaky ReLU activation function to a tensor using the CANN
@@ -82,7 +82,7 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the result of the Leaky ReLU
 *            activation is stored; its op is `GGML_OP_LEAKY_RELU`.
 */
-void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Concatenates multiple tensors along a specified dimension using the
@@ -97,7 +97,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @attention The tensorList length should be 2, and the dimension used for
 *            concatenation defaults to 1.
 */
-void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Generates a sequence of evenly spaced values within a specified
@@ -113,7 +113,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 *            `start`, `stop` and `step` are in dst->op_params and dst->op is
 *            `GGML_OP_ARANGE`.
 */
-void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Applies a clamp operation to the elements of a ggml tensor using the
@@ -131,7 +131,7 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the clamped values will be stored.
 *            dst->op is `GGML_OP_CLAMP`; the `min` and `max` values are in dst->params.
 */
-void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Scales the elements of a ggml tensor by a constant factor using the
@@ -148,7 +148,7 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the scaled values will be stored.
 *            dst->op is `GGML_OP_SCALE` and the `scale` value is in dst->params.
 */
-void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Sorts the elements of a ggml tensor and returns the indices that
@@ -163,7 +163,7 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the sorted indices will be stored.
 *            dst->op is `GGML_OP_ARGSORT`.
 */
-void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the Layer Normalization for a ggml tensor using the CANN
@@ -185,7 +185,7 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the normalized values will be stored.
 * @attention `Var` defaults to dst->ne[0].
 */
-void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the Group Normalization for a ggml tensor using the CANN
@@ -209,7 +209,7 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 *
 * @attention eps defaults to 1e-6f.
 */
-void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the accumulation of tensors using the CANN backend.
@@ -228,7 +228,7 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the accumulated values will be stored.
 *            `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
 */
-void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the sum of elements along the last dimension of a ggml tensor
@@ -244,7 +244,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 *
 * @attention `reduce_dims` defaults to 3, which means the last dimension.
 */
-void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the sum of elements in a ggml tensor.
@@ -258,7 +258,7 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 *
 */

-void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Upsamples a ggml tensor using nearest neighbor interpolation using
@@ -274,8 +274,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the upsampled values will be stored.
 *            dst->op is `GGML_OP_UPSCALE`.
 */
-void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
-                                  ggml_tensor* dst);
+void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Pads a ggml tensor to match the dimensions of the destination tensor
@@ -290,7 +289,7 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
 * @param dst The destination tensor, which specifies the target dimensions for
 *            padding. dst->op is `GGML_OP_PAD`.
 */
-void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Executes a 2D pooling operation on a ggml tensor using the CANN
@@ -307,7 +306,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor on which the pooling operation is to be
 *            performed. dst->op is `GGML_OP_POOL_2D`.
 */
-void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Duplicates a ggml tensor using the CANN backend.
@@ -326,7 +325,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 *            different shape and dst is non-contiguous.
 * @note This function needs to be simplified.
 */
-void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
@@ -348,7 +347,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the normalized values will be stored.
 *            dst->op is `GGML_OP_RMS_NORM`.
 */
-void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Applies a diagonal mask to the tensor with a specified value.
@@ -363,7 +362,7 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 *            `GGML_OP_DIAG_MASK`
 * @param value The value to use for masking.
 */
-void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
+void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value);
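
The `value` argument carries the masking constant. In ggml the two diagonal-mask ops differ only in that constant, so a plausible dispatch looks like the sketch below (the op names come from ggml's public enum; the helper itself is illustrative):

    #include <cmath>  // for INFINITY

    // Illustrative helper: pick the mask constant from the node's op.
    // -INFINITY makes masked positions vanish after a later softmax,
    // while 0.0f simply zeroes them.
    static void run_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
        const float value = (dst->op == GGML_OP_DIAG_MASK_INF) ? -INFINITY : 0.0f;
        ggml_cann_diag_mask(ctx, dst, value);
    }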

 /**
 * @brief Performs an image-to-column transformation on the input tensor.
@@ -378,7 +377,7 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float
 * @param dst The destination tensor that stores the result of the operation.
 *            dst->op is `GGML_OP_IM2COL`.
 */
-void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes time step embeddings using sine and cosine functions.
@@ -392,10 +391,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the result of the embedding operation
 *            will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
 */
-void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 // @see ggml_cann_dup.
-void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the softmax activation with optional masking.
@@ -417,7 +416,7 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the result will be stored. dst->op is
 *            `GGML_OP_SOFTMAX`.
 */
-void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Extracts specific rows from a tensor based on indices.
@@ -429,7 +428,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor where the extracted rows will be stored.
 */
-void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Writes specific rows into a tensor at positions specified by indices.
@@ -441,7 +440,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor where the specified rows will be updated.
 */
-void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Executes matrix multiplication for the given tensor.
@@ -454,7 +453,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor for storing the result of the matrix
 *            multiplication. dst->op is `GGML_OP_MUL_MAT`.
 */
-void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
@@ -477,7 +476,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @note The function currently does not support cases where the freq_scale is
 *       not equal to 1.
 */
-void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the index of the maximum value along the specified dimension
@@ -492,7 +491,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the indices of the maximum values will
 *            be stored. dst->op is `GGML_OP_ARGMAX`.
 */
-void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Adds two tensors element-wise and stores the result in a destination
@@ -509,8 +508,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param acl_src1 The second source tensor.
 * @param acl_dst The destination tensor where the result will be stored.
 */
-void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
-               aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
+void aclnn_add(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src0,
+               aclTensor * acl_src1,
+               aclTensor * acl_dst = nullptr);

 /**
 * @brief Subtracts two tensors element-wise and stores the result in a destination
@@ -527,8 +528,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
 * @param acl_src1 The second source tensor.
 * @param acl_dst The destination tensor where the result will be stored.
 */
-void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
-               aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
+void aclnn_sub(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src0,
+               aclTensor * acl_src1,
+               aclTensor * acl_dst = nullptr);

 /**
 * @brief Performs element-wise multiplication of two tensors and stores the
@@ -546,8 +549,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
 * @param acl_other The second tensor for element-wise multiplication.
 * @param acl_dst The destination tensor where the result will be stored.
 */
-void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+void aclnn_mul(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src,
+               aclTensor * acl_other,
+               aclTensor * acl_dst = nullptr);

 /**
 * @brief Matrix division, optionally in-place.
@@ -567,8 +572,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 * @param inplace Flag indicating whether to perform the operation in-place on
 *                `acl_src`.
 */
-void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+void aclnn_div(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src,
+               aclTensor * acl_other,
+               aclTensor * acl_dst = nullptr);
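
The defaulted `acl_dst = nullptr` in these four wrappers is the in-place switch referred to in the `aclnn_div` notes: with no destination given, the result is written back into the first operand. A sketch (helper name and handles illustrative):

    // Sketch: out-of-place vs. in-place use of the element-wise wrappers.
    // acl_a, acl_b and acl_out are assumed to be valid, shape-compatible
    // aclTensor handles created elsewhere in this backend.
    static void elementwise_demo(ggml_backend_cann_context & ctx,
                                 aclTensor * acl_a, aclTensor * acl_b, aclTensor * acl_out) {
        aclnn_add(ctx, acl_a, acl_b, acl_out);  // acl_out = acl_a + acl_b
        aclnn_mul(ctx, acl_a, acl_b);           // in-place: acl_a = acl_a * acl_b
    }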

 /**
 * @brief Applies element-wise cosine function to the elements of a tensor.
@@ -584,8 +591,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 * @param acl_dst The destination tensor where the cosine results will be
 *                stored.
 */
-void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_dst);
+void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);

 /**
 * @brief Applies element-wise sine function to the elements of a tensor.
@@ -602,8 +608,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 * @param acl_src The source tensor on which the sine function will be applied.
 * @param acl_dst The destination tensor where the sine results will be stored.
 */
-void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_dst);
+void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);

 /**
 * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
@@ -621,8 +626,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
 * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
 */
-void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
-                 aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
+void bcast_shape(ggml_tensor * src0,
+                 ggml_tensor * src1,
+                 ggml_tensor * dst,
+                 aclTensor ** acl_src0,
+                 aclTensor ** acl_src1,
+                 aclTensor ** acl_dst);

 /**
 * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
@@ -637,7 +646,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
 * @param dst The destination tensor where the transposed convolution result
 *            will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
 */
-void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
@@ -662,7 +671,7 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
 * @param dst The destination tensor where the ELU-activated result will be stored.
 *            dst->op is expected to be `GGML_OP_ELU`.
 */
-void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Computes the mean of a ggml tensor element-wise using the CANN backend.
@@ -677,7 +686,7 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the mean result will be stored.
 *            dst->op is expected to be `GGML_OP_MEAN`.
 */
-void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
@@ -692,7 +701,7 @@ void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the padded result will be stored.
 *            dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
 */
-void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
@@ -708,7 +717,7 @@ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the result will be stored.
 *            dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
 */
-void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Applies the Step activation function to a ggml tensor using the CANN backend.
@@ -723,7 +732,7 @@ void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the result will be stored.
 *            dst->op is expected to be `GGML_OP_STEP`.
 */
-void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Performs the Flash Attention extended operator using the CANN backend.
@@ -738,59 +747,46 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * @param dst The destination tensor where the result will be stored.
 *            dst->op is expected to be `GGML_OP_FLASH_ATTN_EXT`.
 */
-void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /*
 * @brief A generic wrapper for ACL resources with custom deleter support.
 */
-using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
+using any_acl_resource = std::unique_ptr<void, std::function<void(void *)>>;

 /**
 * @brief Trait structure used to define how to destroy a given ACL resource type.
 *
 * @tparam T ACL resource type.
 */
-template<typename T>
-struct acl_resource_traits;
+template <typename T> struct acl_resource_traits;

 /**
 * @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
 */
-template<>
-struct acl_resource_traits<aclTensor> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
-    }
+template <> struct acl_resource_traits<aclTensor> {
+    static void destroy(void * p) { ACL_CHECK(aclDestroyTensor(static_cast<aclTensor *>(p))); }
 };

 /**
 * @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
 */
-template<>
-struct acl_resource_traits<aclIntArray> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
-    }
+template <> struct acl_resource_traits<aclIntArray> {
+    static void destroy(void * p) { ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray *>(p))); }
 };

 /**
 * @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
 */
-template<>
-struct acl_resource_traits<aclScalar> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
-    }
+template <> struct acl_resource_traits<aclScalar> {
+    static void destroy(void * p) { ACL_CHECK(aclDestroyScalar(static_cast<aclScalar *>(p))); }
 };

 /**
 * @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
 */
-template<>
-struct acl_resource_traits<aclTensorList> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
-    }
+template <> struct acl_resource_traits<aclTensorList> {
+    static void destroy(void * p) { ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList *>(p))); }
 };

 /**
@@ -800,14 +796,8 @@ struct acl_resource_traits<aclTensorList> {
 * @param ptr Raw pointer to ACL resource.
 * @return any_acl_resource Smart pointer that handles destruction.
 */
-template<typename T>
-any_acl_resource make_acl_resource(T* ptr) {
-    return any_acl_resource(
-        static_cast<void*>(ptr),
-        [](void* p) {
-            acl_resource_traits<T>::destroy(p);
-        }
-    );
+template <typename T> any_acl_resource make_acl_resource(T * ptr) {
+    return any_acl_resource(static_cast<void *>(ptr), [](void * p) { acl_resource_traits<T>::destroy(p); });
 }

 /**
@@ -817,8 +807,7 @@ any_acl_resource make_acl_resource(T* ptr) {
 * @param vec Target vector to hold ACL resources.
 * @param args Raw pointers to ACL resources.
 */
-template<typename... Args>
-void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
+template <typename... Args> void register_acl_resources(std::vector<any_acl_resource> & vec, Args *... args) {
     (vec.emplace_back(make_acl_resource(args)), ...);
 }
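
Taken together, these helpers give RAII ownership of raw ACL handles; a sketch of the intended composition (function name illustrative):

    // Sketch: wrap raw ACL handles so each is destroyed exactly once through
    // its acl_resource_traits<T>::destroy specialization above.
    static void ownership_demo(aclTensor * t, aclIntArray * dims) {
        std::vector<any_acl_resource> resources;
        register_acl_resources(resources, t, dims);  // one type-erased deleter per handle
        // ... use t and dims while `resources` is alive ...
    }  // vector destruction releases both handles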

@@ -826,39 +815,36 @@ void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
 * @brief Task class that wraps the execution of an aclnn function call.
 */
 class aclnn_task : public cann_task {
-  public:
-    aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
-               uint64_t workspace_size, aclOpExecutor * executor,
-               aclrtStream stream) :
-        aclnn_func_(aclnn_func),
-        workspace_addr_(workspace_addr),
-        workspace_size_(workspace_size),
-        executor_(executor),
-        stream_(stream) {}
-    virtual void run_task() override {
-        ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_));
-    }
-  private:
-    aclnn_func_t aclnn_func_;
-    void * workspace_addr_;
-    uint64_t workspace_size_;
-    aclOpExecutor * executor_;
-    aclrtStream stream_;
+  public:
+    aclnn_task(aclnn_func_t aclnn_func,
+               void * workspace_addr,
+               uint64_t workspace_size,
+               aclOpExecutor * executor,
+               aclrtStream stream) :
+        aclnn_func_(aclnn_func),
+        workspace_addr_(workspace_addr),
+        workspace_size_(workspace_size),
+        executor_(executor),
+        stream_(stream) {}
+
+    virtual void run_task() override { ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_)); }
+  private:
+    aclnn_func_t aclnn_func_;
+    void * workspace_addr_;
+    uint64_t workspace_size_;
+    aclOpExecutor * executor_;
+    aclrtStream stream_;
 };

 /**
 * @brief Task class that releases ACL resources after usage.
 */
 class release_resource_task : public cann_task {
-public:
-    release_resource_task(std::vector<any_acl_resource>&& resources){
-        resource_ = std::move(resources);
-    }
+  public:
+    release_resource_task(std::vector<any_acl_resource> && resources) { resource_ = std::move(resources); }

-    virtual void run_task() override {
-        resource_.clear();
-    }
-private:
+    virtual void run_task() override { resource_.clear(); }
+  private:
     std::vector<any_acl_resource> resource_;
 };

@@ -866,38 +852,40 @@ private:
 * @brief Task class for performing asynchronous memory copy operations.
 */
 class async_memcpy_task : public cann_task {
-public:
-    async_memcpy_task(void* dst, const void* src, size_t size,
-                      aclrtMemcpyKind kind, aclrtStream stream)
-        : dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
-
-    virtual void run_task() override {
-        ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
-    }
-private:
-    void* dst_;
-    const void* src_;
-    size_t size_;
+  public:
+    async_memcpy_task(void * dst, const void * src, size_t size, aclrtMemcpyKind kind, aclrtStream stream) :
+        dst_(dst),
+        src_(src),
+        size_(size),
+        kind_(kind),
+        stream_(stream) {}
+
+    virtual void run_task() override { ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_)); }
+  private:
+    void * dst_;
+    const void * src_;
+    size_t size_;
     aclrtMemcpyKind kind_;
-    aclrtStream stream_;
+    aclrtStream stream_;
 };

 /**
 * @brief Task class for performing asynchronous memory set operations.
 */
 class async_memset_task : public cann_task {
-  public:
-    async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
-        : buffer_(buffer), size_(size), value_(value), stream_(stream) {}
-
-    virtual void run_task() override {
-        ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_));
-    }
-  private:
-    void* buffer_;
-    size_t size_;
-    int32_t value_;
-    aclrtStream stream_;
+  public:
+    async_memset_task(void * buffer, size_t size, int32_t value, aclrtStream stream) :
+        buffer_(buffer),
+        size_(size),
+        value_(value),
+        stream_(stream) {}
+
+    virtual void run_task() override { ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_)); }
+  private:
+    void * buffer_;
+    size_t size_;
+    int32_t value_;
+    aclrtStream stream_;
 };

 /**
@@ -918,25 +906,24 @@ class async_memset_task : public cann_task {
 * same stream are executed in queue order.
 */

-#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...) \
-    do { \
-        uint64_t workspaceSize = 0; \
-        aclOpExecutor * executor; \
-        void * workspaceAddr = nullptr; \
-        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor));\
-        /* workspace should alloced in main thread to keep malloc order when using vmm. */ \
-        if (workspaceSize > 0) { \
-            ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize); \
-            workspaceAddr = workspace_allocator.get(); \
-        } \
-        if (CTX.async_mode) { \
-            auto task = \
-                std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, \
-                                             executor, CTX.stream()); \
-            CTX.task_queue.submit_task(std::move(task)); \
-        } else { \
-            ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));\
-        } \
+#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...) \
+    do { \
+        uint64_t workspaceSize = 0; \
+        aclOpExecutor * executor; \
+        void * workspaceAddr = nullptr; \
+        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
+        /* the workspace should be allocated in the main thread to keep malloc order when using vmm. */ \
+        if (workspaceSize > 0) { \
+            ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize); \
+            workspaceAddr = workspace_allocator.get(); \
+        } \
+        if (CTX.async_mode) { \
+            auto task = \
+                std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, executor, CTX.stream()); \
+            CTX.task_queue.submit_task(std::move(task)); \
+        } else { \
+            ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream())); \
+        } \
     } while (0)
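
A call site names the operator once and then passes that operator's own arguments; for example, the Cast operator (this mirrors how the ops source typically uses the macro; the tensor handles are illustrative):

    // Expands to aclnnCastGetWorkspaceSize(acl_src, ACL_FLOAT, acl_dst, ...)
    // followed by aclnnCast(...) on ctx's stream, with the workspace taken
    // from the context pool.
    GGML_CANN_CALL_ACLNN_OP(ctx, Cast, acl_src, ACL_FLOAT, acl_dst);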

 /**
@@ -947,11 +934,10 @@ class async_memset_task : public cann_task {
 * @param ctx Backend context which manages task submission and async mode.
 * @param args Pointers to ACL resources to be released.
 */
-template <typename... Args>
-void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
+template <typename... Args> void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
     std::vector<any_acl_resource> resources;
     register_acl_resources(resources, std::forward<Args>(args)...);
-    if(ctx.async_mode) {
+    if (ctx.async_mode) {
         auto task = std::make_unique<release_resource_task>(std::move(resources));
         ctx.task_queue.submit_task(std::move(task));
     }
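
A kernel therefore typically ends with a single call that hands every temporary handle over; compare `ggml_cann_binary_op` further down, which does exactly this:

    // All three handles are wrapped and, in async mode, released only after
    // the queued kernels have consumed them.
    ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);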

@@ -966,8 +952,11 @@ void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... arg
 * @param len Size of memory to copy (in bytes).
 * @param kind Type of memory copy (host-to-device, device-to-host, etc).
 */
-inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
-                                   const void * src, size_t len, aclrtMemcpyKind kind) {
+inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx,
+                                   void * dst,
+                                   const void * src,
+                                   size_t len,
+                                   aclrtMemcpyKind kind) {
     if (ctx.async_mode) {
         auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
         ctx.task_queue.submit_task(std::move(task));
@@ -976,8 +965,11 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
     }
 }

-inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
-                                   const void * src, size_t len, aclrtMemcpyKind kind) {
+inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx,
+                                   void * dst,
+                                   const void * src,
+                                   size_t len,
+                                   aclrtMemcpyKind kind) {
     if (ctx->async_mode) {
         auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
         ctx->task_queue.submit_task(std::move(task));
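
Both overloads behave identically apart from taking the context by reference or by pointer; a host-to-device upload sketch (buffer names illustrative, the kind is a standard aclrtMemcpyKind value):

    // Sketch: stage host data into device memory through the async path. With
    // async_mode off this falls back to a plain aclrtMemcpyAsync on ctx's stream.
    ggml_cann_async_memcpy(ctx, dev_ptr, host_ptr, nbytes, ACL_MEMCPY_HOST_TO_DEVICE);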

@@ -994,8 +986,7 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
 * @param size Size of the memory buffer (in bytes).
 * @param value Value to set in the buffer.
 */
-inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
-                                   size_t size, int value) {
+inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer, size_t size, int value) {
     if (ctx.async_mode) {
         auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
         ctx.task_queue.submit_task(std::move(task));
@@ -1029,7 +1020,7 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
 * @param dst The destination tensor where the expert-weighted token outputs are stored.
 *            Expected to be of shape [M, K, N, 1].
 */
-void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_mul_mat_id(ggml_backend_cann_context & ctx, ggml_tensor * dst);

 /**
 * @brief Check whether a tensor is a weight tensor for matrix multiplication.
@@ -1041,20 +1032,14 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 *
 * @param tensor Pointer to the target ggml_tensor object (const-qualified).
 */
-static bool is_matmul_weight(const ggml_tensor* tensor) {
-    std::string name = ggml_get_name(tensor);
-    static const std::unordered_set<std::string> weight_suffixes{
-        "output.weight",
-        "attn_q.weight",
-        "attn_k.weight",
-        "attn_v.weight",
-        "attn_output.weight",
-        "ffn_gate.weight",
-        "ffn_up.weight",
-        "ffn_down.weight"
-    };
-
-    for (const auto& suffix : weight_suffixes) {
+static bool is_matmul_weight(const ggml_tensor * tensor) {
+    std::string name = ggml_get_name(tensor);
+    static const std::unordered_set<std::string> weight_suffixes{ "output.weight",      "attn_q.weight",
+                                                                  "attn_k.weight",      "attn_v.weight",
+                                                                  "attn_output.weight", "ffn_gate.weight",
+                                                                  "ffn_up.weight",      "ffn_down.weight" };
+
+    for (const auto & suffix : weight_suffixes) {
         if (name.find(suffix) != std::string::npos) {
             return true;
         }
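
The suffix test keys off tensor names as they appear in llama.cpp GGUF models, e.g. "blk.0.attn_q.weight" matches the "attn_q.weight" suffix. A usage sketch:

    // Sketch: route name-matched weights to a weight-specialized mul_mat path.
    if (is_matmul_weight(dst->src[0])) {
        // a weight-specific (e.g. pre-transposed) layout can be assumed here
    }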

@@ -1078,14 +1063,13 @@ static bool is_matmul_weight(const ggml_tensor* tensor) {
 * @param ctx The CANN backend context used to manage execution and resources.
 * @param dst The destination tensor.
 */
-template <auto binary_op>
-void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src0 = dst->src[0];
-    ggml_tensor* src1 = dst->src[1];
+template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];

-    aclTensor* acl_src0;
-    aclTensor* acl_src1;
-    aclTensor* acl_dst;
+    aclTensor * acl_src0;
+    aclTensor * acl_src1;
+    aclTensor * acl_dst;

     // Need bcast
     bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
@@ -1094,7 +1078,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
 }
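
Instantiation binds one of the element-wise wrappers above as the non-type template argument; a dispatch sketch (the surrounding switch is illustrative):

    // Sketch: handle GGML_OP_MUL / GGML_OP_DIV by instantiating the template
    // with the matching wrapper; broadcasting is handled inside via bcast_shape.
    switch (dst->op) {
        case GGML_OP_MUL: ggml_cann_binary_op<aclnn_mul>(ctx, dst); break;
        case GGML_OP_DIV: ggml_cann_binary_op<aclnn_div>(ctx, dst); break;
        default:          break;
    }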

-
 /**
 * @brief Applies a unary operation to an input tensor using the CANN backend.
 *
@@ -1107,12 +1090,12 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 * @param ctx The CANN backend context for managing resources and execution.
 * @param dst The destination tensor. Its src[0] is treated as the input tensor.
 */
-template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
-    void ggml_cann_op_unary(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src = dst->src[0];
+template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)>
+void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src = dst->src[0];

-    aclTensor* acl_src = ggml_cann_create_tensor(src);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+    aclTensor * acl_src = ggml_cann_create_tensor(src);
+    aclTensor * acl_dst = ggml_cann_create_tensor(dst);

     unary_op(ctx, acl_src, acl_dst);
     ggml_cann_release_resources(ctx, acl_src, acl_dst);
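
With this template form the wrapper is bound at compile time; for instance, with the `aclnn_cos` wrapper declared earlier:

    // Sketch: dst->src[0] is the input; the template creates the ACL views,
    // runs the wrapper, and releases the handles.
    ggml_cann_op_unary<aclnn_cos>(ctx, dst);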

@@ -1138,9 +1121,9 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
 *
 * @see GGML_CANN_CALL_OP_UNARY
 */
-void ggml_cann_op_unary(
-    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
-    ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
+                        ggml_backend_cann_context & ctx,
+                        ggml_tensor * dst);

 /**
 * @brief Applies a gated (GLU-style) unary operation using the CANN backend.
@@ -1172,9 +1155,9 @@ void ggml_cann_op_unary(
 *
 * @see GGML_CANN_CALL_OP_UNARY_GATED
 */
-void ggml_cann_op_unary_gated(
-    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
-    ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
+                              ggml_backend_cann_context & ctx,
+                              ggml_tensor * dst);

 /**
 * @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
@@ -1197,16 +1180,13 @@ void ggml_cann_op_unary_gated(
 * @see ggml_cann_op_unary
 * @see GGML_CANN_CALL_ACLNN_OP
 */
-#define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
-    do { \
-        auto lambda = [](ggml_backend_cann_context& ctx, \
-                         aclTensor* acl_src, \
-                         aclTensor* acl_dst) { \
-            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
-        }; \
-        ggml_cann_op_unary(lambda, ctx, dst); \
-    } \
-    while (0)
+#define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
+    do { \
+        auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
+            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
+        }; \
+        ggml_cann_op_unary(lambda, ctx, dst); \
+    } while (0)
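
A handler body for a unary node then shrinks to one line (`ctx` and `dst` must be in scope, as the macro requires; `Exp` names the ACL operator aclnnExp):

    // Sketch: builds a lambda around aclnnExp and runs it via ggml_cann_op_unary.
    GGML_CANN_CALL_OP_UNARY(Exp);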

 /**
 * @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated.
@@ -1229,15 +1209,12 @@ void ggml_cann_op_unary_gated(
 * @see ggml_cann_op_unary_gated
 * @see GGML_CANN_CALL_ACLNN_OP
 */
-#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
-    do { \
-        auto lambda = [](ggml_backend_cann_context& ctx, \
-                         aclTensor* acl_src, \
-                         aclTensor* acl_dst) { \
-            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
-        }; \
-        ggml_cann_op_unary_gated(lambda, ctx, dst); \
-    } \
-    while (0)
+#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
+    do { \
+        auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
+            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
+        }; \
+        ggml_cann_op_unary_gated(lambda, ctx, dst); \
+    } while (0)
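
And likewise for the gated variant, e.g. a SiLU-gated GLU node (`Silu` assumes the ACL operator aclnnSilu):

    // Sketch: wraps aclnnSilu and runs it via ggml_cann_op_unary_gated, which
    // also applies the gate half taken from the node's inputs.
    GGML_CANN_CALL_OP_UNARY_GATED(Silu);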

 #endif  // CANN_ACLNN_OPS