@@ -1,6 +1,60 @@
 #include "mmvq.hpp"
+
+#include "ggml.h"
+#include "common.hpp"
+#include "quants.hpp"
 #include "vecdotq.hpp"
-#include <cassert>
+
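+// Reordered mat-vec-q kernel: one sub-group computes one row of dst. The
+// quantized x data is expected in the reordered layout, so block and scale
+// offsets are obtained through the block_q_t helpers rather than by indexing
+// an array of interleaved blocks.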
+template <typename reorder_vec_dot_q_sycl>
+static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
+                                  const int ncols, const int nrows, const sycl::nd_item<3> & nd_item) {
+    using block_type = ggml_sycl_reordered::block_q_t<reorder_vec_dot_q_sycl::gtype>;
+    using block_traits = typename block_type::traits;
+
+    const auto sg = nd_item.get_sub_group();
+    const int sg_range = sg.get_group_linear_range();
+    const int workgroup_id = nd_item.get_group_linear_id();
+    const int sg_id = sg.get_group_linear_id();
+    const int row = workgroup_id * sg_range + sg_id;
+
+    if (row >= nrows) {
+        return;
+    }
+
+    const int blocks_per_row = ncols / block_traits::qk;
+    constexpr int blocks_per_subgroup = ceil_div(block_traits::vdr_mmvq * WARP_SIZE, block_traits::qi);
+    constexpr int block_elements_per_subgroup = block_traits::qi / block_traits::vdr_mmvq;
+
+    static_assert(blocks_per_subgroup > 0);
+    static_assert(block_elements_per_subgroup > 0);
+
+    const block_q8_1 * y = (const block_q8_1 *) vy;
+
+    float partial_sum = 0.0f;
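+    // Each iteration covers blocks_per_subgroup consecutive x blocks; groups of
+    // block_elements_per_subgroup lanes within the sub-group cooperate on one block.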
+    for (int i = sg.get_local_linear_id() / block_elements_per_subgroup; i < blocks_per_row; i += blocks_per_subgroup) {
+        const int ibx = row * blocks_per_row + i; // x block index
+        // TODO: Generalize offsets, right now only works for quantizations that don't split high and low bits
+        const int bx_offset = block_type::get_block_offset(ibx);
+        const int d_offset = block_type::get_d_offset(nrows, ncols, ibx);
+
+        // Y block index that aligns with ibx
+        const int iby = i * block_type::block_to_q8_1_ratio();
+
+#pragma unroll
+        for (int elem = 0; elem < block_elements_per_subgroup; elem += WARP_SIZE) {
+            // x block quant index when casting the quants to int
+            const int iqs = elem + block_traits::vdr_mmvq * (sg.get_local_linear_id() % block_elements_per_subgroup);
+
+            partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, &y[iby], iqs);
+        }
+    }
+
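+    // Reduce the per-lane partial sums across the sub-group; the leader lane
+    // writes the final dot product for this row.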
+    auto sum = sycl::reduce_over_group(nd_item.get_sub_group(), partial_sum, std::plus<>());
+
+    if (sg.leader()) {
+        dst[row] = sum;
+    }
+}
 
 template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl>
 static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
@@ -480,26 +534,39 @@ static void mul_mat_vec_q_iq4_xs_q8_1(const void *__restrict__ vx,
     }
 }
 
-static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
-                                       float *dst, const int ncols,
-                                       const int nrows,
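+// Launcher for the reordered Q4_0 x Q8_1 path: every sub-group produces one row
+// of dst, with num_subgroups sub-groups per work-group, so block_num_y has to be
+// a multiple of num_subgroups.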
+static void reorder_mul_mat_vec_q4_0_q8_1_sycl(const void * vx, const void * vy, float * dst, const int ncols,
+                                               const int nrows, dpct::queue_ptr stream) {
+    GGML_ASSERT(ncols % QK4_0 == 0);
+    const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
+    constexpr size_t num_subgroups = 16;
+    GGML_ASSERT(block_num_y % num_subgroups == 0);
+
+    const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, (block_num_y * WARP_SIZE));
+    const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
+
+    stream->submit([&](sycl::handler & cgh) {
+        cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
+                         [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
+                             mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0>>(vx, vy, dst, ncols, nrows,
+                                                                                           nd_item);
+                         });
+    });
+}
+
+static void mul_mat_vec_q4_0_q8_1_sycl(const void * vx, const void * vy, float * dst, const int ncols, const int nrows,
                                        dpct::queue_ptr stream) {
     GGML_ASSERT(ncols % QK4_0 == 0);
     const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y;
     const sycl::range<3> block_nums(1, 1, block_num_y);
     const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
-    {
-
-        stream->submit([&](sycl::handler &cgh) {
-            cgh.parallel_for(
-                sycl::nd_range<3>(block_nums * block_dims, block_dims),
-                [=](sycl::nd_item<3> item_ct1)
-                    [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                        mul_mat_vec_q<QK4_0, QI4_0, block_q4_0,
-                                      VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
-                            vx, vy, dst, ncols, nrows, item_ct1);
-                    });
+    {
+        stream->submit([&](sycl::handler & cgh) {
+            cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
+                             [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
+                                 mul_mat_vec_q<QK4_0, QI4_0, block_q4_0, VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
+                                     vx, vy, dst, ncols, nrows, item_ct1);
+                             });
         });
     }
 }
@@ -916,93 +983,95 @@ static void mul_mat_vec_iq4_xs_q8_1_sycl(const void *vx, const void *vy,
     }
 }
 
-void ggml_sycl_op_mul_mat_vec_q(
-    ggml_backend_sycl_context & ctx,
-    const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
-    const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i,
-    float *dst_dd_i, const int64_t row_low, const int64_t row_high,
-    const int64_t src1_ncols, const int64_t src1_padded_col_size,
-    const dpct::queue_ptr &stream) {
-
+void ggml_sycl_op_mul_mat_vec_q(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1,
+                                ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
+                                const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low,
+                                const int64_t row_high, const int64_t src1_ncols, const int64_t src1_padded_col_size,
+                                const dpct::queue_ptr & stream) {
     const int64_t ne10 = src1->ne[0];
     GGML_ASSERT(ne10 % QK8_1 == 0);
 
-    const int64_t ne00 = src0->ne[0];
+    const int64_t ne00 = src0->ne[0];
     const int64_t row_diff = row_high - row_low;
 
     int id;
-    SYCL_CHECK(
-        CHECK_TRY_ERROR(id = get_current_device_id()));
+    SYCL_CHECK(CHECK_TRY_ERROR(id = get_current_device_id()));
     const size_t q8_1_ts = sizeof(block_q8_1);
     const size_t q8_1_bs = QK8_1;
     // the main device has a larger memory buffer to hold the results from all GPUs
     // nrows_dst == nrows of the matrix that the kernel writes into
 
-    for (int i = 0; i < src1_ncols; i++)
-    {
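+    // One launch per src1 column: each iteration reads that column's q8_1-quantized
+    // data and writes the corresponding column of dst.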
+    for (int i = 0; i < src1_ncols; i++) {
         const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs;
-        const char* src1_ddq_i_bs = src1_ddq_i + src1_ddq_i_offset;
-        float* dst_dd_i_bs = dst_dd_i + i * dst->ne[0];
+        const char * src1_ddq_i_bs = src1_ddq_i + src1_ddq_i_offset;
+        float * dst_dd_i_bs = dst_dd_i + i * dst->ne[0];
         switch (src0->type) {
-        case GGML_TYPE_Q4_0:
-            mul_mat_vec_q4_0_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q4_1:
-            mul_mat_vec_q4_1_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q5_0:
-            mul_mat_vec_q5_0_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q5_1:
-            mul_mat_vec_q5_1_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q8_0:
-            mul_mat_vec_q8_0_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q2_K:
-            mul_mat_vec_q2_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q3_K:
-            mul_mat_vec_q3_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q4_K:
-            mul_mat_vec_q4_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q5_K:
-            mul_mat_vec_q5_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_Q6_K:
-            mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ1_S:
-            mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ1_M:
-            mul_mat_vec_iq1_m_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ2_XXS:
-            mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ2_XS:
-            mul_mat_vec_iq2_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ2_S:
-            mul_mat_vec_iq2_s_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ3_XXS:
-            mul_mat_vec_iq3_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ3_S:
-            mul_mat_vec_iq3_s_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ4_NL:
-            mul_mat_vec_iq4_nl_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        case GGML_TYPE_IQ4_XS:
-            mul_mat_vec_iq4_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
-            break;
-        default:
-            GGML_ABORT("fatal error");
+            case GGML_TYPE_Q4_0:
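+                // Dispatch to the reordered kernel only when the Q4_0 weights were
+                // rewritten into the reordered layout (optimized_feature.reorder set
+                // on the tensor's extra).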
+                if ((ggml_tensor_extra_gpu *) dst->src[0]->extra &&
+                    ((ggml_tensor_extra_gpu *) dst->src[0]->extra)->optimized_feature.reorder) {
+                    GGML_SYCL_DEBUG("Calling reorder_mul_mat_vec_q4_0_q8_1_sycl\n");
+                    reorder_mul_mat_vec_q4_0_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                } else {
+                    GGML_SYCL_DEBUG("Calling mul_mat_vec_q4_0_q8_1_sycl\n");
+                    mul_mat_vec_q4_0_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                }
+                break;
+            case GGML_TYPE_Q4_1:
+                mul_mat_vec_q4_1_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q5_0:
+                mul_mat_vec_q5_0_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q5_1:
+                mul_mat_vec_q5_1_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q8_0:
+                mul_mat_vec_q8_0_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q2_K:
+                mul_mat_vec_q2_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q3_K:
+                mul_mat_vec_q3_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q4_K:
+                mul_mat_vec_q4_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q5_K:
+                mul_mat_vec_q5_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_Q6_K:
+                mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ1_S:
+                mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ1_M:
+                mul_mat_vec_iq1_m_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ2_XXS:
+                mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ2_XS:
+                mul_mat_vec_iq2_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ2_S:
+                mul_mat_vec_iq2_s_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ3_XXS:
+                mul_mat_vec_iq3_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ3_S:
+                mul_mat_vec_iq3_s_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ4_NL:
+                mul_mat_vec_iq4_nl_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            case GGML_TYPE_IQ4_XS:
+                mul_mat_vec_iq4_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
+                break;
+            default:
+                GGML_ABORT("fatal error");
         }
     }
 
     GGML_UNUSED(src1);