|
@@ -4222,15 +4222,19 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
|
|
|
GGML_ASSERT(dst);
|
|
GGML_ASSERT(dst);
|
|
|
GGML_ASSERT(dst->extra);
|
|
GGML_ASSERT(dst->extra);
|
|
|
|
|
|
|
|
- const int ne00 = src0 ? src0->ne[0] : 0;
|
|
|
|
|
- const cl_ulong nb01 = src0 ? src0->nb[1] : 0;
|
|
|
|
|
- const cl_ulong nb02 = src0 ? src0->nb[2] : 0;
|
|
|
|
|
- const int ne10 = src1 ? src1->ne[0] : 0;
|
|
|
|
|
- const cl_ulong nb10 = src1 ? src1->nb[0] : 0;
|
|
|
|
|
- const int ne11 = src1 ? src1->ne[1] : 0;
|
|
|
|
|
- const cl_ulong nb11 = src1 ? src1->nb[1] : 0;
|
|
|
|
|
- const cl_ulong nb1 = dst ? dst->nb[1] : 0;
|
|
|
|
|
- const cl_ulong nb2 = dst ? dst->nb[2] : 0;
|
|
|
|
|
|
|
+ const int ne00 = src0->ne[0];
|
|
|
|
|
+ const cl_ulong nb01 = src0->nb[1];
|
|
|
|
|
+ const cl_ulong nb02 = src0->nb[2];
|
|
|
|
|
+ const cl_ulong nb03 = src0->nb[3];
|
|
|
|
|
+ const int ne10 = src1->ne[0];
|
|
|
|
|
+ const cl_ulong nb10 = src1->nb[0];
|
|
|
|
|
+ const int ne11 = src1->ne[1];
|
|
|
|
|
+ const int ne12 = src1->ne[2];
|
|
|
|
|
+ const cl_ulong nb11 = src1->nb[1];
|
|
|
|
|
+ const cl_ulong nb12 = src1->nb[2];
|
|
|
|
|
+ const cl_ulong nb1 = dst->nb[1];
|
|
|
|
|
+ const cl_ulong nb2 = dst->nb[2];
|
|
|
|
|
+ const cl_ulong nb3 = dst->nb[3];
|
|
|
|
|
|
|
|
ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
|
|
ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
|
|
|
|
|
|
|
@@ -4267,14 +4271,17 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
|
|
|
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne00));
|
|
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne00));
|
|
|
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb01));
|
|
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb01));
|
|
|
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &nb02));
|
|
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &nb02));
|
|
|
- CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne10));
|
|
|
|
|
- CL_CHECK(clSetKernelArg(kernel, 10, sizeof(cl_ulong), &nb10));
|
|
|
|
|
- CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_ulong), &nb11));
|
|
|
|
|
- CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_ulong), &nb1));
|
|
|
|
|
- CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_ulong), &nb2));
|
|
|
|
|
-
|
|
|
|
|
- size_t global_work_size[] = {(size_t)ne10, (size_t)ne11, 1};
|
|
|
|
|
- size_t local_work_size[] = {1, 1, 1};
|
|
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_ulong), &nb03));
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne10));
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 11, sizeof(cl_ulong), &nb10));
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_ulong), &nb11));
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_ulong), &nb12));
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_ulong), &nb1));
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_ulong), &nb2));
|
|
|
|
|
+ CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_ulong), &nb3));
|
|
|
|
|
+
|
|
|
|
|
+ size_t global_work_size[] = {(size_t)ne10*64, (size_t)ne11, (size_t)ne12};
|
|
|
|
|
+ size_t local_work_size[] = {64, 1, 1};
|
|
|
|
|
|
|
|
backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
|
|
backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
|
|
|
}
|
|
}
|