@@ -407,6 +407,16 @@ enum ggml_metal_kernel_type {
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1,
     GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL,
+    GGML_METAL_KERNEL_TYPE_CPY_Q4_0_F32,
+    GGML_METAL_KERNEL_TYPE_CPY_Q4_0_F16,
+    GGML_METAL_KERNEL_TYPE_CPY_Q4_1_F32,
+    GGML_METAL_KERNEL_TYPE_CPY_Q4_1_F16,
+    GGML_METAL_KERNEL_TYPE_CPY_Q5_0_F32,
+    GGML_METAL_KERNEL_TYPE_CPY_Q5_0_F16,
+    GGML_METAL_KERNEL_TYPE_CPY_Q5_1_F32,
+    GGML_METAL_KERNEL_TYPE_CPY_Q5_1_F16,
+    GGML_METAL_KERNEL_TYPE_CPY_Q8_0_F32,
+    GGML_METAL_KERNEL_TYPE_CPY_Q8_0_F16,
     GGML_METAL_KERNEL_TYPE_CONCAT,
     GGML_METAL_KERNEL_TYPE_SQR,
     GGML_METAL_KERNEL_TYPE_SQRT,
@@ -1012,6 +1022,16 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0, cpy_f32_q5_0, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1, cpy_f32_q5_1, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL, cpy_f32_iq4_nl, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q4_0_F32, cpy_q4_0_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q4_0_F16, cpy_q4_0_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q4_1_F32, cpy_q4_1_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q4_1_F16, cpy_q4_1_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q5_0_F32, cpy_q5_0_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q5_0_F16, cpy_q5_0_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q5_1_F32, cpy_q5_1_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q5_1_F16, cpy_q5_1_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q8_0_F32, cpy_q8_0_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_Q8_0_F16, cpy_q8_0_f16, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CONCAT, concat, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SQR, sqr, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SQRT, sqrt, true);
@@ -1287,6 +1307,18 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
                            default:
                                return false;
                        }
+                    case GGML_TYPE_Q4_0:
+                    case GGML_TYPE_Q4_1:
+                    case GGML_TYPE_Q5_0:
+                    case GGML_TYPE_Q5_1:
+                    case GGML_TYPE_Q8_0:
+                        switch (op->type) {
+                           case GGML_TYPE_F32:
+                           case GGML_TYPE_F16:
+                               return true;
+                           default:
+                               return false;
+                        }
                     default:
                         return false;
                 };
@@ -3899,10 +3931,6 @@ static void ggml_metal_encode_node(
         case GGML_OP_CPY:
         case GGML_OP_CONT:
             {
-                GGML_ASSERT(ne00 % ggml_blck_size(src0->type) == 0);
-
-                int nth = MIN(1024, ne00/ggml_blck_size(src0->type));
-
                 id<MTLComputePipelineState> pipeline = nil;

                 switch (src0t) {
@@ -3936,7 +3964,47 @@ static void ggml_metal_encode_node(
                            switch (dstt) {
                                case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_BF16_F32].pipeline; break;
                                case GGML_TYPE_BF16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_BF16_BF16].pipeline; break;
-                               default: GGML_ASSERT(false && "not implemented");
+                               default: GGML_ABORT("not implemented");
+                           };
+                       } break;
+                   case GGML_TYPE_Q4_0:
+                       {
+                           switch (dstt) {
+                               case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q4_0_F32].pipeline; break;
+                               case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q4_0_F16].pipeline; break;
+                               default: GGML_ABORT("not implemented");
+                           };
+                       } break;
+                   case GGML_TYPE_Q4_1:
+                       {
+                           switch (dstt) {
+                               case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q4_1_F32].pipeline; break;
+                               case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q4_1_F16].pipeline; break;
+                               default: GGML_ABORT("not implemented");
+                           };
+                       } break;
+                   case GGML_TYPE_Q5_0:
+                       {
+                           switch (dstt) {
+                               case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q5_0_F32].pipeline; break;
+                               case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q5_0_F16].pipeline; break;
+                               default: GGML_ABORT("not implemented");
+                           };
+                       } break;
+                   case GGML_TYPE_Q5_1:
+                       {
+                           switch (dstt) {
+                               case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q5_1_F32].pipeline; break;
+                               case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q5_1_F16].pipeline; break;
+                               default: GGML_ABORT("not implemented");
+                           };
+                       } break;
+                   case GGML_TYPE_Q8_0:
+                       {
+                           switch (dstt) {
+                               case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q8_0_F32].pipeline; break;
+                               case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_Q8_0_F16].pipeline; break;
+                               default: GGML_ABORT("not implemented");
                            };
                        } break;
                    default: GGML_ABORT("not implemented");
@@ -3966,7 +4034,11 @@ static void ggml_metal_encode_node(
                 [encoder setBuffer:id_src0 offset:offs_src0 atIndex:1];
                 [encoder setBuffer:id_dst offset:offs_dst atIndex:2];

+                GGML_ASSERT(ne00 % ggml_blck_size(src0->type) == 0);
+                int nth = MIN(1024, ne00/ggml_blck_size(src0->type));
+
                 [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
+
            } break;
        case GGML_OP_SET:
            {
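
Note: the hunks above only register the new cpy_q*_f32 / cpy_q*_f16 pipelines and route GGML_OP_CPY / GGML_OP_CONT to them; the kernel bodies themselves live in ggml-metal.metal, which is not part of this excerpt. As a rough, hypothetical sketch of what such a dequantizing copy does (the kernel name, buffer layout, and flat 1D row indexing here are assumptions for brevity; the real kernels are dispatched over a 3D grid of (ne01, ne02, ne03) threadgroups and honor the full nb00..nb3 byte strides), a Q4_0 -> F32 copy matching the dispatch above (one threadgroup per row, one thread per 32-value block, at most 1024 threads per threadgroup) could look like:

// HYPOTHETICAL sketch, not the actual ggml-metal.metal implementation.
// Q4_0 block: a half-precision scale followed by 16 bytes packing 32
// 4-bit quants (two per byte); dequantized value = (nibble - 8) * d.
#include <metal_stdlib>
using namespace metal;

#define QK4_0 32

typedef struct {
    half    d;              // per-block scale
    uint8_t qs[QK4_0 / 2];  // packed nibbles
} block_q4_0;

// One threadgroup per row, one thread per Q4_0 block; threads stride by the
// threadgroup size if the row has more blocks than threads. A flat 1D row
// grid is assumed here, whereas the real dispatch is 3D over (ne01, ne02, ne03).
kernel void cpy_q4_0_f32_sketch(
        device const block_q4_0 * src             [[buffer(0)]],
        device       float      * dst             [[buffer(1)]],
        constant     uint       & nblocks_per_row [[buffer(2)]],
        uint row [[threadgroup_position_in_grid]],
        uint tid [[thread_position_in_threadgroup]],
        uint ntg [[threads_per_threadgroup]]) {
    for (uint ib = tid; ib < nblocks_per_row; ib += ntg) {
        const block_q4_0 b = src[row*nblocks_per_row + ib];
        const float d = b.d;

        device float * out = dst + (row*nblocks_per_row + ib)*QK4_0;

        for (int j = 0; j < QK4_0/2; ++j) {
            out[j]           = ((b.qs[j] & 0x0F) - 8) * d; // low nibble
            out[j + QK4_0/2] = ((b.qs[j] >>   4) - 8) * d; // high nibble
        }
    }
}

The other quantized source types follow the same pattern; only the block struct and the unpacking of quants into floats change, and the _f16 variants store half instead of float.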