|
|
@@ -87,6 +87,7 @@ struct ggml_metal_context {
|
|
|
GGML_METAL_DECL_KERNEL(get_rows_q4_K);
|
|
|
GGML_METAL_DECL_KERNEL(get_rows_q5_K);
|
|
|
GGML_METAL_DECL_KERNEL(get_rows_q6_K);
|
|
|
+ GGML_METAL_DECL_KERNEL(get_rows_i32);
|
|
|
GGML_METAL_DECL_KERNEL(rms_norm);
|
|
|
GGML_METAL_DECL_KERNEL(group_norm);
|
|
|
GGML_METAL_DECL_KERNEL(norm);
|
|
|
@@ -377,6 +378,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
|
GGML_METAL_ADD_KERNEL(get_rows_q4_K);
|
|
|
GGML_METAL_ADD_KERNEL(get_rows_q5_K);
|
|
|
GGML_METAL_ADD_KERNEL(get_rows_q6_K);
|
|
|
+ GGML_METAL_ADD_KERNEL(get_rows_i32);
|
|
|
GGML_METAL_ADD_KERNEL(rms_norm);
|
|
|
GGML_METAL_ADD_KERNEL(group_norm);
|
|
|
GGML_METAL_ADD_KERNEL(norm);
|
|
|
@@ -499,6 +501,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
|
|
|
GGML_METAL_DEL_KERNEL(get_rows_q4_K);
|
|
|
GGML_METAL_DEL_KERNEL(get_rows_q5_K);
|
|
|
GGML_METAL_DEL_KERNEL(get_rows_q6_K);
|
|
|
+ GGML_METAL_DEL_KERNEL(get_rows_i32);
|
|
|
GGML_METAL_DEL_KERNEL(rms_norm);
|
|
|
GGML_METAL_DEL_KERNEL(group_norm);
|
|
|
GGML_METAL_DEL_KERNEL(norm);
|
|
|
@@ -1978,6 +1981,7 @@ void ggml_metal_graph_compute(
|
|
|
case GGML_TYPE_Q4_K: [encoder setComputePipelineState:ctx->pipeline_get_rows_q4_K]; break;
|
|
|
case GGML_TYPE_Q5_K: [encoder setComputePipelineState:ctx->pipeline_get_rows_q5_K]; break;
|
|
|
case GGML_TYPE_Q6_K: [encoder setComputePipelineState:ctx->pipeline_get_rows_q6_K]; break;
|
|
|
+ case GGML_TYPE_I32: [encoder setComputePipelineState:ctx->pipeline_get_rows_i32]; break;
|
|
|
default: GGML_ASSERT(false && "not implemented");
|
|
|
}
|
|
|
|