|
|
@@ -524,7 +524,7 @@ int ggml_metal_op_concat(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.dim =*/ dim,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_base(lib, GGML_OP_CONCAT);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_base(lib, GGML_OP_CONCAT);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -550,7 +550,7 @@ int ggml_metal_op_repeat(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_repeat(lib, op->type);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_repeat(lib, op->type);
|
|
|
|
|
|
ggml_metal_kargs_repeat args = {
|
|
|
/*.ne00 =*/ ne00,
|
|
|
@@ -616,7 +616,7 @@ int ggml_metal_op_acc(ggml_metal_op_t ctx, int idx) {
|
|
|
// TODO: make a simpler cpy_bytes kernel
|
|
|
|
|
|
//const id<MTLComputePipelineState> pipeline = ctx->pipelines[GGML_METAL_PIPELINE_TYPE_CPY_F32_F32].obj;
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[0]->type, op->type);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[0]->type, op->type);
|
|
|
|
|
|
ggml_metal_kargs_cpy args = {
|
|
|
/*.nk0 =*/ ne00,
|
|
|
@@ -679,7 +679,7 @@ int ggml_metal_op_acc(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.o1 =*/ { 0 },
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_bin(lib, GGML_OP_ADD, 1, false);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_bin(lib, GGML_OP_ADD, 1, false);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -721,7 +721,7 @@ int ggml_metal_op_scale(ggml_metal_op_t ctx, int idx) {
|
|
|
n /= 4;
|
|
|
}
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -760,7 +760,7 @@ int ggml_metal_op_clamp(ggml_metal_op_t ctx, int idx) {
|
|
|
n /= 4;
|
|
|
}
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -789,7 +789,7 @@ int ggml_metal_op_unary(ggml_metal_op_t ctx, int idx) {
|
|
|
n /= 4;
|
|
|
}
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[0]), 0);
|
|
|
@@ -817,7 +817,7 @@ int ggml_metal_op_glu(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_ASSERT(ggml_are_same_shape(op->src[0], op->src[1]));
|
|
|
}
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_glu(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_glu(lib, op);
|
|
|
|
|
|
const int32_t swp = ggml_get_op_params_i32(op, 1);
|
|
|
const float alpha = ggml_get_op_params_f32(op, 2);
|
|
|
@@ -870,7 +870,7 @@ int ggml_metal_op_sum(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.np =*/ n,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_sum(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_sum(lib, op);
|
|
|
|
|
|
int nth = 32; // SIMD width
|
|
|
|
|
|
@@ -925,7 +925,7 @@ int ggml_metal_op_sum_rows(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb3 =*/ nb3,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_sum_rows(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_sum_rows(lib, op);
|
|
|
|
|
|
int nth = 32; // SIMD width
|
|
|
|
|
|
@@ -936,7 +936,7 @@ int ggml_metal_op_sum_rows(ggml_metal_op_t ctx, int idx) {
|
|
|
nth = std::min(nth, ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
nth = std::min(nth, ne00);
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -963,7 +963,7 @@ int ggml_metal_op_cumsum(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline_blk = ggml_metal_library_get_pipeline_cumsum_blk(lib, op);
|
|
|
+ auto pipeline_blk = ggml_metal_library_get_pipeline_cumsum_blk(lib, op);
|
|
|
|
|
|
int nth = 1;
|
|
|
while (nth < ne00 && 2*nth <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline_blk)) {
|
|
|
@@ -1060,7 +1060,7 @@ int ggml_metal_op_cumsum(ggml_metal_op_t ctx, int idx) {
|
|
|
ggml_metal_op_concurrency_reset(ctx);
|
|
|
|
|
|
{
|
|
|
- ggml_metal_pipeline_t pipeline_add = ggml_metal_library_get_pipeline_cumsum_add(lib, op);
|
|
|
+ auto pipeline_add = ggml_metal_library_get_pipeline_cumsum_add(lib, op);
|
|
|
|
|
|
ggml_metal_kargs_cumsum_add args = {
|
|
|
/*.ne00 =*/ ne00,
|
|
|
@@ -1106,7 +1106,7 @@ int ggml_metal_op_get_rows(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_get_rows(lib, op->src[0]->type);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_get_rows(lib, op->src[0]->type);
|
|
|
|
|
|
ggml_metal_kargs_get_rows args = {
|
|
|
/*.ne00t =*/ ggml_is_quantized(op->src[0]->type) ? ne00/16 : ne00,
|
|
|
@@ -1151,7 +1151,7 @@ int ggml_metal_op_set_rows(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_set_rows(lib, op->src[1]->type, op->type);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_set_rows(lib, op->src[1]->type, op->type);
|
|
|
|
|
|
const int32_t nk0 = ne0/ggml_blck_size(op->type);
|
|
|
|
|
|
@@ -1252,7 +1252,7 @@ int ggml_metal_op_soft_max(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.n_head_log2 =*/ n_head_log2,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_soft_max(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_soft_max(lib, op);
|
|
|
|
|
|
int nth = 32; // SIMD width
|
|
|
|
|
|
@@ -1266,7 +1266,7 @@ int ggml_metal_op_soft_max(ggml_metal_op_t ctx, int idx) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
|
|
|
@@ -1322,7 +1322,7 @@ int ggml_metal_op_ssm_conv(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb2 =*/ nb2,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_ssm_conv(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_ssm_conv(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
|
|
|
@@ -1409,11 +1409,11 @@ int ggml_metal_op_ssm_scan(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb0 =*/ nb0,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_ssm_scan(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_ssm_scan(lib, op);
|
|
|
|
|
|
GGML_ASSERT(d_state <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
|
|
|
- const size_t sms = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -1426,7 +1426,7 @@ int ggml_metal_op_ssm_scan(ggml_metal_op_t ctx, int idx) {
|
|
|
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[6]), 7);
|
|
|
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op), 8);
|
|
|
|
|
|
- ggml_metal_encoder_set_threadgroup_memory_size(enc, sms, 0);
|
|
|
+ ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0);
|
|
|
|
|
|
ggml_metal_encoder_dispatch_threadgroups(enc, d_inner, n_head, n_seqs, d_state, 1, 1);
|
|
|
|
|
|
@@ -1449,7 +1449,7 @@ int ggml_metal_op_rwkv(ggml_metal_op_t ctx, int idx) {
|
|
|
const int64_t C = op->ne[0];
|
|
|
const int64_t H = op->src[0]->ne[1];
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_rwkv(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_rwkv(lib, op);
|
|
|
|
|
|
int ida = 0;
|
|
|
|
|
|
@@ -1485,7 +1485,7 @@ int ggml_metal_op_cpy(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[0]->type, op->type);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[0]->type, op->type);
|
|
|
|
|
|
GGML_ASSERT(ne00 % ggml_blck_size(op->src[0]->type) == 0);
|
|
|
|
|
|
@@ -1592,7 +1592,7 @@ int ggml_metal_op_pool_2d(ggml_metal_op_t ctx, int idx) {
|
|
|
/* .np = */ np
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_pool_2d(lib, op, op_pool);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_pool_2d(lib, op, op_pool);
|
|
|
|
|
|
const int nth = std::min(ggml_metal_pipeline_max_theads_per_threadgroup(pipeline), (int) np);
|
|
|
const int ntg = (np + nth - 1) / nth;
|
|
|
@@ -1701,7 +1701,7 @@ int ggml_metal_op_mul_mat(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_ABORT("unsupported ne11");
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_mul_mv_ext(lib, op->src[0]->type, op->src[1]->type, nsg, nxpsg, r1ptg);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_mul_mv_ext(lib, op->src[0]->type, op->src[1]->type, nsg, nxpsg, r1ptg);
|
|
|
|
|
|
ggml_metal_kargs_mul_mv_ext args = {
|
|
|
/*.ne00 =*/ ne00,
|
|
|
@@ -1748,7 +1748,7 @@ int ggml_metal_op_mul_mat(ggml_metal_op_t ctx, int idx) {
|
|
|
// default: break;
|
|
|
//}
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_mul_mm(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_mul_mm(lib, op);
|
|
|
|
|
|
ggml_metal_kargs_mul_mm args = {
|
|
|
/*.ne00 =*/ ne00,
|
|
|
@@ -1773,18 +1773,18 @@ int ggml_metal_op_mul_mat(ggml_metal_op_t ctx, int idx) {
|
|
|
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[1]), 2);
|
|
|
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op), 3);
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0);
|
|
|
ggml_metal_encoder_dispatch_threadgroups(enc, ((ne11 + 31)/32), ((ne01 + 63)/64), ne12*ne13, 128, 1, 1);
|
|
|
} else {
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_mul_mv(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_mul_mv(lib, op);
|
|
|
|
|
|
- const int nr0 = ggml_metal_pipeline_get_nr0(pipeline);
|
|
|
- const int nr1 = ggml_metal_pipeline_get_nr1(pipeline);
|
|
|
- const int nsg = ggml_metal_pipeline_get_nsg(pipeline);
|
|
|
+ const int nr0 = pipeline.nr0;
|
|
|
+ const int nr1 = pipeline.nr1;
|
|
|
+ const int nsg = pipeline.nsg;
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_kargs_mul_mv args = {
|
|
|
/*.ne00 =*/ ne00,
|
|
|
@@ -1915,9 +1915,9 @@ int ggml_metal_op_mul_mat_id(ggml_metal_op_t ctx, int idx) {
|
|
|
nb21,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_mul_mm_id_map0(lib, ne02, ne20);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_mul_mm_id_map0(lib, ne02, ne20);
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
GGML_ASSERT(ne02 <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
|
|
|
@@ -1938,7 +1938,7 @@ int ggml_metal_op_mul_mat_id(ggml_metal_op_t ctx, int idx) {
|
|
|
ggml_metal_op_concurrency_reset(ctx);
|
|
|
|
|
|
{
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_mul_mm_id(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_mul_mm_id(lib, op);
|
|
|
|
|
|
ggml_metal_kargs_mul_mm_id args = {
|
|
|
/*.ne00 =*/ ne00,
|
|
|
@@ -1967,20 +1967,20 @@ int ggml_metal_op_mul_mat_id(ggml_metal_op_t ctx, int idx) {
|
|
|
ggml_metal_encoder_set_buffer (enc, bid_ids, 4);
|
|
|
ggml_metal_encoder_set_buffer (enc, bid_dst, 5);
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0);
|
|
|
|
|
|
ggml_metal_encoder_dispatch_threadgroups(enc, (ne21 + 31)/32, (ne01 + 63)/64, ne02, 128, 1, 1);
|
|
|
}
|
|
|
} else {
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_mul_mv_id(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_mul_mv_id(lib, op);
|
|
|
|
|
|
- const int nr0 = ggml_metal_pipeline_get_nr0(pipeline);
|
|
|
- const int nr1 = ggml_metal_pipeline_get_nr1(pipeline);
|
|
|
- const int nsg = ggml_metal_pipeline_get_nsg(pipeline);
|
|
|
+ const int nr0 = pipeline.nr0;
|
|
|
+ const int nr1 = pipeline.nr1;
|
|
|
+ const int nsg = pipeline.nsg;
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_kargs_mul_mv_id args = {
|
|
|
/*.nei0 =*/ ne20,
|
|
|
@@ -2064,7 +2064,7 @@ int ggml_metal_op_add_id(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb21 =*/ nb21,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_base(lib, GGML_OP_ADD_ID);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_base(lib, GGML_OP_ADD_ID);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -2308,7 +2308,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb33 =*/nb33,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_pad(lib, op, has_mask, ncpsg);
|
|
|
+ auto pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_pad(lib, op, has_mask, ncpsg);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline0);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args0, sizeof(args0), 0);
|
|
|
@@ -2339,7 +2339,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb33 =*/ nb33,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_blk(lib, op, nqptg, ncpsg);
|
|
|
+ auto pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_blk(lib, op, nqptg, ncpsg);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline0);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args0, sizeof(args0), 0);
|
|
|
@@ -2424,7 +2424,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.logit_softcap =*/ logit_softcap,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_flash_attn_ext(lib, op, has_mask, has_sinks, has_bias, has_scap, has_kvpad, nsg);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_flash_attn_ext(lib, op, has_mask, has_sinks, has_bias, has_scap, has_kvpad, nsg);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -2476,7 +2476,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb33 =*/nb33,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_pad(lib, op, has_mask, ncpsg);
|
|
|
+ auto pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_pad(lib, op, has_mask, ncpsg);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline0);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args0, sizeof(args0), 0);
|
|
|
@@ -2578,7 +2578,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.logit_softcap =*/ logit_softcap,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_flash_attn_ext_vec(lib, op, has_mask, has_sinks, has_bias, has_scap, has_kvpad, nsg, nwg);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_flash_attn_ext_vec(lib, op, has_mask, has_sinks, has_bias, has_scap, has_kvpad, nsg, nwg);
|
|
|
|
|
|
GGML_ASSERT(nsg*32 <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
|
|
|
@@ -2630,7 +2630,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {
|
|
|
nrows,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_vec_reduce(lib, op, ne20, nwg);
|
|
|
+ auto pipeline0 = ggml_metal_library_get_pipeline_flash_attn_ext_vec_reduce(lib, op, ne20, nwg);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline0);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args0, sizeof(args0), 0);
|
|
|
@@ -2762,7 +2762,7 @@ int ggml_metal_op_bin(ggml_metal_op_t ctx, int idx) {
|
|
|
// the offsets of src1 and all fused buffers are relative to the start of the src1 buffer
|
|
|
bid_src1.offs = 0;
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = nullptr;
|
|
|
+ struct ggml_metal_pipeline_with_params pipeline;
|
|
|
|
|
|
if (ggml_nelements(op->src[1]) == ne10 && ggml_is_contiguous(op->src[1]) && ne00 % 4 == 0 && ne10 % 4 == 0) {
|
|
|
GGML_ASSERT(ggml_is_contiguous(op->src[0]));
|
|
|
@@ -2835,7 +2835,7 @@ int ggml_metal_op_l2_norm(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.eps =*/ eps,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_l2_norm(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_l2_norm(lib, op);
|
|
|
|
|
|
while (nth < ne00/4 && nth < ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)) {
|
|
|
nth *= 2;
|
|
|
@@ -2844,7 +2844,7 @@ int ggml_metal_op_l2_norm(ggml_metal_op_t ctx, int idx) {
|
|
|
nth = std::min(nth, ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
nth = std::min(nth, ne00/4);
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
const int64_t nrows = ggml_nrows(op->src[0]);
|
|
|
|
|
|
@@ -2887,7 +2887,7 @@ int ggml_metal_op_group_norm(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.eps =*/ eps,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_group_norm(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_group_norm(lib, op);
|
|
|
|
|
|
int nth = 32; // SIMD width
|
|
|
//while (nth < ne00/4 && nth < ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)) {
|
|
|
@@ -2897,7 +2897,7 @@ int ggml_metal_op_group_norm(ggml_metal_op_t ctx, int idx) {
|
|
|
//nth = std::min(nth, ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
//nth = std::min(nth, ne00/4);
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -3022,7 +3022,7 @@ int ggml_metal_op_norm(ggml_metal_op_t ctx, int idx) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_norm(lib, op, n_fuse);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_norm(lib, op, n_fuse);
|
|
|
|
|
|
int nth = 32; // SIMD width
|
|
|
|
|
|
@@ -3033,7 +3033,7 @@ int ggml_metal_op_norm(ggml_metal_op_t ctx, int idx) {
|
|
|
nth = std::min(nth, ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
nth = std::min(nth, args.ne00_t);
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -3127,7 +3127,7 @@ int ggml_metal_op_rope(ggml_metal_op_t ctx, int idx) {
|
|
|
/* src2 =*/ op->src[2] != nullptr,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_rope(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_rope(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -3199,7 +3199,7 @@ int ggml_metal_op_im2col(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.KHW =*/ KH * KW,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_im2col(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_im2col(lib, op);
|
|
|
|
|
|
GGML_ASSERT(KH*KW <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
|
|
|
|
|
@@ -3270,7 +3270,7 @@ int ggml_metal_op_conv_2d(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.d1 =*/ d1,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_conv_2d(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_conv_2d(lib, op);
|
|
|
|
|
|
int nth = ggml_metal_pipeline_max_theads_per_threadgroup(pipeline);
|
|
|
nth = std::min(nth, 256);
|
|
|
@@ -3325,7 +3325,7 @@ int ggml_metal_op_conv_transpose_1d(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb1 =*/ nb1,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_conv_transpose_1d(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_conv_transpose_1d(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -3377,7 +3377,7 @@ int ggml_metal_op_conv_transpose_2d(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb2 =*/ nb2,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_conv_transpose_2d(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_conv_transpose_2d(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -3433,7 +3433,7 @@ int ggml_metal_op_upscale(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.sf3 =*/ sf3
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_upscale(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_upscale(lib, op);
|
|
|
|
|
|
const int nth = std::min(ggml_metal_pipeline_max_theads_per_threadgroup(pipeline), ne0);
|
|
|
|
|
|
@@ -3477,7 +3477,7 @@ int ggml_metal_op_pad(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb3 =*/ nb3
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_pad(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_pad(lib, op);
|
|
|
|
|
|
const int nth = std::min(1024, ne0);
|
|
|
|
|
|
@@ -3523,7 +3523,7 @@ int ggml_metal_op_pad_reflect_1d(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.p1 =*/ ((const int32_t *)(op->op_params))[1]
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_pad_reflect_1d(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_pad_reflect_1d(lib, op);
|
|
|
|
|
|
const int nth = std::min(1024, ne0);
|
|
|
|
|
|
@@ -3560,7 +3560,7 @@ int ggml_metal_op_arange(ggml_metal_op_t ctx, int idx) {
|
|
|
|
|
|
const int nth = std::min(1024, ne0);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_arange(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_arange(lib, op);
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -3591,7 +3591,7 @@ int ggml_metal_op_timestep_embedding(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.max_period =*/ max_period,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_timestep_embedding(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_timestep_embedding(lib, op);
|
|
|
|
|
|
const int nth = std::max(1, std::min(1024, dim/2));
|
|
|
|
|
|
@@ -3621,7 +3621,7 @@ int ggml_metal_op_argmax(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.nb01 = */ nb01,
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_argmax(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_argmax(lib, op);
|
|
|
|
|
|
const int64_t nrows = ggml_nrows(op->src[0]);
|
|
|
|
|
|
@@ -3630,7 +3630,7 @@ int ggml_metal_op_argmax(ggml_metal_op_t ctx, int idx) {
|
|
|
nth *= 2;
|
|
|
}
|
|
|
|
|
|
- const size_t smem = ggml_metal_pipeline_get_smem(pipeline);
|
|
|
+ const size_t smem = pipeline.smem;
|
|
|
|
|
|
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
|
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
|
@@ -3657,7 +3657,7 @@ int ggml_metal_op_argsort(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_argsort(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_argsort(lib, op);
|
|
|
|
|
|
// bitonic sort requires the number of elements to be power of 2
|
|
|
int nth = 1;
|
|
|
@@ -3706,7 +3706,7 @@ int ggml_metal_op_argsort(ggml_metal_op_t ctx, int idx) {
|
|
|
|
|
|
ggml_metal_encoder_dispatch_threadgroups(enc, npr*ne01, ne02, ne03, nth, 1, 1);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline_merge = ggml_metal_library_get_pipeline_argsort_merge(lib, op);
|
|
|
+ auto pipeline_merge = ggml_metal_library_get_pipeline_argsort_merge(lib, op);
|
|
|
|
|
|
int len = nth;
|
|
|
|
|
|
@@ -3764,7 +3764,7 @@ int ggml_metal_op_top_k(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_top_k(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_top_k(lib, op);
|
|
|
|
|
|
// bitonic sort requires the number of elements to be power of 2
|
|
|
int nth = 1;
|
|
|
@@ -3818,7 +3818,7 @@ int ggml_metal_op_top_k(ggml_metal_op_t ctx, int idx) {
|
|
|
|
|
|
ggml_metal_encoder_dispatch_threadgroups(enc, npr*ne01, ne02, ne03, nth, 1, 1);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline_merge = ggml_metal_library_get_pipeline_top_k_merge(lib, op);
|
|
|
+ auto pipeline_merge = ggml_metal_library_get_pipeline_top_k_merge(lib, op);
|
|
|
|
|
|
int len = args.top_k;
|
|
|
|
|
|
@@ -3881,7 +3881,7 @@ int ggml_metal_op_leaky_relu(ggml_metal_op_t ctx, int idx) {
|
|
|
/*.slope =*/ slope
|
|
|
};
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_unary(lib, op);
|
|
|
|
|
|
int64_t n = ggml_nelements(op);
|
|
|
|
|
|
@@ -3910,7 +3910,7 @@ int ggml_metal_op_opt_step_adamw(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_opt_step_adamw(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_opt_step_adamw(lib, op);
|
|
|
|
|
|
const int64_t np = ggml_nelements(op->src[0]);
|
|
|
ggml_metal_kargs_opt_step_adamw args = {
|
|
|
@@ -3946,7 +3946,7 @@ int ggml_metal_op_opt_step_sgd(ggml_metal_op_t ctx, int idx) {
|
|
|
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
|
|
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
|
|
|
|
|
- ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_opt_step_sgd(lib, op);
|
|
|
+ auto pipeline = ggml_metal_library_get_pipeline_opt_step_sgd(lib, op);
|
|
|
|
|
|
const int64_t np = ggml_nelements(op->src[0]);
|
|
|
ggml_metal_kargs_opt_step_sgd args = {
|