|
|
@@ -1195,24 +1195,24 @@ static enum ggml_status ggml_metal_graph_compute(
|
|
|
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
|
|
} break;
|
|
|
case GGML_OP_CLAMP:
|
|
|
- {
|
|
|
- id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CLAMP].pipeline;
|
|
|
+ {
|
|
|
+ id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CLAMP].pipeline;
|
|
|
|
|
|
- float min;
|
|
|
- float max;
|
|
|
- memcpy(&min, ((int32_t *) dst->op_params) + 0, sizeof(float));
|
|
|
- memcpy(&max, ((int32_t *) dst->op_params) + 1, sizeof(float));
|
|
|
+ float min;
|
|
|
+ float max;
|
|
|
+ memcpy(&min, ((int32_t *) dst->op_params) + 0, sizeof(float));
|
|
|
+ memcpy(&max, ((int32_t *) dst->op_params) + 1, sizeof(float));
|
|
|
|
|
|
- [encoder setComputePipelineState:pipeline];
|
|
|
- [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
|
|
- [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
|
|
- [encoder setBytes:&min length:sizeof(min) atIndex:2];
|
|
|
- [encoder setBytes:&max length:sizeof(max) atIndex:3];
|
|
|
+ [encoder setComputePipelineState:pipeline];
|
|
|
+ [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
|
|
+ [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
|
|
+ [encoder setBytes:&min length:sizeof(min) atIndex:2];
|
|
|
+ [encoder setBytes:&max length:sizeof(max) atIndex:3];
|
|
|
|
|
|
- const int64_t n = ggml_nelements(dst);
|
|
|
+ const int64_t n = ggml_nelements(dst);
|
|
|
|
|
|
- [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
|
|
- } break;
|
|
|
+ [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
|
|
+ } break;
|
|
|
case GGML_OP_UNARY:
|
|
|
switch (ggml_get_unary_op(gf->nodes[i])) {
|
|
|
// we are not taking into account the strides, so for now require contiguous tensors
|