5 달 전 · 89d1029559
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -484,6 +484,7 @@ struct vk_device_struct {
 
				     vk_pipeline pipeline_rwkv_wkv7_f32;
			
 
				     vk_pipeline pipeline_opt_step_adamw_f32;
			
 
				     vk_pipeline pipeline_conv2d_f32;
			
 
				+    vk_pipeline pipeline_conv2d_f16_f32;
			
 
				     vk_pipeline pipeline_conv2d_dw_whcn_f32;
			
 
				     vk_pipeline pipeline_conv2d_dw_cwhn_f32;
			
 
				 
			
@@ -3074,12 +3075,21 @@ static void ggml_vk_load_shaders(vk_device& device) {
 
				             device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3,
			
 
				             sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
			
 
				             { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true, true);
			
 
				+        ggml_vk_create_pipeline(
			
 
				+            device, device->pipeline_conv2d_f16_f32, "conv2d_f16_f32", conv2d_f16_f32_len, conv2d_f16_f32_data, "main", 3,
			
 
				+            sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
			
 
				+            { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true, true);
			
 
				     } else {
			
 
				         ggml_vk_create_pipeline(
			
 
				             device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3,
			
 
				             sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
			
 
				             { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true,
			
 
				             false);
			
 
				+        ggml_vk_create_pipeline(
			
 
				+            device, device->pipeline_conv2d_f16_f32, "conv2d_f16_f32", conv2d_f16_f32_len, conv2d_f16_f32_data, "main", 3,
			
 
				+            sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
			
 
				+            { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true,
			
 
				+            false);
			
 
				     }
			
 
				 
			
 
				     ggml_vk_create_pipeline(device, device->pipeline_conv2d_dw_whcn_f32, "conv2d_dw_whcn_f32", conv2d_dw_whcn_f32_len, conv2d_dw_whcn_f32_data, "main", 3, sizeof(vk_op_conv2d_dw_push_constants), {512, 1, 1}, {}, 1);
			
@@ -6958,9 +6968,13 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
 
				         }
			
 
				         return nullptr;
			
 
				     case GGML_OP_CONV_2D:
			
 
				-        if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 &&
			
 
				+        if (src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 &&
			
 
				             ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ggml_is_contiguous(dst)) {
			
 
				-            return ctx->device->pipeline_conv2d_f32;
			
 
				+            if (src0->type == GGML_TYPE_F32) {
			
 
				+                return ctx->device->pipeline_conv2d_f32;
			
 
				+            } else if (src0->type == GGML_TYPE_F16) {
			
 
				+                return ctx->device->pipeline_conv2d_f16_f32;
			
 
				+            }
			
 
				         }
			
 
				         return nullptr;
			
 
				     case GGML_OP_CONV_2D_DW:
			
@@ -8185,13 +8199,13 @@ static void ggml_vk_pool_2d(ggml_backend_vk_context * ctx, vk_context& subctx, c
 
				 
			
 
				 static void ggml_vk_conv_2d(ggml_backend_vk_context * ctx, vk_context & subctx, const ggml_tensor * src0,
			
 
				                             const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
			
 
				-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
			
 
				+    GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
			
 
				     GGML_ASSERT(src1->type == GGML_TYPE_F32);
			
 
				     GGML_ASSERT(dst->type == GGML_TYPE_F32);
			
 
				 
			
 
				     GGML_TENSOR_BINARY_OP_LOCALS
			
 
				 
			
 
				-    GGML_ASSERT(nb00 == sizeof(float));
			
 
				+    GGML_ASSERT(nb00 == sizeof(float) || nb00 == sizeof(ggml_fp16_t));
			
 
				     GGML_ASSERT(nb10 == sizeof(float));
			
 
				     GGML_ASSERT(nb0 == sizeof(float));
			
 
				 
			
@@ -10874,7 +10888,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
 
				                 const vk_device& device = ggml_vk_get_device(ctx->device);
			
 
				                 bool is_Apple = ggml_vk_get_device(ctx->device)->vendor_id == VK_VENDOR_ID_APPLE;
			
 
				                 // Channel-contiguous format is not supported yet.
			
 
				-                return (op->src[0]->type == GGML_TYPE_F32 &&
			
 
				+                return ((op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16) &&
			
 
				                     op->src[1]->type == GGML_TYPE_F32 &&
			
 
				                     op->type == GGML_TYPE_F32 &&
			
 
				                     ggml_is_contiguous(op->src[0]) &&
			
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -656,6 +656,7 @@ void process_shaders() {
 
				     string_to_spv("opt_step_adamw_f32", "opt_step_adamw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
			
 
				 
			
 
				     string_to_spv("conv2d_f32", "conv2d_mm.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"USE_COLLECTIVES", "1"}});
			
 
				+    string_to_spv("conv2d_f16_f32", "conv2d_mm.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"USE_COLLECTIVES", "1"}});
			
 
				 
			
 
				     string_to_spv("conv2d_dw_whcn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"WHCN", "1"}}));
			
 
				     string_to_spv("conv2d_dw_cwhn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"CWHN", "1"}}));
			
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3734,6 +3734,7 @@ struct test_im2col : public test_case {
 
				 struct test_conv_2d : public test_case {
			
 
				     const std::array<int64_t, 4> ne_input;
			
 
				     const std::array<int64_t, 4> ne_kernel;
			
 
				+    const ggml_type              type_kernel;
			
 
				     const int                    stride0;
			
 
				     const int                    stride1;
			
 
				     const int                    padding0;
			
@@ -3751,7 +3752,11 @@ struct test_conv_2d : public test_case {
 
				     // IM2COL -> MUL_MM graph will be built.
			
 
				 
			
 
				     std::string vars() override {
			
 
				-        return VARS_TO_STR9(ne_input, ne_kernel, stride0, stride1, padding0, padding1, dilation0, dilation1, cwhn);
			
 
				+        return VARS_TO_STR10(ne_input, ne_kernel, type_kernel, stride0, stride1, padding0, padding1, dilation0, dilation1, cwhn);  // string built from all ten test parameters, now including type_kernel
			
 
				+    }
			
 
				+
			
 
				+    double max_nmse_err() override {
			
 
				+        return 5e-4;  // constant bound returned for every test_conv_2d case; NOTE(review): presumably chosen to tolerate the F16 kernel cases this patch adds - confirm
			
 
				     }
			
 
				 
			
 
				     uint64_t op_flops(ggml_tensor * t) override {
			
@@ -3782,10 +3787,11 @@ struct test_conv_2d : public test_case {
 
				     }
			
 
				 
			
 
				     test_conv_2d(std::array<int64_t, 4> ne_input  = { 64, 64, 16, 1 },
			
 
				-                 std::array<int64_t, 4> ne_kernel = { 3, 3, 1, 16 }, int stride0 = 1, int stride1 = 1, int padding0 = 0,
			
 
				-                 int padding1 = 0, int dilation0 = 1, int dilation1 = 1, bool cwhn = false) :
			
 
				+                 std::array<int64_t, 4> ne_kernel = { 3, 3, 1, 16 }, ggml_type type_kernel = GGML_TYPE_F32, int stride0 = 1,
			
 
				+                 int stride1 = 1, int padding0 = 0, int padding1 = 0, int dilation0 = 1, int dilation1 = 1, bool cwhn = false) :
			
 
				         ne_input(ne_input),
			
 
				         ne_kernel(ne_kernel),
			
 
				+        type_kernel(type_kernel),
			
 
				         stride0(stride0),
			
 
				         stride1(stride1),
			
 
				         padding0(padding0),
			
@@ -3798,7 +3804,7 @@ struct test_conv_2d : public test_case {
 
				         ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
			
 
				         ggml_set_name(input, "input");
			
 
				 
			
 
				-        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
			
 
				+        ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data());
			
 
				         ggml_set_name(kernel, "kernel");
			
 
				 
			
 
				         if (cwhn) {
			
@@ -5165,10 +5171,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
				         { 16,  3, 256,  128, 8 }
			
 
				     };
			
 
				 
			
 
				-    for (auto act_case : cases) {
			
 
				-        test_cases.emplace_back(new test_conv_2d(
			
 
				-            { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
			
 
				-            { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] }, 1, 1, 0, 0, 1, 1, false));
			
 
				+    for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
			
 
				+        for (auto act_case : cases) {
			
 
				+            test_cases.emplace_back(new test_conv_2d(
			
 
				+                { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
			
 
				+                { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
			
 
				+                kernel_type, 1, 1, 0, 0, 1, 1, false));
			
 
				+        }
			
 
				     }
			
 
				 #endif
			
 
				 
			
@@ -5194,8 +5203,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
				                                 for (uint32_t W : { 1, 141 }) {
			
 
				                                     if (calc_conv_output_size(W, KW, s0, p0, d0) > 0 &&
			
 
				                                         calc_conv_output_size(H, KH, s1, p1, d1) > 0) {
			
 
				-                                        test_cases.emplace_back(new test_conv_2d(
			
 
				-                                            { W, H, Cin, 2 }, { KW, KH, Cin, Cout }, s0, s1, p0, p1, d0, d1, false));
			
 
				+                                        for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
			
 
				+                                            test_cases.emplace_back(new test_conv_2d(
			
 
				+                                                { W, H, Cin, 2 }, { KW, KH, Cin, Cout }, kernel_type, s0, s1, p0, p1, d0, d1, false));
			
 
				+                                        }
			
 
				                                     }
			
 
				                                 }
			
 
				                             }
			
@@ -5840,11 +5851,14 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
 
				         { 16,  3, 512,  128, 8 },
			
 
				     };
			
 
				 
			
 
				-    for (auto act_case : cases) {
			
 
				-        // Direct CONV_2D
			
 
				-        test_cases.emplace_back(new test_conv_2d(
			
 
				-            { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
			
 
				-            { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] }, 1, 1, 0, 0, 1, 1, false));
			
 
				+    for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
			
 
				+        for (auto act_case : cases) {
			
 
				+            // Direct CONV_2D
			
 
				+            test_cases.emplace_back(new test_conv_2d(
			
 
				+                { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
			
 
				+                { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
			
 
				+                kernel_type, 1, 1, 0, 0, 1, 1, false));
			
 
				+        }
			
 
				     }
			
 
				 
			
 
				     test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1,   1, 1, 1}));