|
|
@@ -1404,10 +1404,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|
|
// spec constants and tile sizes for non-quant matmul/matmul_id
|
|
|
l_warptile = { 256, 128, 256, 64 };
|
|
|
m_warptile = { 256, 128, 128, 64 };
|
|
|
- s_warptile = { 128, 32, 16, 64 };
|
|
|
+ s_warptile = { 128, 64, 64, 64 };
|
|
|
l_wg_denoms = {128, 256, 1 };
|
|
|
m_wg_denoms = {128, 128, 1 };
|
|
|
- s_wg_denoms = { 32, 16, 1 };
|
|
|
+ s_wg_denoms = { 64, 64, 1 };
|
|
|
|
|
|
// spec constants and tile sizes for quant matmul (non-Qi_K)
|
|
|
l_warptile_mmq = { 256, 128, 256, 64 };
|
|
|
@@ -2017,11 +2017,11 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|
|
|
|
|
ggml_vk_create_pipeline(device, device->pipeline_sum_rows_f32, "sum_rows_f32", sum_rows_f32_len, sum_rows_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, { device->subgroup_size }, 1);
|
|
|
|
|
|
- ggml_vk_create_pipeline(device, device->pipeline_im2col_f32, "im2col_f32", im2col_f32_len, im2col_f32_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
|
|
|
+ ggml_vk_create_pipeline(device, device->pipeline_im2col_f32, "im2col_f32", im2col_f32_len, im2col_f32_data, "main", 2, sizeof(vk_op_im2col_push_constants), {512, 1, 1}, { device->subgroup_size }, 1, true);
|
|
|
if (device->float_controls_rte_fp16) {
|
|
|
- ggml_vk_create_pipeline(device, device->pipeline_im2col_f32_f16, "im2col_f32_f16", im2col_f32_f16_rte_len, im2col_f32_f16_rte_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
|
|
|
+ ggml_vk_create_pipeline(device, device->pipeline_im2col_f32_f16, "im2col_f32_f16", im2col_f32_f16_rte_len, im2col_f32_f16_rte_data, "main", 2, sizeof(vk_op_im2col_push_constants), {512, 1, 1}, { device->subgroup_size }, 1, true);
|
|
|
} else {
|
|
|
- ggml_vk_create_pipeline(device, device->pipeline_im2col_f32_f16, "im2col_f32_f16", im2col_f32_f16_len, im2col_f32_f16_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
|
|
|
+ ggml_vk_create_pipeline(device, device->pipeline_im2col_f32_f16, "im2col_f32_f16", im2col_f32_f16_len, im2col_f32_f16_data, "main", 2, sizeof(vk_op_im2col_push_constants), {512, 1, 1}, { device->subgroup_size }, 1, true);
|
|
|
}
|
|
|
|
|
|
ggml_vk_create_pipeline(device, device->pipeline_timestep_embedding_f32, "timestep_embedding_f32", timestep_embedding_f32_len, timestep_embedding_f32_data, "main", 2, sizeof(vk_op_timestep_embedding_push_constants), {256, 1, 1}, {}, 1);
|