Procházet zdrojové kódy

metal : simplify f16 and f32 dequant kernels (#0)

Georgi Gerganov před 1 rokem
rodič
revize
05697f670b
1 změněný soubor, 2 přidání a 8 odebrání
  1. 2 8
      ggml/src/ggml-metal.metal

+ 2 - 8
ggml/src/ggml-metal.metal

@@ -19,18 +19,12 @@ constexpr constant static float kvalues_iq4nl_f[16] = {
 // NOTE: this is not dequantizing - we are simply fitting the template
 template <typename type4x4>
 void dequantize_f32(device const float4x4 * src, short il, thread type4x4 & reg) {
-    float4x4 temp = *(((device float4x4 *)src));
-    for (int i = 0; i < 16; i++){
-        reg[i/4][i%4] = temp[i/4][i%4];
-    }
+    reg = (type4x4)(*src);
 }
 
 template <typename type4x4>
 void dequantize_f16(device const half4x4 * src, short il, thread type4x4 & reg) {
-    half4x4 temp = *(((device half4x4 *)src));
-    for (int i = 0; i < 16; i++){
-        reg[i/4][i%4] = temp[i/4][i%4];
-    }
+    reg = (type4x4)(*src);
 }
 
 template <typename type4x4>