2 лет назад · 69c92298a9
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -89,7 +89,7 @@ jobs:
 
				         run: |
			
 
				           mkdir build
			
 
				           cd build
			
 
				-          cmake ..
			
 
				+          cmake -DLLAMA_AVX2=OFF ..
			
 
				           cmake --build . --config Release
			
 
				           ctest --output-on-failure
			
 
				 
			
--- a/ggml.c
+++ b/ggml.c
@@ -403,9 +403,55 @@ static inline __m128i packNibbles( __m256i bytes )
 
				 // method 5
			
 
				 // blocks of QK elements
			
 
				 // represented with a single float (delta) and QK/2 8-bit ints (i.e QK 4-bit signed integer factors)
			
 
				+
			
 
				+// reference implementation for deterministic creation of model files
			
 
				+static void quantize_row_q4_0_reference(const float * restrict x, void * restrict y, int k) {
			
 
				+    assert(k % QK == 0);
			
 
				+    const int nb = k / QK;
			
 
				+
			
 
				+    const size_t bs = sizeof(float) + QK/2;
			
 
				+
			
 
				+    uint8_t * restrict pd = ((uint8_t *)y + 0*bs);
			
 
				+    uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float));
			
 
				+
			
 
				+    uint8_t pp[QK/2];
			
 
				+
			
 
				+    for (int i = 0; i < nb; i++) {
			
 
				+        float amax = 0.0f; // absolute max
			
 
				+
			
 
				+        for (int l = 0; l < QK; l++) {
			
 
				+            const float v = x[i*QK + l];
			
 
				+            amax = MAX(amax, fabsf(v));
			
 
				+        }
			
 
				+
			
 
				+        const float d = amax / ((1 << 3) - 1);
			
 
				+        const float id = d ? 1.0f/d : 0.0f;
			
 
				+
			
 
				+        *(float *)pd = d;
			
 
				+        pd += bs;
			
 
				+
			
 
				+        for (int l = 0; l < QK; l += 2) {
			
 
				+            const float v0 = x[i*QK + l + 0]*id;
			
 
				+            const float v1 = x[i*QK + l + 1]*id;
			
 
				+
			
 
				+            const uint8_t vi0 = ((int8_t) (round(v0))) + 8;
			
 
				+            const uint8_t vi1 = ((int8_t) (round(v1))) + 8;
			
 
				+
			
 
				+            assert(vi0 >= 0 && vi0 < 16);
			
 
				+            assert(vi1 >= 0 && vi1 < 16);
			
 
				+
			
 
				+            pp[l/2] = vi0 | (vi1 << 4);
			
 
				+        }
			
 
				+
			
 
				+        memcpy(pb, pp, sizeof(pp));
			
 
				+        pb += bs;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
			
 
				     assert(k % QK == 0);
			
 
				 
			
 
				+#if __ARM_NEON || defined(__AVX2__) || defined(__wasm_simd128__)
			
 
				     const int nb = k / QK;
			
 
				     const size_t bs = sizeof(float) + QK/2;
			
 
				 
			
@@ -413,6 +459,7 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
 
				     uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float));
			
 
				 
			
 
				     uint8_t pp[QK/2];
			
 
				+#endif
			
 
				 
			
 
				 #if __ARM_NEON
			
 
				 #if QK == 32
			
@@ -569,36 +616,7 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
 
				 #endif
			
 
				 #else
			
 
				     // scalar
			
 
				-    for (int i = 0; i < nb; i++) {
			
 
				-        float amax = 0.0f; // absolute max
			
 
				-
			
 
				-        for (int l = 0; l < QK; l++) {
			
 
				-            const float v = x[i*QK + l];
			
 
				-            amax = MAX(amax, fabsf(v));
			
 
				-        }
			
 
				-
			
 
				-        const float d = amax / ((1 << 3) - 1);
			
 
				-        const float id = d ? 1.0f/d : 0.0f;
			
 
				-
			
 
				-        *(float *)pd = d;
			
 
				-        pd += bs;
			
 
				-
			
 
				-        for (int l = 0; l < QK; l += 2) {
			
 
				-            const float v0 = x[i*QK + l + 0]*id;
			
 
				-            const float v1 = x[i*QK + l + 1]*id;
			
 
				-
			
 
				-            const uint8_t vi0 = ((int8_t) (round(v0))) + 8;
			
 
				-            const uint8_t vi1 = ((int8_t) (round(v1))) + 8;
			
 
				-
			
 
				-            assert(vi0 >= 0 && vi0 < 16);
			
 
				-            assert(vi1 >= 0 && vi1 < 16);
			
 
				-
			
 
				-            pp[l/2] = vi0 | (vi1 << 4);
			
 
				-        }
			
 
				-
			
 
				-        memcpy(pb, pp, sizeof(pp));
			
 
				-        pb += bs;
			
 
				-    }
			
 
				+    quantize_row_q4_0_reference(x, y, k);
			
 
				 #endif
			
 
				 }
			
 
				 
			
@@ -10705,119 +10723,60 @@ enum ggml_opt_result ggml_opt(
 
				 
			
 
				 ////////////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				-size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist) {
			
 
				+size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int qk, int64_t * hist) {
			
 
				     const int nb = k / qk;
			
 
				     const size_t bs = (sizeof(float) + sizeof(uint8_t)*qk/2);
			
 
				     const size_t row_size = nb*bs;
			
 
				 
			
 
				     assert(k % qk == 0);
			
 
				 
			
 
				-    const size_t pp_size = qk / 2;
			
 
				-    uint8_t * pp = (uint8_t *) alloca(pp_size);
			
 
				-
			
 
				     char * pdst = (char *) dst;
			
 
				 
			
 
				     for (int j = 0; j < n; j += k) {
			
 
				         uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs);
			
 
				         uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float));
			
 
				 
			
 
				-        for (int i = 0; i < nb; i++) {
			
 
				-            float amax = 0.0f; // absolute max
			
 
				-
			
 
				-            {
			
 
				-                for (int l = 0; l < qk; l++) {
			
 
				-                    const float v = src[j + i*qk + l];
			
 
				-                    amax = MAX(amax, fabsf(v));
			
 
				-                }
			
 
				-
			
 
				-                const float d = amax / ((1 << 3) - 1);
			
 
				-                const float id = d ? 1.0f/d : 0.0f;
			
 
				-
			
 
				-                *(float *) pd = d;
			
 
				-                pd += bs;
			
 
				+        quantize_row_q4_0_reference(src + j, pd, k);
			
 
				 
			
 
				-                for (int l = 0; l < qk; l += 2) {
			
 
				-                    const float v0 = (src[j + i*qk + l + 0])*id;
			
 
				-                    const float v1 = (src[j + i*qk + l + 1])*id;
			
 
				-
			
 
				-                    const uint8_t vi0 = ((int8_t) (round(v0))) + 8;
			
 
				-                    const uint8_t vi1 = ((int8_t) (round(v1))) + 8;
			
 
				-
			
 
				-                    assert(vi0 >= 0 && vi0 < 16);
			
 
				-                    assert(vi1 >= 0 && vi1 < 16);
			
 
				-
			
 
				-                    hist[vi0]++;
			
 
				-                    hist[vi1]++;
			
 
				-
			
 
				-                    pp[l/2] = vi0 | (vi1 << 4);
			
 
				-                }
			
 
				+        for (int i = 0; i < nb; i++) {
			
 
				+            for (int l = 0; l < qk; l += 2) {
			
 
				+                const uint8_t vi0 = pb[l/2] & 0xF;
			
 
				+                const uint8_t vi1 = pb[l/2] >> 4;
			
 
				 
			
 
				-                memcpy(pb, pp, pp_size);
			
 
				-                pb += bs;
			
 
				+                hist[vi0]++;
			
 
				+                hist[vi1]++;
			
 
				             }
			
 
				+            pb += bs;
			
 
				         }
			
 
				     }
			
 
				 
			
 
				     return (n/k)*row_size;
			
 
				 }
			
 
				 
			
 
				-size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist) {
			
 
				+size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int qk, int64_t * hist) {
			
 
				     const int nb = k / qk;
			
 
				     const size_t bs = (2*sizeof(float) + sizeof(uint8_t)*qk/2);
			
 
				     const size_t row_size = nb*bs;
			
 
				 
			
 
				     assert(k % qk == 0);
			
 
				 
			
 
				-    const size_t pp_size = qk / 2;
			
 
				-    uint8_t * pp = (uint8_t *) alloca(pp_size);
			
 
				-
			
 
				     char * pdst = (char *) dst;
			
 
				 
			
 
				     for (int j = 0; j < n; j += k) {
			
 
				         uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs);
			
 
				-        uint8_t * pm = (uint8_t *) (pdst + (j/k)*row_size + 0*bs +   sizeof(float));
			
 
				         uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + 2*sizeof(float));
			
 
				 
			
 
				-        //printf("n = %d, k = %d, nb = %d, row_size = %d, j = %d, pm = %p, pd = %p, pb = %p\n", n, k, nb, row_size, j, pm, pd, pb);
			
 
				+        quantize_row_q4_1(src + j, pd, k);
			
 
				 
			
 
				         for (int i = 0; i < nb; i++) {
			
 
				-            float min = FLT_MAX;
			
 
				-            float max = -FLT_MAX;
			
 
				-
			
 
				-            {
			
 
				-                for (int l = 0; l < qk; l++) {
			
 
				-                    const float v = src[j + i*qk + l];
			
 
				-                    if (v < min) min = v;
			
 
				-                    if (v > max) max = v;
			
 
				-                }
			
 
				-
			
 
				-                const float d = (max - min) / ((1 << 4) - 1);
			
 
				-                const float id = d ? 1.0f/d : 0.0f;
			
 
				-
			
 
				-                *(float *) pd = d;
			
 
				-                *(float *) pm = min;
			
 
				-                pd += bs;
			
 
				-                pm += bs;
			
 
				-
			
 
				-                for (int l = 0; l < qk; l += 2) {
			
 
				-                    const float v0 = (src[j + i*qk + l + 0] - min)*id;
			
 
				-                    const float v1 = (src[j + i*qk + l + 1] - min)*id;
			
 
				-
			
 
				-                    const uint8_t vi0 = round(v0);
			
 
				-                    const uint8_t vi1 = round(v1);
			
 
				-
			
 
				-                    assert(vi0 >= 0 && vi0 < 16);
			
 
				-                    assert(vi1 >= 0 && vi1 < 16);
			
 
				-
			
 
				-                    hist[vi0]++;
			
 
				-                    hist[vi1]++;
			
 
				-
			
 
				-                    pp[l/2] = vi0 | (vi1 << 4);
			
 
				-                }
			
 
				+            for (int l = 0; l < qk; l += 2) {
			
 
				+                const uint8_t vi0 = pb[l/2] & 0xF;
			
 
				+                const uint8_t vi1 = pb[l/2] >> 4;
			
 
				 
			
 
				-                memcpy(pb, pp, pp_size);
			
 
				-                pb += bs;
			
 
				+                hist[vi0]++;
			
 
				+                hist[vi1]++;
			
 
				             }
			
 
				+            pb += bs;
			
 
				         }
			
 
				     }
			
 
				 
			
--- a/ggml.h
+++ b/ggml.h
@@ -745,8 +745,8 @@ enum ggml_opt_result ggml_opt(
 
				 // quantization
			
 
				 //
			
 
				 
			
 
				-size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist);
			
 
				-size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist);
			
 
				+size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int qk, int64_t * hist);
			
 
				+size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int qk, int64_t * hist);
			
 
				 
			
 
				 //
			
 
				 // system info
			
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,4 +1,9 @@
 
				-set(TEST_TARGET test-tokenizer-0)
			
 
				-add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
			
 
				-target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils)
			
 
				-add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
			
 
				+function(llama_add_test source)
			
 
				+    get_filename_component(TEST_TARGET ${source} NAME_WE)
			
 
				+    add_executable(${TEST_TARGET} ${source})
			
 
				+    target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils)
			
 
				+    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
			
 
				+endfunction()
			
 
				+
			
 
				+llama_add_test(test-quantize.c)
			
 
				+llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
			
--- a/tests/test-quantize.c
+++ b/tests/test-quantize.c
@@ -0,0 +1,42 @@
 
				+#include "ggml.h"
			
 
				+#undef NDEBUG
			
 
				+#include <assert.h>
			
 
				+#include <math.h>
			
 
				+
			
 
				+int main(void) {
			
 
				+    #define QK 32
			
 
				+    float src[QK];
			
 
				+    uint8_t dst[24];
			
 
				+    int64_t hist[16];
			
 
				+
			
 
				+    for (int i = 0; i < QK; i++) {
			
 
				+        src[i] = (float)(i + 1);
			
 
				+    }
			
 
				+
			
 
				+    size_t size = ggml_quantize_q4_0(src, dst, QK, QK, QK, hist);
			
 
				+    assert(size == 20);
			
 
				+    float max_result = ((float *)dst)[0];
			
 
				+    float max_expected = src[31] / ((1 << 3) - 1);
			
 
				+    assert(max_result == max_expected);
			
 
				+    for (int i = 0; i < QK; i++) {
			
 
				+        uint8_t q4_result = (i % 2) ? (dst[sizeof(float) + i/2] >> 4) : (dst[sizeof(float) + i/2] & 0xF);
			
 
				+        uint8_t q4_expected = roundf(src[i] / max_expected) + 8;
			
 
				+        assert(q4_result == q4_expected);
			
 
				+    }
			
 
				+
			
 
				+    size = ggml_quantize_q4_1(src, dst, QK, QK, QK, hist);
			
 
				+    assert(size == 24);
			
 
				+    float delta_result = ((float *)dst)[0];
			
 
				+    float delta_expected = (src[31] - src[0]) / ((1 << 4) - 1);
			
 
				+    assert(delta_result == delta_expected);
			
 
				+    float min_result = ((float *)dst)[1];
			
 
				+    float min_expected = src[0];
			
 
				+    assert(min_result == min_expected);
			
 
				+    for (int i = 0; i < QK; i++) {
			
 
				+        uint8_t q4_result = (i % 2) ? (dst[sizeof(float)*2 + i/2] >> 4) : (dst[sizeof(float)*2 + i/2] & 0xF);
			
 
				+        uint8_t q4_expected = roundf((src[i] - min_expected) / delta_expected);
			
 
				+        assert(q4_result == q4_expected);
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+}