Преглед на файлове

sync : ggml (conv 1d + 2d updates, UB fixes) (#3468)

* sync : ggml (conv 1d + 2d updates)

ggml-ci

* ggml : fix UB in q5_0 and q5_1 quantize code

ggml.c:1033:39: runtime error: left shift of 1 by 31 places cannot be represented in type 'int'
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior

ggml.c:1081:39: runtime error: left shift of 1 by 31 places cannot be represented in type 'int'
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior

ggml-ci

* tests : fix UB in test-quantize-perf
Georgi Gerganov преди 2 години
родител
ревизия
f93af02488
променени са 6 файла, в които са добавени 539 реда и са изтрити 249 реда
  1. 512 183
      ggml.c
  2. 13 0
      ggml.h
  3. 0 2
      k_quants.c
  4. 0 20
      tests/test-grad0.cpp
  5. 0 29
      tests/test-opt.cpp
  6. 14 15
      tests/test-quantize-perf.cpp

Файловите разлики са ограничени, защото са твърде много
+ 512 - 183
ggml.c


+ 13 - 0
ggml.h

@@ -401,10 +401,14 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_1D,
         GGML_OP_CONV_2D,
+        GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
 
+        GGML_OP_CONV_1D_STAGE_0,  // internal
+        GGML_OP_CONV_1D_STAGE_1,  // internal
+
         GGML_OP_UPSCALE, // nearest interpolate
 
         GGML_OP_FLASH_ATTN,
@@ -1386,6 +1390,14 @@ extern "C" {
             int                   s,
             int                   d);
 
+    GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            int                   s0,
+            int                   p0,
+            int                   d0);
+
     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1759,6 +1771,7 @@ extern "C" {
         GGML_OPT_NO_CONTEXT,
         GGML_OPT_INVALID_WOLFE,
         GGML_OPT_FAIL,
+        GGML_OPT_CANCEL,
 
         GGML_LINESEARCH_FAIL = -128,
         GGML_LINESEARCH_MINIMUM_STEP,

+ 0 - 2
k_quants.c

@@ -69,7 +69,6 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
 // 2-6 bit quantization in super-blocks
 //
 
-
 //
 // ===================== Helper functions
 //
@@ -348,7 +347,6 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
     const float q4scale = 15.f;
 
     for (int i = 0; i < nb; i++) {
-
         float max_scale = 0; // as we are deducting the min, scales are always positive
         float max_min = 0;
         for (int j = 0; j < QK_K/16; ++j) {

+ 0 - 20
tests/test-grad0.cpp

@@ -208,26 +208,6 @@ static struct ggml_tensor * get_random_tensor_i32(
     return result;
 }
 
-static void print_elements(const char* label, const struct ggml_tensor * t) {
-    if (!t) {
-        printf("%s: %s = null\n", __func__, label);
-        return;
-    }
-    const int nelements = ggml_nelements(t);
-    printf("%s: %s = [", __func__, label);
-    for (int k = 0; k < nelements; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%.5f", ggml_get_f32_1d(t, k));
-    }
-    printf("] shape: [");
-    for (int k = 0; k < t->n_dims; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%d", (int)t->ne[k]);
-    }
-    printf("]\n");
-
-}
-
 static bool check_gradient(
         const char * op_name,
         struct ggml_context * ctx0,

+ 0 - 29
tests/test-opt.cpp

@@ -40,27 +40,6 @@ static float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
 
-static int irand(int n) {
-    return rand()%n;
-}
-
-static void get_random_dims(int64_t * dims, int ndims) {
-    dims[0] = dims[1] = dims[2] = dims[3] = 1;
-
-    for (int i = 0; i < ndims; i++) {
-        dims[i] = 1 + irand(4);
-    }
-}
-
-static void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
-    dims[0] = dims[1] = dims[2] = dims[3] = 1;
-
-    for (int i = 0; i < ndims; i++) {
-        dims[i] = min + irand(max-min);
-    }
-}
-
-
 static struct ggml_tensor * get_random_tensor(
     struct ggml_context * ctx0, int ndims, int64_t ne[], float fmin, float fmax
 ) {
@@ -106,14 +85,6 @@ static struct ggml_tensor * get_random_tensor(
     return result;
 }
 
-static float get_element(const struct ggml_tensor * t, int idx) {
-    return ((float *)t->data)[idx];
-}
-
-static void set_element(struct ggml_tensor * t, int idx, float value) {
-    ((float *)t->data)[idx] = value;
-}
-
 int main(void) {
     struct ggml_init_params params = {
         /* .mem_size   = */ 1024*1024*1024,

+ 14 - 15
tests/test-quantize-perf.cpp

@@ -76,22 +76,21 @@ static void * align_with_offset(void * ptr, int offset) {
     return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
 }
 
-static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
+static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<float(void)> & func) {
     int64_t min_time_us = INT64_MAX;
     int64_t total_time_us = 0;
     int64_t min_time_cycles = INT64_MAX;
     int64_t total_time_cycles = 0;
 
     for (int i = 0; i < WARMUP; i++) {
-        function();
+        func();
     }
 
-
     for (int i = 0; i < iterations; i++) {
         const int64_t start_time = ggml_time_us();
         const int64_t start_cycles = cpu_cycles();
 
-        function();
+        func();
 
         const int64_t end_cycles = cpu_cycles();
         const int64_t end_time = ggml_time_us();
@@ -245,15 +244,15 @@ int main(int argc, char * argv[]) {
 
     std::vector<uint8_t> test_data1_v(largest*4 + MAX_ALIGNMENT*2);
     std::vector<uint8_t> test_data2_v(largest*4 + MAX_ALIGNMENT*2);
-    std::vector<uint8_t> test_q1_v(largest*4 + MAX_ALIGNMENT*2);
-    std::vector<uint8_t> test_q2_v(largest*4 + MAX_ALIGNMENT*2);
-    std::vector<uint8_t> test_out_v(largest*4 + MAX_ALIGNMENT*2);
+    std::vector<uint8_t> test_q1_v   (largest*4 + MAX_ALIGNMENT*2);
+    std::vector<uint8_t> test_q2_v   (largest*4 + MAX_ALIGNMENT*2);
+    std::vector<uint8_t> test_out_v  (largest*4 + MAX_ALIGNMENT*2);
 
     float * test_data1 = (float *) align_with_offset(test_data1_v.data(), params.alignment_offset);
     float * test_data2 = (float *) align_with_offset(test_data2_v.data(), params.alignment_offset);
-    float * test_q1 = (float *) align_with_offset(test_q1_v.data(), params.alignment_offset);
-    float * test_q2 = (float *) align_with_offset(test_q2_v.data(), params.alignment_offset);
-    float * test_out = (float *) align_with_offset(test_out_v.data(), params.alignment_offset);
+    float * test_q1    = (float *) align_with_offset(test_q1_v.data(),    params.alignment_offset);
+    float * test_q2    = (float *) align_with_offset(test_q2_v.data(),    params.alignment_offset);
+    float * test_out   = (float *) align_with_offset(test_out_v.data(),   params.alignment_offset);
 
     generate_data(0, largest, test_data1);
     generate_data(1, largest, test_data2);
@@ -283,7 +282,7 @@ int main(int argc, char * argv[]) {
                 printf("  quantize_row_q_reference\n");
                 for (size_t size : params.test_sizes) {
                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                    auto quantize_fn = [&](void ) {
+                    auto quantize_fn = [&](void) -> float {
                         qfns.from_float_reference(test_data1, test_q1, size);
                         return test_q1[0];
                     };
@@ -297,7 +296,7 @@ int main(int argc, char * argv[]) {
                 printf("  quantize_row_q\n");
                 for (size_t size : params.test_sizes) {
                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                    auto quantize_fn = [&](void ) {
+                    auto quantize_fn = [&](void) -> float {
                         qfns.from_float(test_data1, test_q1, size);
                         return test_q1[0];
                     };
@@ -312,7 +311,7 @@ int main(int argc, char * argv[]) {
                 qfns.from_float(test_data1, test_q1, largest);
                 for (size_t size : params.test_sizes) {
                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                    auto quantize_fn = [&](void ) {
+                    auto quantize_fn = [&](void) -> float {
                         qfns.to_float(test_q1, test_out, size);
                         return test_out[0];
                     };
@@ -326,7 +325,7 @@ int main(int argc, char * argv[]) {
                 printf("  quantize_row_q_dot\n");
                 for (size_t size : params.test_sizes) {
                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                    auto quantize_fn = [&](void ) {
+                    auto quantize_fn = [&](void) -> float {
                         auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
                         vdot.from_float(test_data1, test_q1, size);
                         return test_q1[0];
@@ -343,7 +342,7 @@ int main(int argc, char * argv[]) {
                 qfns.from_float(test_data2, test_q2, largest);
                 for (size_t size : params.test_sizes) {
                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                    auto quantize_fn = [&](void ) {
+                    auto quantize_fn = [&](void) -> float {
                         float result;
                         qfns.vec_dot(size, &result, test_q1, test_q2);
                         return result;

Някои файлове не бяха показани, защото твърде много файлове са променени