il y a 1 an · dca1d4b58a
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -314,9 +314,9 @@ struct lora_merge_ctx {
 
				             // optionally dequantize it
			
 
				             printf("%s :   + dequantize base tensor from %s to F32\n", __func__, ggml_type_name(base->type));
			
 
				             auto nels = ggml_nelements(inp_base);
			
 
				-            ggml_type_traits_t qtype = ggml_internal_get_type_traits(base->type);
			
 
				+            const auto * qtype = ggml_get_type_traits(base->type);
			
 
				             std::vector<uint8_t> dequant_buf(nels * sizeof(float));
			
 
				-            qtype.to_float(read_buf.data(), (float *)dequant_buf.data(), nels);
			
 
				+            qtype->to_float(read_buf.data(), (float *)dequant_buf.data(), nels);
			
 
				             ggml_backend_tensor_set(inp_base, dequant_buf.data(), 0, dequant_buf.size());
			
 
				         } else {
			
 
				             ggml_backend_tensor_set(inp_base, read_buf.data(), 0, ggml_nbytes(inp_base));
			
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -142,7 +142,7 @@ static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
 
				 }
			
 
				 
			
 
				 static void test_roundtrip_on_chunk(
			
 
				-    const ggml_tensor * layer, int64_t offset, int64_t chunk_size, const ggml_type_traits_t & qfns, bool use_reference,
			
 
				+    const ggml_tensor * layer, int64_t offset, int64_t chunk_size, const ggml_type_traits & qfns, bool use_reference,
			
 
				     float * input_scratch, char * quantized_scratch, float * output_scratch, error_stats & stats
			
 
				 ) {
			
 
				     if (layer->type == GGML_TYPE_F16) {
			
@@ -166,7 +166,7 @@ static void test_roundtrip_on_chunk(
 
				 
			
 
				 // Run quantization function for a single layer and update error stats
			
 
				 static void test_roundtrip_on_layer(
			
 
				-    std::string & name, bool print_layer_stats, const ggml_type_traits_t & qfns, bool use_reference,
			
 
				+    std::string & name, bool print_layer_stats, const ggml_type_traits & qfns, bool use_reference,
			
 
				     const ggml_tensor * layer, std::vector<float> & input_scratch, std::vector<char> & quantized_scratch,
			
 
				     std::vector<float> & output_scratch, error_stats & total_error, int max_thread = 0
			
 
				 ) {
			
@@ -371,8 +371,8 @@ int main(int argc, char ** argv) {
 
				         if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
			
 
				             continue;
			
 
				         }
			
 
				-        ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
			
 
				-        if (qfns.from_float && qfns.to_float) {
			
 
				+        const auto *  qfns = ggml_get_type_traits(type);
			
 
				+        if (qfns->from_float && qfns->to_float) {
			
 
				             if (params.verbose) {
			
 
				                 printf("testing %s ...\n",  ggml_type_name(type));
			
 
				             }
			
@@ -393,7 +393,7 @@ int main(int argc, char ** argv) {
 
				                 test_roundtrip_on_layer(
			
 
				                         layer_name,
			
 
				                         params.per_layer_stats,
			
 
				-                        qfns,
			
 
				+                        *qfns,
			
 
				                         params.reference,
			
 
				                         kv_tensor.second,
			
 
				                         input_scratch,
			
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -2535,7 +2535,7 @@ extern "C" {
 
				     typedef void (*ggml_gemm_t)     (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
			
 
				                                        const void * GGML_RESTRICT y, int nr, int nc);
			
 
				 
			
 
				-    typedef struct {
			
 
				+    struct ggml_type_traits {
			
 
				         const char             * type_name;
			
 
				         int64_t                  blck_size;
			
 
				         int64_t                  blck_size_interleave; // interleave elements in blocks
			
@@ -2551,9 +2551,9 @@ extern "C" {
 
				         int64_t                  ncols; // number of columns to process simultaneously
			
 
				         ggml_gemv_t              gemv;
			
 
				         ggml_gemm_t              gemm;
			
 
				-    } ggml_type_traits_t;
			
 
				+    };
			
 
				 
			
 
				-    GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
			
 
				+    GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
			
 
				 
			
 
				 #ifdef  __cplusplus
			
 
				 }
			
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -1177,7 +1177,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
 
				                 op->type != GGML_TYPE_IQ1_S   &&
			
 
				                 op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
			
 
				         case GGML_OP_MUL_MAT:
			
 
				-            return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
			
 
				+            return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits(op->src[0]->type)->vec_dot_type;
			
 
				         case GGML_OP_ROPE_BACK:
			
 
				             return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
			
 
				         case GGML_OP_IM2COL_BACK:
			
--- a/ggml/src/ggml-blas.cpp
+++ b/ggml/src/ggml-blas.cpp
@@ -65,8 +65,8 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
 
				 
			
 
				     // convert src0 to float
			
 
				     if (type != GGML_TYPE_F32) {
			
 
				-        ggml_type_traits_t type_traits = ggml_internal_get_type_traits(type);
			
 
				-        ggml_to_float_t const to_float = type_traits.to_float;
			
 
				+        const auto * type_traits = ggml_get_type_traits(type);
			
 
				+        ggml_to_float_t const to_float = type_traits->to_float;
			
 
				 
			
 
				         for (int64_t i03 = 0; i03 < ne03; i03++) {
			
 
				             for (int64_t i02 = 0; i02 < ne02; i02++) {
			
@@ -420,19 +420,21 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s
 
				             // TODO: find the optimal value
			
 
				             const int64_t min_batch = 32;
			
 
				 
			
 
				-            return (ggml_is_contiguous(src0) &&
			
 
				-                    ggml_is_contiguous(src1) &&
			
 
				-                    src1->type == GGML_TYPE_F32 &&
			
 
				-                    (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch));
			
 
				+            return ggml_is_contiguous(src0) &&
			
 
				+                   ggml_is_contiguous(src1) &&
			
 
				+                   src1->type == GGML_TYPE_F32 &&
			
 
				+                   (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
			
 
				+                   (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
			
 
				         }
			
 
				 
			
 
				         case GGML_OP_OUT_PROD:
			
 
				-            return (op->src[0]->type == GGML_TYPE_F32 &&
			
 
				-                    op->src[1]->type == GGML_TYPE_F32 &&
			
 
				-                    ggml_is_matrix(src0) &&
			
 
				-                    ggml_is_matrix(src1) &&
			
 
				-                    ggml_is_contiguous(src0) &&
			
 
				-                    (ggml_is_contiguous(src1) || ggml_is_transposed(src1)));
			
 
				+            return op->src[0]->type == GGML_TYPE_F32 &&
			
 
				+                   op->src[1]->type == GGML_TYPE_F32 &&
			
 
				+                   ggml_is_matrix(src0) &&
			
 
				+                   ggml_is_matrix(src1) &&
			
 
				+                   ggml_is_contiguous(src0) &&
			
 
				+                   (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
			
 
				+                   (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
			
 
				 
			
 
				         default:
			
 
				             return false;
			
--- a/ggml/src/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan.cpp
@@ -5287,9 +5287,9 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg
 
				         return;
			
 
				     }
			
 
				 
			
 
				-    ggml_type_traits_t tt = ggml_internal_get_type_traits(quant);
			
 
				+    const auto * tt = ggml_get_type_traits(quant);
			
 
				 
			
 
				-    ggml_to_float_t dequant_fn = tt.to_float;
			
 
				+    ggml_to_float_t dequant_fn = tt->to_float;
			
 
				 
			
 
				     dequant_fn(from, to, ne);
			
 
				 }
			
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -729,7 +729,7 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
 
				 static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
			
 
				 static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
			
 
				 
			
 
				-static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
			
 
				+static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
			
 
				     [GGML_TYPE_I8] = {
			
 
				         .type_name                = "i8",
			
 
				         .blck_size                = 1,
			
@@ -1151,9 +1151,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 
				 };
			
 
				 
			
 
				 // For internal test use
			
 
				-ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
			
 
				+const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
			
 
				     GGML_ASSERT(type < GGML_TYPE_COUNT);
			
 
				-    return type_traits[type];
			
 
				+    return &type_traits[type];
			
 
				 }
			
 
				 
			
 
				 //
			
--- a/pocs/vdot/q8dot.cpp
+++ b/pocs/vdot/q8dot.cpp
@@ -136,7 +136,7 @@ int main(int argc, char** argv) {
 
				 
			
 
				     auto ggml_type = type == 0 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q4_1;
			
 
				 
			
 
				-    auto funcs = ggml_internal_get_type_traits(ggml_type);
			
 
				+    const auto * funcs = ggml_get_type_traits(ggml_type);
			
 
				 
			
 
				     Stat simple, ggml;
			
 
				 
			
@@ -156,8 +156,8 @@ int main(int argc, char** argv) {
 
				 
			
 
				         t1 = std::chrono::high_resolution_clock::now();
			
 
				         float fs;
			
 
				-        if (type == 0) funcs.vec_dot(kVecSize * QK4_1, &fs, 0, x40.data(), 0, y.data(), 0, 1);
			
 
				-        else funcs.vec_dot(kVecSize * QK4_1, &fs, 0, x41.data(), 0, y.data(), 0, 1);
			
 
				+        if (type == 0) funcs->vec_dot(kVecSize * QK4_1, &fs, 0, x40.data(), 0, y.data(), 0, 1);
			
 
				+        else funcs->vec_dot(kVecSize * QK4_1, &fs, 0, x41.data(), 0, y.data(), 0, 1);
			
 
				         t2 = std::chrono::high_resolution_clock::now();
			
 
				         t = 1e-3*std::chrono::duration_cast<std::chrono::nanoseconds>(t2-t1).count();
			
 
				         if (iloop > 3) ggml.addResult(fs, t);
			
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@@ -236,7 +236,7 @@ int main(int argc, char** argv) {
 
				     int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
			
 
				     int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
			
 
				 
			
 
				-    auto funcs = useQ4_1 ? ggml_internal_get_type_traits(GGML_TYPE_Q4_1) : ggml_internal_get_type_traits(GGML_TYPE_Q4_0);
			
 
				+    const auto * funcs = useQ4_1 ? ggml_get_type_traits(GGML_TYPE_Q4_1) : ggml_get_type_traits(GGML_TYPE_Q4_0);
			
 
				 
			
 
				     std::vector<block_q4_0> q40;
			
 
				     std::vector<block_q4_1> q41;
			
@@ -261,9 +261,9 @@ int main(int argc, char** argv) {
 
				         // Note, we do not include this in the timing as in practical application
			
 
				         // we already have the quantized model weights.
			
 
				         if (useQ4_1) {
			
 
				-            funcs.from_float(x1.data(), q41.data(), kVecSize);
			
 
				+            funcs->from_float(x1.data(), q41.data(), kVecSize);
			
 
				         } else {
			
 
				-            funcs.from_float(x1.data(), q40.data(), kVecSize);
			
 
				+            funcs->from_float(x1.data(), q40.data(), kVecSize);
			
 
				         }
			
 
				 
			
 
				         // Now measure time the dot product needs using the "scalar" version above
			
@@ -282,10 +282,10 @@ int main(int argc, char** argv) {
 
				             dot_q4_q8(kVecSize, &result, q40.data(), q8.data());
			
 
				         }
			
 
				         else {
			
 
				-            auto vdot = ggml_internal_get_type_traits(funcs.vec_dot_type);
			
 
				-            vdot.from_float(y1.data(), q8.data(), kVecSize);
			
 
				-            if (useQ4_1) funcs.vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1);
			
 
				-            else funcs.vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1);
			
 
				+            const auto * vdot = ggml_get_type_traits(funcs->vec_dot_type);
			
 
				+            vdot->from_float(y1.data(), q8.data(), kVecSize);
			
 
				+            if (useQ4_1) funcs->vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1);
			
 
				+            else funcs->vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1);
			
 
				         }
			
 
				         sumq += result;
			
 
				         t2 = std::chrono::high_resolution_clock::now();
			
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17872,10 +17872,9 @@ static void llama_tensor_dequantize_internal(
 
				     }
			
 
				     float * f32_output = (float *) output.data();
			
 
				 
			
 
				-    ggml_type_traits_t qtype;
			
 
				+    const ggml_type_traits * qtype = ggml_get_type_traits(tensor->type);
			
 
				     if (ggml_is_quantized(tensor->type)) {
			
 
				-        qtype = ggml_internal_get_type_traits(tensor->type);
			
 
				-        if (qtype.to_float == NULL) {
			
 
				+        if (qtype->to_float == NULL) {
			
 
				             throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor->type)));
			
 
				         }
			
 
				     } else if (tensor->type != GGML_TYPE_F16 &&
			
@@ -17889,7 +17888,7 @@ static void llama_tensor_dequantize_internal(
 
				         } else if (tensor->type == GGML_TYPE_BF16) {
			
 
				             ggml_bf16_to_fp32_row((ggml_bf16_t *)tensor->data, f32_output, nelements);
			
 
				         } else if (ggml_is_quantized(tensor->type)) {
			
 
				-            qtype.to_float(tensor->data, f32_output, nelements);
			
 
				+            qtype->to_float(tensor->data, f32_output, nelements);
			
 
				         } else {
			
 
				             GGML_ABORT("fatal error"); // unreachable
			
 
				         }
			
@@ -17925,7 +17924,7 @@ static void llama_tensor_dequantize_internal(
 
				             } else if (typ == GGML_TYPE_BF16) {
			
 
				                 ggml_bf16_to_fp32_row((ggml_bf16_t *)inbuf, outbuf, nels);
			
 
				             } else {
			
 
				-                qtype.to_float(inbuf, outbuf, nels);
			
 
				+                qtype->to_float(inbuf, outbuf, nels);
			
 
				             }
			
 
				         };
			
 
				         workers.emplace_back(compute, tensor->type, (uint8_t *) tensor->data + in_buff_offs, f32_output + out_buff_offs, thr_elems);
			
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -133,7 +133,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
 
				     std::vector<uint8_t> buf(ggml_nbytes(t));
			
 
				     ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
			
 
				 
			
 
				-    ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
			
 
				+    const auto * tt = ggml_get_type_traits(t->type);
			
 
				     size_t bs = ggml_blck_size(t->type);
			
 
				     std::vector<float> vq(ggml_blck_size(t->type));
			
 
				     bool quantized = ggml_is_quantized(t->type);
			
@@ -159,7 +159,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
 
				                     } else if (t->type == GGML_TYPE_I8) {
			
 
				                         tv.push_back((float)*(int8_t *) &buf[i]);
			
 
				                     } else if (quantized) {
			
 
				-                        tt.to_float(&buf[i], vq.data(), bs);
			
 
				+                        tt->to_float(&buf[i], vq.data(), bs);
			
 
				                         tv.insert(tv.end(), vq.begin(), vq.end());
			
 
				                     } else {
			
 
				                         GGML_ABORT("fatal error");
			
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -44,26 +44,26 @@ static float array_rmse(const float * a1, const float * a2, size_t n) {
 
				 }
			
 
				 
			
 
				 // Total quantization error on test data
			
 
				-static float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
			
 
				+static float total_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
			
 
				     std::vector<uint8_t> tmp_q(2*test_size);
			
 
				     std::vector<float> tmp_out(test_size);
			
 
				 
			
 
				-    qfns.from_float(test_data, tmp_q.data(), test_size);
			
 
				-    qfns.to_float(tmp_q.data(), tmp_out.data(), test_size);
			
 
				+    qfns->from_float(test_data, tmp_q.data(), test_size);
			
 
				+    qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
			
 
				     return array_rmse(test_data, tmp_out.data(), test_size);
			
 
				 }
			
 
				 
			
 
				 // Total quantization error on test data
			
 
				-static float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
			
 
				+static float reference_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
			
 
				     std::vector<uint8_t> tmp_q(2*test_size);
			
 
				     std::vector<float> tmp_out(test_size);
			
 
				     std::vector<float> tmp_out_ref(test_size);
			
 
				 
			
 
				-    qfns.from_float(test_data, tmp_q.data(), test_size);
			
 
				-    qfns.to_float(tmp_q.data(), tmp_out.data(), test_size);
			
 
				+    qfns->from_float(test_data, tmp_q.data(), test_size);
			
 
				+    qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
			
 
				 
			
 
				-    qfns.from_float_ref(test_data, tmp_q.data(), test_size);
			
 
				-    qfns.to_float(tmp_q.data(), tmp_out_ref.data(), test_size);
			
 
				+    qfns->from_float_ref(test_data, tmp_q.data(), test_size);
			
 
				+    qfns->to_float(tmp_q.data(), tmp_out_ref.data(), test_size);
			
 
				 
			
 
				     return array_rmse(tmp_out.data(), tmp_out_ref.data(), test_size);
			
 
				 }
			
@@ -78,18 +78,18 @@ static float dot_product(const float * a1, const float * a2, size_t test_size) {
 
				 
			
 
				 // Total dot product error
			
 
				 static float dot_product_error(
			
 
				-    ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float *test_data2
			
 
				+    const ggml_type_traits * qfns, size_t test_size, const float * test_data1, const float *test_data2
			
 
				 ) {
			
 
				     std::vector<uint8_t> tmp_q1(2*test_size);
			
 
				     std::vector<uint8_t> tmp_q2(2*test_size);
			
 
				 
			
 
				-    auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
			
 
				+    const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type);
			
 
				 
			
 
				-    qfns.from_float(test_data1, tmp_q1.data(), test_size);
			
 
				-    vdot.from_float(test_data2, tmp_q2.data(), test_size);
			
 
				+    qfns->from_float(test_data1, tmp_q1.data(), test_size);
			
 
				+    vdot->from_float(test_data2, tmp_q2.data(), test_size);
			
 
				 
			
 
				     float result = INFINITY;
			
 
				-    qfns.vec_dot(test_size, &result, 0, tmp_q1.data(), 0, tmp_q2.data(), 0, 1);
			
 
				+    qfns->vec_dot(test_size, &result, 0, tmp_q1.data(), 0, tmp_q2.data(), 0, 1);
			
 
				 
			
 
				     const float dot_ref = dot_product(test_data1, test_data2, test_size);
			
 
				 
			
@@ -131,10 +131,10 @@ int main(int argc, char * argv[]) {
 
				 
			
 
				     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
			
 
				         ggml_type type = (ggml_type) i;
			
 
				-        ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
			
 
				+        const auto * qfns = ggml_get_type_traits(type);
			
 
				 
			
 
				         // deprecated - skip
			
 
				-        if (qfns.blck_size == 0) {
			
 
				+        if (qfns->blck_size == 0) {
			
 
				             continue;
			
 
				         }
			
 
				 
			
@@ -143,7 +143,7 @@ int main(int argc, char * argv[]) {
 
				         printf("Testing %s\n", ggml_type_name((ggml_type) i));
			
 
				         ggml_quantize_init(ei);
			
 
				 
			
 
				-        if (qfns.from_float && qfns.to_float) {
			
 
				+        if (qfns->from_float && qfns->to_float) {
			
 
				             const float total_error = total_quantization_error(qfns, test_size, test_data.data());
			
 
				             const float max_quantization_error =
			
 
				                 type == GGML_TYPE_TQ1_0   ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
			
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -122,9 +122,9 @@ static void usage(char * argv[]) {
 
				     printf("  --type TYPE           set test type as");
			
 
				     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
			
 
				         ggml_type type = (ggml_type) i;
			
 
				-        ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
			
 
				+        const auto * qfns = ggml_get_type_traits(type);
			
 
				         if (ggml_type_name(type) != NULL) {
			
 
				-            if (qfns.from_float && qfns.to_float) {
			
 
				+            if (qfns->from_float && qfns->to_float) {
			
 
				                 printf(" %s", ggml_type_name(type));
			
 
				             }
			
 
				         }
			
@@ -270,12 +270,12 @@ int main(int argc, char * argv[]) {
 
				 
			
 
				     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
			
 
				         ggml_type type = (ggml_type) i;
			
 
				-        ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
			
 
				+        const auto * qfns = ggml_get_type_traits(type);
			
 
				         if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
			
 
				             continue;
			
 
				         }
			
 
				 
			
 
				-        if (qfns.from_float && qfns.to_float) {
			
 
				+        if (qfns->from_float && qfns->to_float) {
			
 
				             printf("%s\n", ggml_type_name(type));
			
 
				 
			
 
				             ggml_quantize_init(type);
			
@@ -285,7 +285,7 @@ int main(int argc, char * argv[]) {
 
				                 for (size_t size : params.test_sizes) {
			
 
				                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
			
 
				                     auto quantize_fn = [&](void) -> float {
			
 
				-                        qfns.from_float_ref(test_data1, test_q1, size);
			
 
				+                        qfns->from_float_ref(test_data1, test_q1, size);
			
 
				                         return test_q1[0];
			
 
				                     };
			
 
				                     size_t quantized_size = ggml_row_size(type, size);
			
@@ -299,7 +299,7 @@ int main(int argc, char * argv[]) {
 
				                 for (size_t size : params.test_sizes) {
			
 
				                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
			
 
				                     auto quantize_fn = [&](void) -> float {
			
 
				-                        qfns.from_float(test_data1, test_q1, size);
			
 
				+                        qfns->from_float(test_data1, test_q1, size);
			
 
				                         return test_q1[0];
			
 
				                     };
			
 
				                     size_t quantized_size = ggml_row_size(type, size);
			
@@ -310,11 +310,11 @@ int main(int argc, char * argv[]) {
 
				 
			
 
				             if (params.op_dequantize_row_q) {
			
 
				                 printf("  dequantize_row_q\n");
			
 
				-                qfns.from_float(test_data1, test_q1, largest);
			
 
				+                qfns->from_float(test_data1, test_q1, largest);
			
 
				                 for (size_t size : params.test_sizes) {
			
 
				                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
			
 
				                     auto quantize_fn = [&](void) -> float {
			
 
				-                        qfns.to_float(test_q1, test_out, size);
			
 
				+                        qfns->to_float(test_q1, test_out, size);
			
 
				                         return test_out[0];
			
 
				                     };
			
 
				                     size_t quantized_size = ggml_row_size(type, size);
			
@@ -328,8 +328,8 @@ int main(int argc, char * argv[]) {
 
				                 for (size_t size : params.test_sizes) {
			
 
				                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
			
 
				                     auto quantize_fn = [&](void) -> float {
			
 
				-                        auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
			
 
				-                        vdot.from_float(test_data1, test_q1, size);
			
 
				+                        const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type);
			
 
				+                        vdot->from_float(test_data1, test_q1, size);
			
 
				                         return test_q1[0];
			
 
				                     };
			
 
				                     size_t quantized_size = ggml_row_size(type, size);
			
@@ -340,13 +340,13 @@ int main(int argc, char * argv[]) {
 
				 
			
 
				             if (params.op_vec_dot_q) {
			
 
				                 printf("  vec_dot_q\n");
			
 
				-                qfns.from_float(test_data1, test_q1, largest);
			
 
				-                qfns.from_float(test_data2, test_q2, largest);
			
 
				+                qfns->from_float(test_data1, test_q1, largest);
			
 
				+                qfns->from_float(test_data2, test_q2, largest);
			
 
				                 for (size_t size : params.test_sizes) {
			
 
				                     printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
			
 
				                     auto quantize_fn = [&](void) -> float {
			
 
				                         float result;
			
 
				-                        qfns.vec_dot(size, &result, 0, test_q1, 0, test_q2, 0, 1);
			
 
				+                        qfns->vec_dot(size, &result, 0, test_q1, 0, test_q2, 0, 1);
			
 
				                         return result;
			
 
				                     };
			
 
				                     size_t quantized_size = ggml_row_size(type, size);