1 rok pred · 53ff6b9b9f
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -3,3 +3,9 @@
 
															 /ci/ @ggerganov
														
 
															 /.devops/*.Dockerfile @ngxson
														
 
															 /examples/server/ @ngxson
														
 
															+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
														
 
															+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
														
 
															+/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
														
 
															+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
														
 
															+/ggml/src/ggml-opt.cpp @JohannesGaessler
														
 
															+/ggml/src/gguf.cpp @JohannesGaessler
														
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2,6 +2,9 @@
 
															 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
														
 
															 #endif
														
 
															+#include "ggml.h"
														
 
															+#include "gguf.h"
														
 
															+
														
 
															 #include "common.h"
														
 
															 #include "log.h"
														
 
															 // Change JSON_ASSERT from assert() to GGML_ASSERT:
														
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -1,4 +1,6 @@
 
															 #include "ggml.h"
														
 
															+#include "gguf.h"
														
 
															+
														
 
															 #include "llama.h"
														
 
															 #include "common.h"
														
 
															 #include "log.h"
														
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@@ -1,7 +1,9 @@
 
															+#include "ggml.h"
														
 
															+#include "gguf.h"
														
 
															+
														
 
															 #include "arg.h"
														
 
															 #include "common.h"
														
 
															 #include "llama.h"
														
 
															-#include "ggml.h"
														
 
															 #include "pca.hpp"
														
 
															 #include "mean.hpp"
														
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -1,7 +1,9 @@
 
															-#include "arg.h"
														
 
															-#include "common.h"
														
 
															 #include "ggml.h"
														
 
															 #include "ggml-alloc.h"
														
 
															+#include "gguf.h"
														
 
															+
														
 
															+#include "arg.h"
														
 
															+#include "common.h"
														
 
															 #include <map>
														
 
															 #include <vector>
														
--- a/examples/gguf-hash/gguf-hash.cpp
+++ b/examples/gguf-hash/gguf-hash.cpp
@@ -1,4 +1,5 @@
 
															 #include "ggml.h"
														
 
															+#include "gguf.h"
														
 
															 #include <cstdlib>   /* abort() */
														
 
															 #include <cstddef>
														
--- a/examples/gguf-split/gguf-split.cpp
+++ b/examples/gguf-split/gguf-split.cpp
@@ -1,16 +1,18 @@
 
															+#include "ggml.h"
														
 
															+#include "gguf.h"
														
 
															 #include "llama.h"
														
 
															 #include "common.h"
														
 
															 #include <algorithm>
														
 
															+#include <cinttypes>
														
 
															+#include <climits>
														
 
															+#include <cstdio>
														
 
															 #include <cstdlib>
														
 
															+#include <stdexcept>
														
 
															+#include <cstring>
														
 
															 #include <fstream>
														
 
															 #include <string>
														
 
															 #include <vector>
														
 
															-#include <climits>
														
 
															-
														
 
															-#include <cstdio>
														
 
															-#include <cstring>
														
 
															-#include <stdexcept>
														
 
															 #if defined(_WIN32)
														
 
															     #include <windows.h>
														
@@ -296,7 +298,7 @@ struct split_strategy {
 
															                 total_size += ggml_nbytes(t);
														
 
															             }
														
 
															             total_size = total_size / 1000 / 1000; // convert to megabytes
														
 
															-            printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
														
 
															+            printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
														
 
															             i_split++;
														
 
															         }
														
 
															     }
														
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -1,10 +1,9 @@
 
															 #include "ggml.h"
														
 
															+#include "gguf.h"
														
 
															 #include <cstdio>
														
 
															-#include <cinttypes>
														
 
															 #include <string>
														
 
															 #include <sstream>
														
 
															-#include <fstream>
														
 
															 #include <vector>
														
 
															 #undef MIN
														
@@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {
 
															         for (int i = 0; i < n_tensors; ++i) {
														
 
															             const char * name   = gguf_get_tensor_name  (ctx, i);
														
 
															+            const size_t size   = gguf_get_tensor_size  (ctx, i);
														
 
															             const size_t offset = gguf_get_tensor_offset(ctx, i);
														
 
															-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
														
 
															+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
														
 
															         }
														
 
															     }
														
@@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
 
															         for (int i = 0; i < n_tensors; ++i) {
														
 
															             const char * name   = gguf_get_tensor_name  (ctx, i);
														
 
															+            const size_t size   = gguf_get_tensor_size  (ctx, i);
														
 
															             const size_t offset = gguf_get_tensor_offset(ctx, i);
														
 
															-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
														
 
															+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
														
 
															         }
														
 
															     }
														
@@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
 
															             struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
														
 
															-            printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
														
 
															+            printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
														
 
															+                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
														
 
															             // print first 10 elements
														
 
															             const float * data = (const float *) cur->data;
														
@@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
 
															                 const float * data = (const float *) cur->data;
														
 
															                 for (int j = 0; j < ggml_nelements(cur); ++j) {
														
 
															                     if (data[j] != 100 + i) {
														
 
															-                        fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
														
 
															+                        fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
														
 
															                         gguf_free(ctx);
														
 
															                         return false;
														
 
															                     }
														
@@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
 
															         check_data = false;
														
 
															     }
														
 
															+    srand(123456);
														
 
															+
														
 
															     const std::string fname(argv[1]);
														
 
															     const std::string mode (argv[2]);
														
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -7,6 +7,7 @@
 
															 #include "ggml-cpu.h"
														
 
															 #include "ggml-alloc.h"
														
 
															 #include "ggml-backend.h"
														
 
															+#include "gguf.h"
														
 
															 //#ifdef GGML_USE_CUDA
														
 
															 //#include "ggml-cuda.h"
														
@@ -262,7 +263,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
 
															             {
														
 
															                 const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
														
 
															                 int arr_n = gguf_get_arr_n(ctx_gguf, i);
														
 
															-                const void * data = gguf_get_arr_data(ctx_gguf, i);
														
 
															+                const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
														
 
															                 std::stringstream ss;
														
 
															                 ss << "[";
														
 
															                 for (int j = 0; j < arr_n; j++) {
														
@@ -2734,7 +2735,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
 
															         total_size_org += orig_size;
														
 
															         total_size_new += new_size;
														
 
															         gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
														
 
															-        gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
														
 
															+        GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
														
 
															+        gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
														
 
															         fout.write((const char *)new_data, new_size);
														
 
															         size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
														
 
															         for (size_t j = 0; j < pad; ++j) {
														
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -243,7 +243,8 @@ set(GGML_PUBLIC_HEADERS
 
															     include/ggml-metal.h
														
 
															     include/ggml-rpc.h
														
 
															     include/ggml-sycl.h
														
 
															-    include/ggml-vulkan.h)
														
 
															+    include/ggml-vulkan.h
														
 
															+    include/gguf.h)
														
 
															 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
														
 
															 #if (GGML_METAL)
														
--- a/ggml/include/ggml-cpp.h
+++ b/ggml/include/ggml-cpp.h
@@ -7,6 +7,7 @@
 
															 #include "ggml.h"
														
 
															 #include "ggml-alloc.h"
														
 
															 #include "ggml-backend.h"
														
 
															+#include "gguf.h"
														
 
															 #include <memory>
														
 
															 // Smart pointers for ggml types
														
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -241,12 +241,6 @@
 
															 #define GGML_ROPE_TYPE_MROPE  8
														
 
															 #define GGML_ROPE_TYPE_VISION 24
														
 
															-#define GGUF_MAGIC "GGUF"
														
 
															-
														
 
															-#define GGUF_VERSION 3
														
 
															-
														
 
															-#define GGUF_DEFAULT_ALIGNMENT 32
														
 
															-
														
 
															 #define GGML_UNUSED(x) (void)(x)
														
 
															 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
														
@@ -403,12 +397,6 @@ extern "C" {
 
															         GGML_PREC_F32,
														
 
															     };
														
 
															-    enum ggml_backend_type {
														
 
															-        GGML_BACKEND_TYPE_CPU = 0,
														
 
															-        GGML_BACKEND_TYPE_GPU = 10,
														
 
															-        GGML_BACKEND_TYPE_GPU_SPLIT = 20,
														
 
															-    };
														
 
															-
														
 
															     // model file types
														
 
															     enum ggml_ftype {
														
 
															         GGML_FTYPE_UNKNOWN        = -1,
														
@@ -587,8 +575,6 @@ extern "C" {
 
															     struct ggml_tensor {
														
 
															         enum ggml_type type;
														
 
															-        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
														
 
															-
														
 
															         struct ggml_backend_buffer * buffer;
														
 
															         int64_t ne[GGML_MAX_DIMS]; // number of elements
														
@@ -2111,132 +2097,6 @@ extern "C" {
 
															                    int64_t   n_per_row,
														
 
															                const float * imatrix);
														
 
															-    //
														
 
															-    // gguf
														
 
															-    //
														
 
															-
														
 
															-    enum gguf_type {
														
 
															-        GGUF_TYPE_UINT8   = 0,
														
 
															-        GGUF_TYPE_INT8    = 1,
														
 
															-        GGUF_TYPE_UINT16  = 2,
														
 
															-        GGUF_TYPE_INT16   = 3,
														
 
															-        GGUF_TYPE_UINT32  = 4,
														
 
															-        GGUF_TYPE_INT32   = 5,
														
 
															-        GGUF_TYPE_FLOAT32 = 6,
														
 
															-        GGUF_TYPE_BOOL    = 7,
														
 
															-        GGUF_TYPE_STRING  = 8,
														
 
															-        GGUF_TYPE_ARRAY   = 9,
														
 
															-        GGUF_TYPE_UINT64  = 10,
														
 
															-        GGUF_TYPE_INT64   = 11,
														
 
															-        GGUF_TYPE_FLOAT64 = 12,
														
 
															-        GGUF_TYPE_COUNT,       // marks the end of the enum
														
 
															-    };
														
 
															-
														
 
															-    struct gguf_context;
														
 
															-
														
 
															-    struct gguf_init_params {
														
 
															-        bool no_alloc;
														
 
															-
														
 
															-        // if not NULL, create a ggml_context and allocate the tensor data in it
														
 
															-        struct ggml_context ** ctx;
														
 
															-    };
														
 
															-
														
 
															-    GGML_API struct gguf_context * gguf_init_empty(void);
														
 
															-    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
														
 
															-    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
														
 
															-
														
 
															-    GGML_API void gguf_free(struct gguf_context * ctx);
														
 
															-
														
 
															-    GGML_API const char * gguf_type_name(enum gguf_type type);
														
 
															-
														
 
															-    GGML_API int    gguf_get_version    (const struct gguf_context * ctx);
														
 
															-    GGML_API size_t gguf_get_alignment  (const struct gguf_context * ctx);
														
 
															-    GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
														
 
															-    GGML_API void * gguf_get_data       (const struct gguf_context * ctx);
														
 
															-
														
 
															-    GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx);
														
 
															-    GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key);
														
 
															-    GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
														
 
															-
														
 
															-    GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
														
 
															-
														
 
															-    // will abort if the wrong type is used for the key
														
 
															-    GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
														
 
															-    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
														
 
															-
														
 
															-    GGML_API int            gguf_get_n_tensors    (const struct gguf_context * ctx);
														
 
															-    GGML_API int            gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
														
 
															-    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
														
 
															-    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
														
 
															-    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
														
 
															-
														
 
															-    // removes key if it exists
														
 
															-    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
														
 
															-
														
 
															-    // overrides existing values or adds a new one
														
 
															-    GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
														
 
															-    GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
														
 
															-    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
														
 
															-    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t  val);
														
 
															-    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
														
 
															-    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t  val);
														
 
															-    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float    val);
														
 
															-    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
														
 
															-    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t  val);
														
 
															-    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double   val);
														
 
															-    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool     val);
														
 
															-    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
														
 
															-    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
														
 
															-    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
														
 
															-
														
 
															-    // set or add KV pairs from another context
														
 
															-    GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
														
 
															-
														
 
															-    // manage tensor info
														
 
															-    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
														
 
															-    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
														
 
															-    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
														
 
															-
														
 
															-    // writing gguf files can be done in 2 ways:
														
 
															-    //
														
 
															-    // - write the entire gguf_context to a binary file in a single pass:
														
 
															-    //
														
 
															-    //   gguf_write_to_file(ctx, fname);
														
 
															-    //
														
 
															-    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
														
 
															-    //
														
 
															-    //   FILE * f = fopen(fname, "wb");
														
 
															-    //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
														
 
															-    //   fwrite(f, ...);
														
 
															-    //   void * data = gguf_meta_get_meta_data(ctx);
														
 
															-    //   fseek(f, 0, SEEK_SET);
														
 
															-    //   fwrite(f, data, gguf_get_meta_size(ctx));
														
 
															-    //   free(data);
														
 
															-    //   fclose(f);
														
 
															-    //
														
 
															-
														
 
															-    // write the entire context to a binary file
														
 
															-    GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
														
 
															-
														
 
															-    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
														
 
															-    GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
														
 
															-    GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
														
 
															-
														
 
															 #ifdef __cplusplus
														
 
															     // restrict not standard in C++
														
 
															 #    if defined(__GNUC__)
														
--- a/ggml/include/gguf.h
+++ b/ggml/include/gguf.h
@@ -0,0 +1,202 @@
 
															+// This file contains functionality related to "GGUF" files, the binary file format used by ggml.
														
 
															+// GGUF files have the following structure:
														
 
															+//
														
 
															+// 1. File magic "GGUF" (4 bytes).
														
 
															+// 2. File version (uint32_t).
														
 
															+// 3. Number of ggml tensors in file (int64_t).
														
 
															+// 4. Number of key-value-pairs in file (int64_t).
														
 
															+// 5. For each KV pair:
														
 
															+//   1. The key (string).
														
 
															+//   2. The value type (gguf_type).
														
 
															+//   3a. If the value type is GGUF_TYPE_ARRAY:
														
 
															+//     1. The type of the array (gguf_type).
														
 
															+//     2. The number of elements in the array (uint64_t).
														
 
															+//     3. The binary representation of each element in the array.
														
 
															+//   3b. Otherwise:
														
 
															+//     1. The binary representation of the value.
														
 
															+// 6. For each ggml tensor:
														
 
															+//   1. The tensor name (string).
														
 
															+//   2. The number of dimensions of the tensor (uint32_t).
														
 
															+//   3. For each dimension:
														
 
															+//     1. The size of the tensor in the dimension (int64_t).
														
 
															+//   4. The tensor data type (ggml_type).
														
 
															+//   5. The tensor data offset in the tensor data binary blob (uint64_t).
														
 
															+// 7. The tensor data binary blob (optional, aligned).
														
 
															+//
														
 
															+// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
														
 
															+// All enums are stored as int32_t.
														
 
															+// All bool values are stored as int8_t.
														
 
															+// If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
														
 
															+//   otherwise GGUF_DEFAULT_ALIGNMENT is used.
														
 
															+//
														
 
															+// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
														
 
															+
														
 
															+#pragma once
														
 
															+
														
 
															+#include "ggml.h"
														
 
															+
														
 
															+#include <stdbool.h>
														
 
															+#include <stdint.h>
														
 
															+
														
 
															+#define GGUF_MAGIC   "GGUF"
														
 
															+#define GGUF_VERSION 3
														
 
															+
														
 
															+#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment"
														
 
															+
														
 
															+#define GGUF_DEFAULT_ALIGNMENT 32
														
 
															+
														
 
															+#ifdef  __cplusplus
														
 
															+extern "C" {
														
 
															+#endif
														
 
															+
														
 
															+    // types that can be stored as GGUF KV data; the value type is written to the file as int32_t (like all enums), so these numeric values appear in GGUF files
														
 
															+    enum gguf_type {
														
 
															+        GGUF_TYPE_UINT8   = 0,
														
 
															+        GGUF_TYPE_INT8    = 1,
														
 
															+        GGUF_TYPE_UINT16  = 2,
														
 
															+        GGUF_TYPE_INT16   = 3,
														
 
															+        GGUF_TYPE_UINT32  = 4,
														
 
															+        GGUF_TYPE_INT32   = 5,
														
 
															+        GGUF_TYPE_FLOAT32 = 6,
														
 
															+        GGUF_TYPE_BOOL    = 7, // stored in the file as int8_t
														
 
															+        GGUF_TYPE_STRING  = 8, // stored as the length (uint64_t) followed by the characters, without null terminator
														
 
															+        GGUF_TYPE_ARRAY   = 9, // stored as the element type (gguf_type), the element count (uint64_t), then each element
														
 
															+        GGUF_TYPE_UINT64  = 10,
														
 
															+        GGUF_TYPE_INT64   = 11,
														
 
															+        GGUF_TYPE_FLOAT64 = 12,
														
 
															+        GGUF_TYPE_COUNT,       // marks the end of the enum
														
 
															+    };
														
 
															+
														
 
															+    struct gguf_context;
														
 
															+
														
 
															+    struct gguf_init_params { // options passed by value to gguf_init_from_file
														
 
															+        bool no_alloc; // NOTE(review): presumably skips allocating the tensor data when true - confirm against the implementation
														
 
															+
														
 
															+        // if not NULL, create a ggml_context and allocate the tensor data in it
														
 
															+        struct ggml_context ** ctx; // optional, may be NULL (see comment above)
														
 
															+    };
														
 
															+
														
 
															+    GGML_API struct gguf_context * gguf_init_empty(void);
														
 
															+    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
														
 
															+    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
														
 
															+
														
 
															+    GGML_API void gguf_free(struct gguf_context * ctx);
														
 
															+
														
 
															+    GGML_API const char * gguf_type_name(enum gguf_type type);
														
 
															+
														
 
															+    GGML_API uint32_t gguf_get_version    (const struct gguf_context * ctx);
														
 
															+    GGML_API size_t   gguf_get_alignment  (const struct gguf_context * ctx);
														
 
															+    GGML_API size_t   gguf_get_data_offset(const struct gguf_context * ctx);
														
 
															+
														
 
															+    GGML_API int64_t      gguf_get_n_kv(const struct gguf_context * ctx);
														
 
															+    GGML_API int64_t      gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found
														
 
															+    GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id);
														
 
															+
														
 
															+    GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id);
														
 
															+
														
 
															+    // will abort if the wrong type is used for the key
														
 
															+    GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id);
														
 
															+    GGML_API size_t       gguf_get_arr_n   (const struct gguf_context * ctx, int64_t key_id);
														
 
															+
														
 
															+    // get raw pointer to the first element of the array with the given key_id
														
 
															+    // for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
														
 
															+    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id);
														
 
															+
														
 
															+    // get ith C string from array with given key_id
														
 
															+    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
														
 
															+
														
 
															+    GGML_API int64_t        gguf_get_n_tensors    (const struct gguf_context * ctx);
														
 
															+    GGML_API int64_t        gguf_find_tensor      (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
														
 
															+    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id);
														
 
															+    GGML_API const char *   gguf_get_tensor_name  (const struct gguf_context * ctx, int64_t tensor_id);
														
 
															+    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int64_t tensor_id);
														
 
															+    GGML_API size_t         gguf_get_tensor_size  (const struct gguf_context * ctx, int64_t tensor_id);
														
 
															+
														
 
															+    // removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
														
 
															+    GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key);
														
 
															+
														
 
															+    // overrides an existing KV pair or adds a new one, the new KV pair is always at the back
														
 
															+    GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t      val);
														
 
															+    GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t       val);
														
 
															+    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t     val);
														
 
															+    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t      val);
														
 
															+    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t     val);
														
 
															+    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t      val);
														
 
															+    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float        val);
														
 
															+    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t     val);
														
 
															+    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t      val);
														
 
															+    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double       val);
														
 
															+    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool         val);
														
 
															+    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
														
 
															+
														
 
															+    // creates a new array with n elements of the given type and copies the corresponding number of bytes from data
														
 
															+    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n);
														
 
															+
														
 
															+    // creates a new array with n strings and copies the corresponding strings from data
														
 
															+    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n);
														
 
															+
														
 
															+    // set or add KV pairs from another context
														
 
															+    GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
														
 
															+
														
 
															+    // add tensor to GGUF context, tensor name must be unique
														
 
															+    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
														
 
															+
														
 
															+    // after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
														
 
															+    //   in such a way that the tensor data remains as one contiguous block (except for padding)
														
 
															+    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
														
 
															+
														
 
															+    // assumes that at least gguf_get_tensor_size bytes can be read from data
														
 
															+    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
														
 
															+
														
 
															+    // writing gguf files can be done in 3 ways:
														
 
															+    //
														
 
															+    // - write the entire gguf_context to a binary file in a single pass:
														
 
															+    //
														
 
															+    //   gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
														
 
															+    //
														
 
															+    // - write only the meta data to a file, then re-open the file and append the tensor data:
														
 
															+    //
														
 
															+    //   gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
														
 
															+    //   FILE * f = fopen(fname, "ab");
														
 
															+    //   fwrite(..., f); // write tensor data
														
 
															+    //   fclose(f);
														
 
															+    //
														
 
															+    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
														
 
															+    //
														
 
															+    //   FILE * f = fopen(fname, "wb");
														
 
															+    //   const size_t size_meta = gguf_get_meta_size(ctx);
														
 
															+    //   fseek(f, size_meta, SEEK_SET);
														
 
															+    //   fwrite(..., f); // write tensor data
														
 
															+    //   void * data = malloc(size_meta);
														
 
															+    //   gguf_get_meta_data(ctx, data);
														
 
															+    //   rewind(f);
														
 
															+    //   fwrite(data, 1, size_meta, f);
														
 
															+    //   free(data);
														
 
															+    //   fclose(f);
														
 
															+    //
														
 
															+
														
 
															+    // write the context to a binary file; if only_meta is true, only the meta data is written (no tensor data)
														
 
															+    GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
														
 
															+
														
 
															+    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
														
 
															+    GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
														
 
															+
														
 
															+    // writes the meta data to pointer "data", which should have room for at least gguf_get_meta_size(ctx) bytes
														
 
															+    GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
														
 
															+
														
 
															+#ifdef  __cplusplus
														
 
															+}
														
 
															+#endif
														
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -208,6 +208,7 @@ add_library(ggml-base
 
															             ../include/ggml-backend.h
														
 
															             ../include/ggml-cpp.h
														
 
															             ../include/ggml-opt.h
														
 
															+            ../include/gguf.h
														
 
															             ggml.c
														
 
															             ggml-alloc.c
														
 
															             ggml-backend.cpp
														
@@ -215,7 +216,8 @@ add_library(ggml-base
 
															             ggml-threading.cpp
														
 
															             ggml-threading.h
														
 
															             ggml-quants.c
														
 
															-            ggml-quants.h)
														
 
															+            ggml-quants.h
														
 
															+            gguf.cpp)
														
 
															 target_include_directories(ggml-base PRIVATE .)
														
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -3,6 +3,8 @@
 
															 // GGML internal header
														
 
															 #include "ggml.h"
														
 
															+#include "gguf.h"
														
 
															+
														
 
															 #include <assert.h>
														
 
															 #include <math.h>
														
 
															 #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
														
@@ -551,22 +553,15 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
 
															 #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
														
 
															 #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
														
 
															-// expose GGUF internals for test code
														
 
															-
														
 
															-GGML_API size_t gguf_type_size(enum gguf_type type);
														
 
															-
														
 
															-GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
														
 
															-
														
 
															-struct gguf_buf {
														
 
															-    void * data;
														
 
															-    size_t size;
														
 
															-    size_t offset;
														
 
															-};
														
 
															-GGML_API struct gguf_buf gguf_buf_init(size_t size);
														
 
															-GGML_API void gguf_buf_free(struct gguf_buf buf);
														
 
															-
														
 
															-GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);
														
 
															-
														
 
															 #ifdef __cplusplus
														
 
															 }
														
 
															 #endif
														
 
															+
														
 
															+#ifdef __cplusplus
														
 
															+#include <vector>
														
 
															+
														
 
															+// expose GGUF internals for test code
														
 
															+GGML_API size_t gguf_type_size(enum gguf_type type);
														
 
															+GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
														
 
															+GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
														
 
															+#endif // __cplusplus
														
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -1588,15 +1588,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 
															     struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
														
 
															-#ifdef __clang__
														
 
															-    // temporary until ggml_tensor::backend is removed
														
 
															-    #pragma clang diagnostic push
														
 
															-    #pragma clang diagnostic ignored "-Wdeprecated-declarations"
														
 
															-#endif
														
 
															-
														
 
															     *result = (struct ggml_tensor) {
														
 
															         /*.type         =*/ type,
														
 
															-        /*.backend      =*/ GGML_BACKEND_TYPE_CPU,
														
 
															         /*.buffer       =*/ NULL,
														
 
															         /*.ne           =*/ { 1, 1, 1, 1 },
														
 
															         /*.nb           =*/ { 0, 0, 0, 0 },
														
@@ -1612,10 +1605,6 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 
															         /*.padding      =*/ { 0 },
														
 
															     };
														
 
															-#ifdef __clang__
														
 
															-    #pragma clang diagnostic pop
														
 
															-#endif
														
 
															-
														
 
															     // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
														
 
															     //GGML_ASSERT_ALIGNED(result->data);
														
@@ -6417,1271 +6406,6 @@ size_t ggml_quantize_chunk(
 
															 ////////////////////////////////////////////////////////////////////////////////
														
 
															-struct gguf_str {
														
 
															-    uint64_t n;  // GGUFv2
														
 
															-    char * data;
														
 
															-};
														
 
															-
														
 
															-static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
														
 
															-    [GGUF_TYPE_UINT8]   = sizeof(uint8_t),
														
 
															-    [GGUF_TYPE_INT8]    = sizeof(int8_t),
														
 
															-    [GGUF_TYPE_UINT16]  = sizeof(uint16_t),
														
 
															-    [GGUF_TYPE_INT16]   = sizeof(int16_t),
														
 
															-    [GGUF_TYPE_UINT32]  = sizeof(uint32_t),
														
 
															-    [GGUF_TYPE_INT32]   = sizeof(int32_t),
														
 
															-    [GGUF_TYPE_FLOAT32] = sizeof(float),
														
 
															-    [GGUF_TYPE_BOOL]    = sizeof(bool),
														
 
															-    [GGUF_TYPE_STRING]  = sizeof(struct gguf_str),
														
 
															-    [GGUF_TYPE_UINT64]  = sizeof(uint64_t),
														
 
															-    [GGUF_TYPE_INT64]   = sizeof(int64_t),
														
 
															-    [GGUF_TYPE_FLOAT64] = sizeof(double),
														
 
															-    [GGUF_TYPE_ARRAY]   = 0, // undefined
														
 
															-};
														
 
															-static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
														
 
															-
														
 
															-static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
														
 
															-    [GGUF_TYPE_UINT8]   = "u8",
														
 
															-    [GGUF_TYPE_INT8]    = "i8",
														
 
															-    [GGUF_TYPE_UINT16]  = "u16",
														
 
															-    [GGUF_TYPE_INT16]   = "i16",
														
 
															-    [GGUF_TYPE_UINT32]  = "u32",
														
 
															-    [GGUF_TYPE_INT32]   = "i32",
														
 
															-    [GGUF_TYPE_FLOAT32] = "f32",
														
 
															-    [GGUF_TYPE_BOOL]    = "bool",
														
 
															-    [GGUF_TYPE_STRING]  = "str",
														
 
															-    [GGUF_TYPE_ARRAY]   = "arr",
														
 
															-    [GGUF_TYPE_UINT64]  = "u64",
														
 
															-    [GGUF_TYPE_INT64]   = "i64",
														
 
															-    [GGUF_TYPE_FLOAT64] = "f64",
														
 
															-};
														
 
															-static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
														
 
															-
														
 
															-union gguf_value {
														
 
															-    uint8_t  uint8;
														
 
															-    int8_t   int8;
														
 
															-    uint16_t uint16;
														
 
															-    int16_t  int16;
														
 
															-    uint32_t uint32;
														
 
															-    int32_t  int32;
														
 
															-    float    float32;
														
 
															-    uint64_t uint64;
														
 
															-    int64_t  int64;
														
 
															-    double   float64;
														
 
															-    bool     bool_;
														
 
															-
														
 
															-    struct gguf_str str;
														
 
															-
														
 
															-    struct {
														
 
															-        enum gguf_type type;
														
 
															-
														
 
															-        uint64_t n;  // GGUFv2
														
 
															-        void * data;
														
 
															-    } arr;
														
 
															-};
														
 
															-
														
 
															-struct gguf_kv {
														
 
															-    struct gguf_str key;
														
 
															-
														
 
															-    enum  gguf_type  type;
														
 
															-    union gguf_value value;
														
 
															-};
														
 
															-
														
 
															-struct gguf_header {
														
 
															-    char magic[4];
														
 
															-
														
 
															-    uint32_t version;
														
 
															-    uint64_t n_tensors; // GGUFv2
														
 
															-    uint64_t n_kv;      // GGUFv2
														
 
															-};
														
 
															-
														
 
															-struct gguf_tensor_info {
														
 
															-    struct gguf_str name;
														
 
															-
														
 
															-    uint32_t n_dims;
														
 
															-    uint64_t ne[GGML_MAX_DIMS];
														
 
															-
														
 
															-    enum ggml_type type;
														
 
															-
														
 
															-    uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
														
 
															-
														
 
															-    // for writing API
														
 
															-    const void * data;
														
 
															-    size_t size;
														
 
															-};
														
 
															-
														
 
															-struct gguf_context {
														
 
															-    struct gguf_header header;
														
 
															-
														
 
															-    struct gguf_kv          * kv;
														
 
															-    struct gguf_tensor_info * infos;
														
 
															-
														
 
															-    size_t alignment;
														
 
															-    size_t offset;    // offset of `data` from beginning of file
														
 
															-    size_t size;      // size of `data` in bytes
														
 
															-
														
 
															-    //uint8_t * padding;
														
 
															-    void * data;
														
 
															-};
														
 
															-
														
 
															-size_t gguf_type_size(enum gguf_type type) {
														
 
															-    GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
														
 
															-    return GGUF_TYPE_SIZE[type];
														
 
															-}
														
 
															-
														
 
															-static bool gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
														
 
															-    if (info->n_dims > GGML_MAX_DIMS) {
														
 
															-        fprintf(stderr, "%s: invalid number of dimensions (%" PRIu32 ")\n", __func__, info->n_dims);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    if (info->type < 0 || info->type >= GGML_TYPE_COUNT) {
														
 
															-        fprintf(stderr, "%s: invalid type (%d)\n", __func__, info->type);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    if (strlen(info->name.data) >= GGML_MAX_NAME) {
														
 
															-        fprintf(stderr, "%s: tensor '%s' name is too long\n", __func__, info->name.data);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    for (uint32_t i = 0; i < info->n_dims; ++i) {
														
 
															-        if (info->ne[i] <= 0) {
														
 
															-            fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[i]);
														
 
															-            return false;
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    // prevent overflow for total number of elements
														
 
															-    if (INT64_MAX/info->ne[1] <= info->ne[0]) {
														
 
															-        fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[1]);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    if (INT64_MAX/info->ne[2] <= info->ne[0]*info->ne[1]) {
														
 
															-        fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[2]);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    if (INT64_MAX/info->ne[3] <= info->ne[0]*info->ne[1]*info->ne[2]) {
														
 
															-        fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[3]);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    return true;
														
 
															-}
														
 
															-
														
 
															-static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
														
 
															-    const size_t n = fread(dst, 1, size, file);
														
 
															-    *offset += n;
														
 
															-    return n == size;
														
 
															-}
														
 
															-
														
 
															-static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
														
 
															-    p->n    = 0;
														
 
															-    p->data = NULL;
														
 
															-
														
 
															-    bool ok = true;
														
 
															-
														
 
															-    ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
														
 
															-
														
 
															-    // early exit if string length is invalid, prevents from integer overflow
														
 
															-    if (p->n == SIZE_MAX) {
														
 
															-        fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    p->data = calloc(p->n + 1, 1);
														
 
															-    if (!p->data) {
														
 
															-        fprintf(stderr, "%s: failed to allocate memory for string of length %" PRIu64 "\n", __func__, p->n);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    ok = ok && gguf_fread_el(file,  p->data, p->n, offset);
														
 
															-
														
 
															-    return ok;
														
 
															-}
														
 
															-
														
 
															-static void gguf_free_kv(struct gguf_kv * kv) {
														
 
															-    if (kv->key.data) {
														
 
															-        GGML_FREE(kv->key.data);
														
 
															-    }
														
 
															-
														
 
															-    if (kv->type == GGUF_TYPE_STRING) {
														
 
															-        if (kv->value.str.data) {
														
 
															-            GGML_FREE(kv->value.str.data);
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    if (kv->type == GGUF_TYPE_ARRAY) {
														
 
															-        if (kv->value.arr.data) {
														
 
															-            if (kv->value.arr.type == GGUF_TYPE_STRING) {
														
 
															-                for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
														
 
															-                    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
														
 
															-                    if (str->data) {
														
 
															-                        GGML_FREE(str->data);
														
 
															-                    }
														
 
															-                }
														
 
															-            }
														
 
															-            GGML_FREE(kv->value.arr.data);
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-struct gguf_context * gguf_init_empty(void) {
														
 
															-    struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
														
 
															-    if (!ctx) {
														
 
															-        fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
														
 
															-        return NULL;
														
 
															-    }
														
 
															-
														
 
															-    memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
														
 
															-    ctx->header.version   = GGUF_VERSION;
														
 
															-    ctx->header.n_tensors = 0;
														
 
															-    ctx->header.n_kv      = 0;
														
 
															-
														
 
															-    ctx->kv    = NULL;
														
 
															-    ctx->infos = NULL;
														
 
															-
														
 
															-    ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
														
 
															-    ctx->offset    = 0;
														
 
															-    ctx->size      = 0;
														
 
															-
														
 
															-    ctx->data = NULL;
														
 
															-
														
 
															-    return ctx;
														
 
															-}
														
 
															-
														
 
															-struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
														
 
															-    // offset from start of file
														
 
															-    size_t offset = 0;
														
 
															-
														
 
															-    char magic[4];
														
 
															-
														
 
															-    // check the magic before making allocations
														
 
															-    {
														
 
															-        gguf_fread_el(file, &magic, sizeof(magic), &offset);
														
 
															-
														
 
															-        for (uint32_t i = 0; i < sizeof(magic); i++) {
														
 
															-            if (magic[i] != GGUF_MAGIC[i]) {
														
 
															-                fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
														
 
															-                return NULL;
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    bool ok = true;
														
 
															-
														
 
															-    struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
														
 
															-    if (!ctx) {
														
 
															-        fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
														
 
															-        return NULL;
														
 
															-    }
														
 
															-
														
 
															-    // read the header
														
 
															-    {
														
 
															-        strncpy(ctx->header.magic, magic, 4);
														
 
															-
														
 
															-        ctx->kv    = NULL;
														
 
															-        ctx->infos = NULL;
														
 
															-        ctx->data  = NULL;
														
 
															-
														
 
															-        ok = ok && gguf_fread_el(file, &ctx->header.version,   sizeof(ctx->header.version),   &offset);
														
 
															-        ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
														
 
															-        ok = ok && gguf_fread_el(file, &ctx->header.n_kv,      sizeof(ctx->header.n_kv),      &offset);
														
 
															-
														
 
															-        if (ctx->header.version == 1) {
														
 
															-            fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
														
 
															-            gguf_free(ctx);
														
 
															-            return NULL;
														
 
															-        }
														
 
															-
														
 
															-        // sanity-checks to prevent from integer/buffer overflows
														
 
															-
														
 
															-        ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct gguf_tensor_info));
														
 
															-        ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/ggml_tensor_overhead());
														
 
															-        ok = ok && (ctx->header.n_kv      < (SIZE_MAX/2)/sizeof(struct gguf_kv));
														
 
															-
														
 
															-        if (!ok) {
														
 
															-            fprintf(stderr, "%s: failed to read header\n", __func__);
														
 
															-            gguf_free(ctx);
														
 
															-            return NULL;
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    // read the kv pairs
														
 
															-    {
														
 
															-        const uint64_t n_kv = ctx->header.n_kv;
														
 
															-
														
 
															-        if (n_kv > 0) {
														
 
															-            ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
														
 
															-            if (!ctx->kv) {
														
 
															-                fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
														
 
															-                gguf_free(ctx);
														
 
															-                return NULL;
														
 
															-            }
														
 
															-        }
														
 
															-
														
 
															-        for (uint64_t i = 0; i < n_kv; ++i) {
														
 
															-            struct gguf_kv * kv = &ctx->kv[i];
														
 
															-
														
 
															-            //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
														
 
															-
														
 
															-            ok = ok && gguf_fread_str(file, &kv->key,                    &offset);
														
 
															-            ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
														
 
															-
														
 
															-            //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
														
 
															-
														
 
															-            switch (kv->type) {
														
 
															-                case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (file, &kv->value.uint8,   sizeof(kv->value.uint8),   &offset); break;
														
 
															-                case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (file, &kv->value.int8,    sizeof(kv->value.int8),    &offset); break;
														
 
															-                case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (file, &kv->value.uint16,  sizeof(kv->value.uint16),  &offset); break;
														
 
															-                case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (file, &kv->value.int16,   sizeof(kv->value.int16),   &offset); break;
														
 
															-                case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (file, &kv->value.uint32,  sizeof(kv->value.uint32),  &offset); break;
														
 
															-                case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (file, &kv->value.int32,   sizeof(kv->value.int32),   &offset); break;
														
 
															-                case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
														
 
															-                case GGUF_TYPE_UINT64:  ok = ok && gguf_fread_el (file, &kv->value.uint64,  sizeof(kv->value.uint64),  &offset); break;
														
 
															-                case GGUF_TYPE_INT64:   ok = ok && gguf_fread_el (file, &kv->value.int64,   sizeof(kv->value.int64),   &offset); break;
														
 
															-                case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
														
 
															-                case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (file, &kv->value.bool_,   sizeof(kv->value.bool_),   &offset); break;
														
 
															-                case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(file, &kv->value.str,                                &offset); break;
														
 
															-                case GGUF_TYPE_ARRAY:
														
 
															-                    {
														
 
															-                        ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
														
 
															-                        ok = ok && gguf_fread_el(file, &kv->value.arr.n,    sizeof(kv->value.arr.n),    &offset);
														
 
															-
														
 
															-                        switch (kv->value.arr.type) {
														
 
															-                            case GGUF_TYPE_UINT8:
														
 
															-                            case GGUF_TYPE_INT8:
														
 
															-                            case GGUF_TYPE_UINT16:
														
 
															-                            case GGUF_TYPE_INT16:
														
 
															-                            case GGUF_TYPE_UINT32:
														
 
															-                            case GGUF_TYPE_INT32:
														
 
															-                            case GGUF_TYPE_FLOAT32:
														
 
															-                            case GGUF_TYPE_UINT64:
														
 
															-                            case GGUF_TYPE_INT64:
														
 
															-                            case GGUF_TYPE_FLOAT64:
														
 
															-                            case GGUF_TYPE_BOOL:
														
 
															-                                {
														
 
															-                                    // prevent from integer overflow in the malloc below
														
 
															-                                    if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
														
 
															-                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
														
 
															-                                        gguf_free(ctx);
														
 
															-                                        return NULL;
														
 
															-                                    }
														
 
															-
														
 
															-                                    kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
														
 
															-                                    if (!kv->value.arr.data) {
														
 
															-                                        fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
														
 
															-                                        gguf_free(ctx);
														
 
															-                                        return NULL;
														
 
															-                                    }
														
 
															-
														
 
															-                                    ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
														
 
															-                                } break;
														
 
															-                            case GGUF_TYPE_STRING:
														
 
															-                                {
														
 
															-                                    // prevent from integer overflow in the malloc below
														
 
															-                                    if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
														
 
															-                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
														
 
															-                                        gguf_free(ctx);
														
 
															-                                        return NULL;
														
 
															-                                    }
														
 
															-
														
 
															-                                    kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
														
 
															-                                    if (!kv->value.arr.data) {
														
 
															-                                        fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
														
 
															-                                        gguf_free(ctx);
														
 
															-                                        return NULL;
														
 
															-                                    }
														
 
															-
														
 
															-                                    for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
														
 
															-                                        ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
														
 
															-                                    }
														
 
															-                                } break;
														
 
															-                            case GGUF_TYPE_ARRAY:
														
 
															-                            default:
														
 
															-                                {
														
 
															-                                    fprintf(stderr, "%s: invalid array type %d\n", __func__, kv->value.arr.type);
														
 
															-                                    ok = false;
														
 
															-                                } break;
														
 
															-                        }
														
 
															-                    } break;
														
 
															-                default:
														
 
															-                    {
														
 
															-                        fprintf(stderr, "%s: invalid type %d\n", __func__, kv->type);
														
 
															-                        ok = false;
														
 
															-                    } break;
														
 
															-            }
														
 
															-
														
 
															-            if (!ok) {
														
 
															-                break;
														
 
															-            }
														
 
															-        }
														
 
															-
														
 
															-        if (!ok) {
														
 
															-            fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
														
 
															-            gguf_free(ctx);
														
 
															-            return NULL;
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    // read the tensor infos
														
 
															-    if (ctx->header.n_tensors > 0) {
														
 
															-        ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
														
 
															-        if (!ctx->infos) {
														
 
															-            fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
														
 
															-            gguf_free(ctx);
														
 
															-            return NULL;
														
 
															-        }
														
 
															-
														
 
															-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
														
 
															-            struct gguf_tensor_info * info = &ctx->infos[i];
														
 
															-
														
 
															-            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
														
 
															-                info->ne[j] = 1;
														
 
															-            }
														
 
															-
														
 
															-            ok = ok && gguf_fread_str(file, &info->name,                          &offset);
														
 
															-            ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims),  &offset);
														
 
															-
														
 
															-            ok = ok && (info->n_dims <= GGML_MAX_DIMS);
														
 
															-
														
 
															-            for (uint32_t j = 0; j < info->n_dims; ++j) {
														
 
															-                ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
														
 
															-            }
														
 
															-
														
 
															-            ok = ok && gguf_fread_el (file, &info->type,   sizeof(info->type),    &offset);
														
 
															-            ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset),  &offset);
														
 
															-
														
 
															-            ok = ok && gguf_tensor_info_sanitize(info);
														
 
															-
														
 
															-            // make sure there is no duplicated tensor names
														
 
															-            for (uint64_t j = 0; j < i && ok; ++j) {
														
 
															-                if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
														
 
															-                    fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
														
 
															-                    ok = false;
														
 
															-                }
														
 
															-            }
														
 
															-
														
 
															-            if (!ok) {
														
 
															-                fprintf(stderr, "%s: failed to read tensor info\n", __func__);
														
 
															-                gguf_free(ctx);
														
 
															-                return NULL;
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
														
 
															-
														
 
															-    int alignment_idx = gguf_find_key(ctx, "general.alignment");
														
 
															-    if (alignment_idx != -1) {
														
 
															-        ctx->alignment = gguf_get_val_u32(ctx, alignment_idx);
														
 
															-    }
														
 
															-
														
 
															-    // we require the data section to be aligned, so take into account any padding
														
 
															-    {
														
 
															-        const size_t offset_pad = offset % ctx->alignment;
														
 
															-
														
 
															-        if (offset_pad != 0) {
														
 
															-            offset += ctx->alignment - offset_pad;
														
 
															-            fseek(file, offset, SEEK_SET);
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    // store the current file offset - this is where the data section starts
														
 
															-    ctx->offset = offset;
														
 
															-
														
 
															-    // compute the total size of the data section, taking into account the alignment
														
 
															-    {
														
 
															-        ctx->size = 0;
														
 
															-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
														
 
															-            struct gguf_tensor_info * info = &ctx->infos[i];
														
 
															-
														
 
															-            const int64_t ne =
														
 
															-                (int64_t) info->ne[0] *
														
 
															-                (int64_t) info->ne[1] *
														
 
															-                (int64_t) info->ne[2] *
														
 
															-                (int64_t) info->ne[3];
														
 
															-
														
 
															-            if (ggml_blck_size(info->type) == 0 ) {
														
 
															-                // this tensor type support have been removed:
														
 
															-                fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
														
 
															-                        __func__, info->name.data, (int) info->type, ggml_type_name(info->type));
														
 
															-                gguf_free(ctx);
														
 
															-                return NULL;
														
 
															-            }
														
 
															-
														
 
															-            if (ne % ggml_blck_size(info->type) != 0) {
														
 
															-                fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
														
 
															-                        __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
														
 
															-                gguf_free(ctx);
														
 
															-                return NULL;
														
 
															-            }
														
 
															-
														
 
															-            const size_t size_cur = ggml_row_size(info->type, ne);
														
 
															-
														
 
															-            ctx->size += GGML_PAD(size_cur, ctx->alignment);
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    // load the tensor data only if requested
														
 
															-    if (params.ctx != NULL) {
														
 
															-        // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
														
 
															-        // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
														
 
															-        // the ggml_tensor structs to the appropriate locations in the binary blob
														
 
															-
														
 
															-        // compute the exact size needed for the new ggml_context
														
 
															-        const size_t mem_size =
														
 
															-            params.no_alloc ?
														
 
															-            (ctx->header.n_tensors    )*ggml_tensor_overhead() :
														
 
															-            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
														
 
															-
														
 
															-        struct ggml_init_params pdata = {
														
 
															-            .mem_size   = mem_size,
														
 
															-            .mem_buffer = NULL,
														
 
															-            .no_alloc   = params.no_alloc,
														
 
															-        };
														
 
															-
														
 
															-        *params.ctx = ggml_init(pdata);
														
 
															-        if (*params.ctx == NULL) {
														
 
															-            fprintf(stderr, "%s: failed to initialize context\n", __func__);
														
 
															-            gguf_free(ctx);
														
 
															-            return NULL;
														
 
															-        }
														
 
															-
														
 
															-        struct ggml_context * ctx_data = *params.ctx;
														
 
															-
														
 
															-        struct ggml_tensor * data = NULL;
														
 
															-
														
 
															-        if (!params.no_alloc) {
														
 
															-            data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
														
 
															-
														
 
															-            ok = ok && data != NULL;
														
 
															-
														
 
															-            // read the binary blob with the tensor data
														
 
															-            ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
														
 
															-
														
 
															-            if (!ok) {
														
 
															-                fprintf(stderr, "%s: failed to read tensor data\n", __func__);
														
 
															-                ggml_free(ctx_data);
														
 
															-                gguf_free(ctx);
														
 
															-                return NULL;
														
 
															-            }
														
 
															-
														
 
															-            ctx->data = data->data;
														
 
															-        }
														
 
															-
														
 
															-        ggml_set_no_alloc(ctx_data, true);
														
 
															-
														
 
															-        // create the tensors
														
 
															-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
														
 
															-            const int64_t ne[GGML_MAX_DIMS] = {
														
 
															-                ctx->infos[i].ne[0],
														
 
															-                ctx->infos[i].ne[1],
														
 
															-                ctx->infos[i].ne[2],
														
 
															-                ctx->infos[i].ne[3],
														
 
															-            };
														
 
															-
														
 
															-            struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne);
														
 
															-
														
 
															-            ok = ok && cur != NULL;
														
 
															-
														
 
															-            if (!ok) {
														
 
															-                break;
														
 
															-            }
														
 
															-
														
 
															-            ggml_set_name(cur, ctx->infos[i].name.data);
														
 
															-
														
 
															-            // point the data member to the appropriate location in the binary blob using the tensor infos
														
 
															-            if (!params.no_alloc) {
														
 
															-              //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
														
 
															-                cur->data = (char *) data->data + ctx->infos[i].offset;               // offset from data
														
 
															-            }
														
 
															-        }
														
 
															-
														
 
															-        if (!ok) {
														
 
															-            fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
														
 
															-            ggml_free(ctx_data);
														
 
															-            gguf_free(ctx);
														
 
															-            return NULL;
														
 
															-        }
														
 
															-
														
 
															-        ggml_set_no_alloc(ctx_data, params.no_alloc);
														
 
															-    }
														
 
															-
														
 
															-    return ctx;
														
 
															-}
														
 
															-
														
 
															-struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
														
 
															-    FILE * file = ggml_fopen(fname, "rb");
														
 
															-    if (!file) {
														
 
															-        fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
														
 
															-        return NULL;
														
 
															-    }
														
 
															-
														
 
															-    struct gguf_context * result = gguf_init_from_file_impl(file, params);
														
 
															-    fclose(file);
														
 
															-    return result;
														
 
															-}
														
 
															-
														
 
															-void gguf_free(struct gguf_context * ctx) {
														
 
															-    if (ctx == NULL) {
														
 
															-        return;
														
 
															-    }
														
 
															-
														
 
															-    if (ctx->kv) {
														
 
															-        // free string memory - not great..
														
 
															-        for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
														
 
															-            gguf_free_kv(&ctx->kv[i]);
														
 
															-        }
														
 
															-
														
 
															-        GGML_FREE(ctx->kv);
														
 
															-    }
														
 
															-
														
 
															-    if (ctx->infos) {
														
 
															-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
														
 
															-            struct gguf_tensor_info * info = &ctx->infos[i];
														
 
															-
														
 
															-            if (info->name.data) {
														
 
															-                GGML_FREE(info->name.data);
														
 
															-            }
														
 
															-        }
														
 
															-
														
 
															-        GGML_FREE(ctx->infos);
														
 
															-    }
														
 
															-
														
 
															-    GGML_FREE(ctx);
														
 
															-}
														
 
															-
														
 
															-const char * gguf_type_name(enum gguf_type type) {
														
 
															-    return GGUF_TYPE_NAME[type];
														
 
															-}
														
 
															-
														
 
															-int gguf_get_version(const struct gguf_context * ctx) {
														
 
															-    return ctx->header.version;
														
 
															-}
														
 
															-
														
 
															-size_t gguf_get_alignment(const struct gguf_context * ctx) {
														
 
															-    return ctx->alignment;
														
 
															-}
														
 
															-
														
 
															-size_t gguf_get_data_offset(const struct gguf_context * ctx) {
														
 
															-    return ctx->offset;
														
 
															-}
														
 
															-
														
 
															-void * gguf_get_data(const struct gguf_context * ctx) {
														
 
															-    return ctx->data;
														
 
															-}
														
 
															-
														
 
															-int gguf_get_n_kv(const struct gguf_context * ctx) {
														
 
															-    return ctx->header.n_kv;
														
 
															-}
														
 
															-
														
 
															-int gguf_find_key(const struct gguf_context * ctx, const char * key) {
														
 
															-    // return -1 if key not found
														
 
															-    int keyfound = -1;
														
 
															-
														
 
															-    const int n_kv = gguf_get_n_kv(ctx);
														
 
															-
														
 
															-    for (int i = 0; i < n_kv; ++i) {
														
 
															-        if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
														
 
															-            keyfound = i;
														
 
															-            break;
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    return keyfound;
														
 
															-}
														
 
															-
														
 
															-const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    return ctx->kv[key_id].key.data;
														
 
															-}
														
 
															-
														
 
															-enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    return ctx->kv[key_id].type;
														
 
															-}
														
 
															-
														
 
															-enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
														
 
															-    return ctx->kv[key_id].value.arr.type;
														
 
															-}
														
 
															-
														
 
															-const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
														
 
															-    return ctx->kv[key_id].value.arr.data;
														
 
															-}
														
 
															-
														
 
															-const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
														
 
															-    struct gguf_kv * kv = &ctx->kv[key_id];
														
 
															-    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
														
 
															-    return str->data;
														
 
															-}
														
 
															-
														
 
															-int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
														
 
															-    return ctx->kv[key_id].value.arr.n;
														
 
															-}
														
 
															-
														
 
															-uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
														
 
															-    return ctx->kv[key_id].value.uint8;
														
 
															-}
														
 
															-
														
 
															-int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
														
 
															-    return ctx->kv[key_id].value.int8;
														
 
															-}
														
 
															-
														
 
															-uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
														
 
															-    return ctx->kv[key_id].value.uint16;
														
 
															-}
														
 
															-
														
 
															-int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
														
 
															-    return ctx->kv[key_id].value.int16;
														
 
															-}
														
 
															-
														
 
															-uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
														
 
															-    return ctx->kv[key_id].value.uint32;
														
 
															-}
														
 
															-
														
 
															-int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
														
 
															-    return ctx->kv[key_id].value.int32;
														
 
															-}
														
 
															-
														
 
															-float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
														
 
															-    return ctx->kv[key_id].value.float32;
														
 
															-}
														
 
															-
														
 
															-uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
														
 
															-    return ctx->kv[key_id].value.uint64;
														
 
															-}
														
 
															-
														
 
															-int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
														
 
															-    return ctx->kv[key_id].value.int64;
														
 
															-}
														
 
															-
														
 
															-double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
														
 
															-    return ctx->kv[key_id].value.float64;
														
 
															-}
														
 
															-
														
 
															-bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
														
 
															-    return ctx->kv[key_id].value.bool_;
														
 
															-}
														
 
															-
														
 
															-const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
														
 
															-    return ctx->kv[key_id].value.str.data;
														
 
															-}
														
 
															-
														
 
															-const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
														
 
															-    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
														
 
															-    GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
														
 
															-    return &ctx->kv[key_id].value;
														
 
															-}
														
 
															-
														
 
															-int gguf_get_n_tensors(const struct gguf_context * ctx) {
														
 
															-    return ctx->header.n_tensors;
														
 
															-}
														
 
															-
														
 
															-int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
														
 
															-    // return -1 if tensor not found
														
 
															-    int tensorfound = -1;
														
 
															-
														
 
															-    const int n_tensors = gguf_get_n_tensors(ctx);
														
 
															-
														
 
															-    for (int i = 0; i < n_tensors; ++i) {
														
 
															-        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
														
 
															-            tensorfound = i;
														
 
															-            break;
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    return tensorfound;
														
 
															-}
														
 
															-
														
 
															-size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
														
 
															-    return ctx->infos[i].offset;
														
 
															-}
														
 
															-
														
 
															-char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
														
 
															-    return ctx->infos[i].name.data;
														
 
															-}
														
 
															-
														
 
															-enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int i) {
														
 
															-    return ctx->infos[i].type;
														
 
															-}
														
 
															-
														
 
															-// returns the index
														
 
															-static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
														
 
															-    const int idx = gguf_find_key(ctx, key);
														
 
															-    if (idx >= 0) {
														
 
															-        return idx;
														
 
															-    }
														
 
															-
														
 
															-    const int n_kv = gguf_get_n_kv(ctx);
														
 
															-
														
 
															-    ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
														
 
															-    ctx->kv[n_kv].key.n    = strlen(key);
														
 
															-    ctx->kv[n_kv].key.data = strdup(key);
														
 
															-    ctx->header.n_kv++;
														
 
															-
														
 
															-    return n_kv;
														
 
															-}
														
 
															-
														
 
															-void gguf_remove_key(struct gguf_context * ctx, const char * key) {
														
 
															-    const int idx = gguf_find_key(ctx, key);
														
 
															-    if (idx >= 0) {
														
 
															-        const int n_kv = gguf_get_n_kv(ctx);
														
 
															-        gguf_free_kv(&ctx->kv[idx]);
														
 
															-        for (int i = idx; i < n_kv-1; ++i) {
														
 
															-            ctx->kv[i] = ctx->kv[i+1];
														
 
															-        }
														
 
															-        ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
														
 
															-        ctx->header.n_kv--;
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type        = GGUF_TYPE_UINT8;
														
 
															-    ctx->kv[idx].value.uint8 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type       = GGUF_TYPE_INT8;
														
 
															-    ctx->kv[idx].value.int8 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type         = GGUF_TYPE_UINT16;
														
 
															-    ctx->kv[idx].value.uint16 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type        = GGUF_TYPE_INT16;
														
 
															-    ctx->kv[idx].value.int16 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type         = GGUF_TYPE_UINT32;
														
 
															-    ctx->kv[idx].value.uint32 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type        = GGUF_TYPE_INT32;
														
 
															-    ctx->kv[idx].value.int32 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type          = GGUF_TYPE_FLOAT32;
														
 
															-    ctx->kv[idx].value.float32 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type         = GGUF_TYPE_UINT64;
														
 
															-    ctx->kv[idx].value.uint64 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type        = GGUF_TYPE_INT64;
														
 
															-    ctx->kv[idx].value.int64 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type          = GGUF_TYPE_FLOAT64;
														
 
															-    ctx->kv[idx].value.float64 = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type        = GGUF_TYPE_BOOL;
														
 
															-    ctx->kv[idx].value.bool_ = val;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type           = GGUF_TYPE_STRING;
														
 
															-    ctx->kv[idx].value.str.n    = strlen(val);
														
 
															-    ctx->kv[idx].value.str.data = strdup(val);
														
 
															-}
														
 
															-
														
 
															-void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
														
 
															-    ctx->kv[idx].value.arr.type = type;
														
 
															-    ctx->kv[idx].value.arr.n    = n;
														
 
															-    ctx->kv[idx].value.arr.data = GGML_CALLOC(n, gguf_type_size(type));
														
 
															-    memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
														
 
															-}
														
 
															-
														
 
															-void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
														
 
															-    const int idx = gguf_get_or_add_key(ctx, key);
														
 
															-
														
 
															-    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
														
 
															-    ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
														
 
															-    ctx->kv[idx].value.arr.n    = n;
														
 
															-    ctx->kv[idx].value.arr.data = GGML_CALLOC(n, sizeof(struct gguf_str));
														
 
															-    for (int i = 0; i < n; i++) {
														
 
															-        struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
														
 
															-        str->n    = strlen(data[i]);
														
 
															-        str->data = strdup(data[i]);
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-// set or add KV pairs from another context
														
 
															-void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
														
 
															-    for (uint32_t i = 0; i < src->header.n_kv; i++) {
														
 
															-        switch (src->kv[i].type) {
														
 
															-            case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, src->kv[i].key.data, src->kv[i].value.uint8);    break;
														
 
															-            case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, src->kv[i].key.data, src->kv[i].value.int8);     break;
														
 
															-            case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16);   break;
														
 
															-            case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16);    break;
														
 
															-            case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32);   break;
														
 
															-            case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32);    break;
														
 
															-            case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32);  break;
														
 
															-            case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64);   break;
														
 
															-            case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64);    break;
														
 
															-            case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64);  break;
														
 
															-            case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_);    break;
														
 
															-            case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
														
 
															-            case GGUF_TYPE_ARRAY:
														
 
															-                {
														
 
															-                    if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
														
 
															-                        const char ** data = GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *));
														
 
															-                        for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
														
 
															-                            data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
														
 
															-                        }
														
 
															-                        gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
														
 
															-                        GGML_FREE((void *)data);
														
 
															-                    } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
														
 
															-                        GGML_ABORT("nested arrays not supported");
														
 
															-                    } else {
														
 
															-                        gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
														
 
															-                    }
														
 
															-                } break;
														
 
															-            default: GGML_ABORT("invalid type");
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-void gguf_add_tensor(
														
 
															-             struct gguf_context * ctx,
														
 
															-        const struct ggml_tensor * tensor) {
														
 
															-    GGML_ASSERT(tensor);
														
 
															-    if (gguf_find_tensor(ctx, tensor->name) != -1) {
														
 
															-        GGML_ABORT("duplicated tensor name");
														
 
															-    }
														
 
															-
														
 
															-    const int idx = ctx->header.n_tensors;
														
 
															-    ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
														
 
															-
														
 
															-    ctx->infos[idx].name.n    = strlen(tensor->name);
														
 
															-    ctx->infos[idx].name.data = strdup(tensor->name);
														
 
															-
														
 
															-    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
														
 
															-        ctx->infos[idx].ne[i] = 1;
														
 
															-    }
														
 
															-
														
 
															-    ctx->infos[idx].n_dims = ggml_n_dims(tensor);
														
 
															-    for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
														
 
															-        ctx->infos[idx].ne[i] = tensor->ne[i];
														
 
															-    }
														
 
															-
														
 
															-    ctx->infos[idx].type   = tensor->type;
														
 
															-    ctx->infos[idx].offset = 0;
														
 
															-    ctx->infos[idx].data   = tensor->data;
														
 
															-    ctx->infos[idx].size   = ggml_nbytes(tensor);
														
 
															-
														
 
															-    if (ctx->header.n_tensors > 0) {
														
 
															-        ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
														
 
															-    }
														
 
															-
														
 
															-    ctx->header.n_tensors++;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
														
 
															-    const int idx = gguf_find_tensor(ctx, name);
														
 
															-    if (idx < 0) {
														
 
															-        GGML_ABORT("tensor not found");
														
 
															-    }
														
 
															-
														
 
															-    ctx->infos[idx].type = type;
														
 
															-}
														
 
															-
														
 
															-void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
														
 
															-    const int idx = gguf_find_tensor(ctx, name);
														
 
															-    if (idx < 0) {
														
 
															-        GGML_ABORT("tensor not found");
														
 
															-    }
														
 
															-
														
 
															-    ctx->infos[idx].data = data;
														
 
															-    ctx->infos[idx].size = size;
														
 
															-
														
 
															-    // update offsets
														
 
															-    for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
														
 
															-        ctx->infos[i].offset = ctx->infos[i - 1].offset + GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
														
 
															-//    fwrite(&val->n,   sizeof(val->n),    1, file);
														
 
															-//    fwrite(val->data, sizeof(char), val->n, file);
														
 
															-//}
														
 
															-//
														
 
															-//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
														
 
															-//    fwrite(val, sizeof(char), size, file);
														
 
															-//}
														
 
															-
														
 
															-struct gguf_buf gguf_buf_init(size_t size) {
														
 
															-    struct gguf_buf buf = {
														
 
															-        /*buf.data   =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
														
 
															-        /*buf.size   =*/ size,
														
 
															-        /*buf.offset =*/ 0,
														
 
															-    };
														
 
															-
														
 
															-    return buf;
														
 
															-}
														
 
															-
														
 
															-void gguf_buf_free(struct gguf_buf buf) {
														
 
															-    if (buf.data) {
														
 
															-        GGML_FREE(buf.data);
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
														
 
															-    if (buf->offset + size > buf->size) {
														
 
															-        buf->size = 1.5*(buf->offset + size);
														
 
															-        if (buf->data) {
														
 
															-            buf->data = realloc(buf->data, buf->size);
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
														
 
															-    gguf_buf_grow(buf, sizeof(val->n) + val->n);
														
 
															-
														
 
															-    if (buf->data) {
														
 
															-        memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
														
 
															-    }
														
 
															-    buf->offset += sizeof(val->n);
														
 
															-
														
 
															-    if (buf->data) {
														
 
															-        memcpy((char *) buf->data + buf->offset, val->data, val->n);
														
 
															-    }
														
 
															-    buf->offset += val->n;
														
 
															-}
														
 
															-
														
 
															-static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
														
 
															-    gguf_buf_grow(buf, el_size);
														
 
															-
														
 
															-    if (buf->data) {
														
 
															-        memcpy((char *) buf->data + buf->offset, val, el_size);
														
 
															-    }
														
 
															-    buf->offset += el_size;
														
 
															-}
														
 
															-
														
 
															-void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
														
 
															-    // write header
														
 
															-    gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic));
														
 
															-    gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version));
														
 
															-    gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
														
 
															-    gguf_bwrite_el(buf, &ctx->header.n_kv,      sizeof(ctx->header.n_kv));
														
 
															-
														
 
															-    // write key-value pairs
														
 
															-    for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
														
 
															-        struct gguf_kv * kv = &ctx->kv[i];
														
 
															-
														
 
															-        gguf_bwrite_str(buf, &kv->key);
														
 
															-        gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
														
 
															-
														
 
															-        switch (kv->type) {
														
 
															-            case GGUF_TYPE_UINT8:   gguf_bwrite_el( buf, &kv->value.uint8,   sizeof(kv->value.uint8)  ); break;
														
 
															-            case GGUF_TYPE_INT8:    gguf_bwrite_el (buf, &kv->value.int8,    sizeof(kv->value.int8)   ); break;
														
 
															-            case GGUF_TYPE_UINT16:  gguf_bwrite_el (buf, &kv->value.uint16,  sizeof(kv->value.uint16) ); break;
														
 
															-            case GGUF_TYPE_INT16:   gguf_bwrite_el (buf, &kv->value.int16,   sizeof(kv->value.int16)  ); break;
														
 
															-            case GGUF_TYPE_UINT32:  gguf_bwrite_el (buf, &kv->value.uint32,  sizeof(kv->value.uint32) ); break;
														
 
															-            case GGUF_TYPE_INT32:   gguf_bwrite_el (buf, &kv->value.int32,   sizeof(kv->value.int32)  ); break;
														
 
															-            case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
														
 
															-            case GGUF_TYPE_UINT64:  gguf_bwrite_el (buf, &kv->value.uint64,  sizeof(kv->value.uint64) ); break;
														
 
															-            case GGUF_TYPE_INT64:   gguf_bwrite_el (buf, &kv->value.int64,   sizeof(kv->value.int64)  ); break;
														
 
															-            case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
														
 
															-            case GGUF_TYPE_BOOL:    gguf_bwrite_el (buf, &kv->value.bool_,   sizeof(kv->value.bool_)  ); break;
														
 
															-            case GGUF_TYPE_STRING:  gguf_bwrite_str(buf, &kv->value.str                               ); break;
														
 
															-            case GGUF_TYPE_ARRAY:
														
 
															-                {
														
 
															-                    gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
														
 
															-                    gguf_bwrite_el(buf, &kv->value.arr.n,    sizeof(kv->value.arr.n)   );
														
 
															-
														
 
															-                    switch (kv->value.arr.type) {
														
 
															-                        case GGUF_TYPE_UINT8:
														
 
															-                        case GGUF_TYPE_INT8:
														
 
															-                        case GGUF_TYPE_UINT16:
														
 
															-                        case GGUF_TYPE_INT16:
														
 
															-                        case GGUF_TYPE_UINT32:
														
 
															-                        case GGUF_TYPE_INT32:
														
 
															-                        case GGUF_TYPE_FLOAT32:
														
 
															-                        case GGUF_TYPE_UINT64:
														
 
															-                        case GGUF_TYPE_INT64:
														
 
															-                        case GGUF_TYPE_FLOAT64:
														
 
															-                        case GGUF_TYPE_BOOL:
														
 
															-                            {
														
 
															-                                gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type));
														
 
															-                            } break;
														
 
															-                        case GGUF_TYPE_STRING:
														
 
															-                            {
														
 
															-                                for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
														
 
															-                                    gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
														
 
															-                                }
														
 
															-                            } break;
														
 
															-                        case GGUF_TYPE_ARRAY:
														
 
															-                        default: GGML_ABORT("invalid type");
														
 
															-                    }
														
 
															-                } break;
														
 
															-            default: GGML_ABORT("invalid type");
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    // write tensor infos
														
 
															-    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
														
 
															-        struct gguf_tensor_info * info = &ctx->infos[i];
														
 
															-
														
 
															-        gguf_bwrite_str(buf, &info->name);
														
 
															-        gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
														
 
															-        for (uint32_t j = 0; j < info->n_dims; ++j) {
														
 
															-            gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
														
 
															-        }
														
 
															-        gguf_bwrite_el(buf, &info->type,   sizeof(info->type));
														
 
															-        gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
														
 
															-    }
														
 
															-
														
 
															-    // we require the data section to be aligned, so take into account any padding
														
 
															-    {
														
 
															-        const size_t offset     = buf->offset;
														
 
															-        const size_t offset_pad = GGML_PAD(offset, ctx->alignment);
														
 
															-
														
 
															-        if (offset_pad != offset) {
														
 
															-            uint8_t pad = 0;
														
 
															-            for (size_t i = 0; i < offset_pad - offset; ++i) {
														
 
															-                gguf_bwrite_el(buf, &pad, sizeof(pad));
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    if (only_meta) {
														
 
															-        return;
														
 
															-    }
														
 
															-
														
 
															-    size_t offset = 0;
														
 
															-
														
 
															-    // write tensor data
														
 
															-    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
														
 
															-        struct gguf_tensor_info * info = &ctx->infos[i];
														
 
															-
														
 
															-        const size_t size     = info->size;
														
 
															-        const size_t size_pad = GGML_PAD(size, ctx->alignment);
														
 
															-
														
 
															-        gguf_bwrite_el(buf, info->data, size);
														
 
															-
														
 
															-        if (size_pad != size) {
														
 
															-            uint8_t pad = 0;
														
 
															-            for (size_t j = 0; j < size_pad - size; ++j) {
														
 
															-                gguf_bwrite_el(buf, &pad, sizeof(pad));
														
 
															-            }
														
 
															-        }
														
 
															-
														
 
															-        GGML_ASSERT(offset == info->offset);
														
 
															-
														
 
															-        offset += size_pad;
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
														
 
															-    FILE * file = ggml_fopen(fname, "wb");
														
 
															-    if (!file) {
														
 
															-        GGML_ABORT("failed to open file for writing");
														
 
															-    }
														
 
															-
														
 
															-    struct gguf_buf buf = gguf_buf_init(16*1024);
														
 
															-
														
 
															-    gguf_write_to_buf(ctx, &buf, only_meta);
														
 
															-
														
 
															-    fwrite(buf.data, 1, buf.offset, file);
														
 
															-
														
 
															-    gguf_buf_free(buf);
														
 
															-
														
 
															-    fclose(file);
														
 
															-}
														
 
															-
														
 
															-size_t gguf_get_meta_size(const struct gguf_context * ctx) {
														
 
															-    // no allocs - only compute size
														
 
															-    struct gguf_buf buf = gguf_buf_init(0);
														
 
															-
														
 
															-    gguf_write_to_buf(ctx, &buf, true);
														
 
															-
														
 
															-    return buf.offset;
														
 
															-}
														
 
															-
														
 
															-void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
														
 
															-    struct gguf_buf buf = gguf_buf_init(16*1024);
														
 
															-
														
 
															-    gguf_write_to_buf(ctx, &buf, true);
														
 
															-
														
 
															-    memcpy(data, buf.data, buf.offset);
														
 
															-
														
 
															-    gguf_buf_free(buf);
														
 
															-}
														
 
															-
														
 
															 void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
														
 
															     g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
														
 
															     g_logger_state.log_callback_user_data = user_data;
														
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -0,0 +1,1325 @@
 
															+#include "ggml.h"
														
 
															+#include "ggml-backend.h"
														
 
															+#include "ggml-impl.h"
														
 
															+#include "gguf.h"
														
 
															+
														
 
															+#include <cinttypes>
														
 
															+#include <cstddef>
														
 
															+#include <cstdint>
														
 
															+#include <cstdio>
														
 
															+#include <cstdlib>
														
 
															+#include <cstring>
														
 
															+#include <map>
														
 
															+#include <new>
														
 
															+#include <stdexcept>
														
 
															+#include <string>
														
 
															+#include <vector>
														
 
															+
														
 
															+template <typename T>
														
 
															+struct type_to_gguf_type;
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<uint8_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_UINT8;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<int8_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_INT8;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<uint16_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_UINT16;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<int16_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_INT16;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<uint32_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_UINT32;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<int32_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_INT32;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<float> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT32;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<bool> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_BOOL;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<std::string> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_STRING;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<uint64_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_UINT64;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<int64_t> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_INT64;
														
 
															+};
														
 
															+
														
 
															+template <>
														
 
															+struct type_to_gguf_type<double> {
														
 
															+    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT64;
														
 
															+};
														
 
															+
														
 
															+static const std::map<gguf_type, size_t> GGUF_TYPE_SIZE = {
														
 
															+    {GGUF_TYPE_UINT8,   sizeof(uint8_t)},
														
 
															+    {GGUF_TYPE_INT8,    sizeof(int8_t)},
														
 
															+    {GGUF_TYPE_UINT16,  sizeof(uint16_t)},
														
 
															+    {GGUF_TYPE_INT16,   sizeof(int16_t)},
														
 
															+    {GGUF_TYPE_UINT32,  sizeof(uint32_t)},
														
 
															+    {GGUF_TYPE_INT32,   sizeof(int32_t)},
														
 
															+    {GGUF_TYPE_FLOAT32, sizeof(float)},
														
 
															+    {GGUF_TYPE_BOOL,    sizeof(int8_t)},
														
 
															+    {GGUF_TYPE_STRING,  0}, // undefined
														
 
															+    {GGUF_TYPE_ARRAY,   0}, // undefined
														
 
															+    {GGUF_TYPE_UINT64,  sizeof(uint64_t)},
														
 
															+    {GGUF_TYPE_INT64,   sizeof(int64_t)},
														
 
															+    {GGUF_TYPE_FLOAT64, sizeof(double)},
														
 
															+};
														
 
															+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
														
 
															+
														
 
															+static const std::map<gguf_type, const char *> GGUF_TYPE_NAME = {
														
 
															+    {GGUF_TYPE_UINT8,   "u8"},
														
 
															+    {GGUF_TYPE_INT8,    "i8"},
														
 
															+    {GGUF_TYPE_UINT16,  "u16"},
														
 
															+    {GGUF_TYPE_INT16,   "i16"},
														
 
															+    {GGUF_TYPE_UINT32,  "u32"},
														
 
															+    {GGUF_TYPE_INT32,   "i32"},
														
 
															+    {GGUF_TYPE_FLOAT32, "f32"},
														
 
															+    {GGUF_TYPE_BOOL,    "bool"},
														
 
															+    {GGUF_TYPE_STRING,  "str"},
														
 
															+    {GGUF_TYPE_ARRAY,   "arr"},
														
 
															+    {GGUF_TYPE_UINT64,  "u64"},
														
 
															+    {GGUF_TYPE_INT64,   "i64"},
														
 
															+    {GGUF_TYPE_FLOAT64, "f64"},
														
 
															+};
														
 
															+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
														
 
															+
														
 
															+size_t gguf_type_size(enum gguf_type type) {
														
 
															+    auto it = GGUF_TYPE_SIZE.find(type);
														
 
															+    return it == GGUF_TYPE_SIZE.end() ? 0 : it->second;
														
 
															+}
														
 
															+
														
 
															+struct gguf_kv {
														
 
															+    std::string key;
														
 
															+
														
 
															+    bool is_array;
														
 
															+    enum gguf_type type;
														
 
															+
														
 
															+    std::vector<int8_t>      data;
														
 
															+    std::vector<std::string> data_string;
														
 
															+
														
 
															+    template <typename T>
														
 
															+    gguf_kv(const std::string & key, const T value)
														
 
															+            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
														
 
															+        GGML_ASSERT(!key.empty());
														
 
															+        data.resize(sizeof(T));
														
 
															+        memcpy(data.data(), &value, sizeof(T));
														
 
															+    }
														
 
															+
														
 
															+    template <typename T>
														
 
															+    gguf_kv(const std::string & key, const std::vector<T> & value)
														
 
															+            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
														
 
															+        GGML_ASSERT(!key.empty());
														
 
															+        data.resize(value.size()*sizeof(T));
														
 
															+        for (size_t i = 0; i < value.size(); ++i) {
														
 
															+            const T tmp = value[i];
														
 
															+            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    gguf_kv(const std::string & key, const std::string & value)
														
 
															+            : key(key), is_array(false), type(GGUF_TYPE_STRING) {
														
 
															+        GGML_ASSERT(!key.empty());
														
 
															+        data_string.push_back(value);
														
 
															+    }
														
 
															+
														
 
															+    gguf_kv(const std::string & key, const std::vector<std::string> & value)
														
 
															+            : key(key), is_array(true), type(GGUF_TYPE_STRING) {
														
 
															+        GGML_ASSERT(!key.empty());
														
 
															+        data_string = value;
														
 
															+    }
														
 
															+
														
 
															+    const std::string & get_key() const {
														
 
															+        return key;
														
 
															+    }
														
 
															+
														
 
															+    const enum gguf_type & get_type() const {
														
 
															+        return type;
														
 
															+    }
														
 
															+
														
 
															+    size_t get_ne() const {
														
 
															+        if (type == GGUF_TYPE_STRING) {
														
 
															+            const size_t ne = data_string.size();
														
 
															+            GGML_ASSERT(is_array || ne == 1);
														
 
															+            return ne;
														
 
															+        }
														
 
															+        const size_t type_size = gguf_type_size(type);
														
 
															+        GGML_ASSERT(data.size() % type_size == 0);
														
 
															+        const size_t ne = data.size() / type_size;
														
 
															+        GGML_ASSERT(is_array || ne == 1);
														
 
															+        return ne;
														
 
															+    }
														
 
															+
														
 
															+    template <typename T>
														
 
															+    const T & get_val(const size_t i = 0) const {
														
 
															+        GGML_ASSERT(type_to_gguf_type<T>::value == type);
														
 
															+        if constexpr (std::is_same<T, std::string>::value) {
														
 
															+            GGML_ASSERT(data_string.size() >= i+1);
														
 
															+            return data_string[i];
														
 
															+        }
														
 
															+        const size_t type_size = gguf_type_size(type);
														
 
															+        GGML_ASSERT(data.size() % type_size == 0);
														
 
															+        GGML_ASSERT(data.size() >= (i+1)*type_size);
														
 
															+        return reinterpret_cast<const T *>(data.data())[i];
														
 
															+    }
														
 
															+
														
 
															+    void cast(const enum gguf_type new_type) {
														
 
															+        const size_t new_type_size = gguf_type_size(new_type);
														
 
															+        GGML_ASSERT(data.size() % new_type_size == 0);
														
 
															+        type = new_type;
														
 
															+    }
														
 
															+};
														
 
															+
														
 
															+struct gguf_tensor_info {
														
 
															+    struct ggml_tensor t; // for holding the equivalent info
														
 
															+    uint64_t offset;      // offset from start of `data`, must be a multiple of `ALIGNMENT`
														
 
															+};
														
 
															+
														
 
															+struct gguf_context {
														
 
															+    uint32_t version = GGUF_VERSION;
														
 
															+
														
 
															+    std::vector<struct gguf_kv> kv;
														
 
															+    std::vector<struct gguf_tensor_info> info;
														
 
															+
														
 
															+    size_t alignment = GGUF_DEFAULT_ALIGNMENT;
														
 
															+    size_t offset    = 0; // offset of `data` from beginning of file
														
 
															+    size_t size      = 0; // size of `data` in bytes
														
 
															+
														
 
															+    void * data = nullptr;
														
 
															+};
														
 
															+
														
 
															+struct gguf_reader {
														
 
															+    FILE * file;
														
 
															+
														
 
															+    gguf_reader(FILE * file) : file(file) {}
														
 
															+
														
 
															+    template <typename T>
														
 
															+    bool read(T & dst) const {
														
 
															+        return fread(&dst, 1, sizeof(dst), file) == sizeof(dst);
														
 
															+    }
														
 
															+
														
 
															+    template <typename T>
														
 
															+    bool read(std::vector<T> & dst, const size_t n) const {
														
 
															+        dst.resize(n);
														
 
															+        for (size_t i = 0; i < dst.size(); ++i) {
														
 
															+            if constexpr (std::is_same<T, bool>::value) {
														
 
															+                bool tmp;
														
 
															+                if (!read(tmp)) {
														
 
															+                    return false;
														
 
															+                }
														
 
															+                dst[i] = tmp;
														
 
															+            } else {
														
 
															+                if (!read(dst[i])) {
														
 
															+                    return false;
														
 
															+                }
														
 
															+            }
														
 
															+        }
														
 
															+        return true;
														
 
															+    }
														
 
															+
														
 
															+    bool read(bool & dst) const {
														
 
															+        int8_t tmp = -1;
														
 
															+        if (!read(tmp)) {
														
 
															+            return false;
														
 
															+        }
														
 
															+        dst = tmp != 0;
														
 
															+        return true;
														
 
															+    }
														
 
															+
														
 
															+    bool read(enum ggml_type & dst) const {
														
 
															+        int32_t tmp = -1;
														
 
															+        if (!read(tmp)) {
														
 
															+            return false;
														
 
															+        }
														
 
															+        dst = ggml_type(tmp);
														
 
															+        return true;
														
 
															+    }
														
 
															+
														
 
															+    bool read(enum gguf_type & dst) const {
														
 
															+        int32_t tmp = -1;
														
 
															+        if (!read(tmp)) {
														
 
															+            return false;
														
 
															+        }
														
 
															+        dst = gguf_type(tmp);
														
 
															+        return true;
														
 
															+    }
														
 
															+
														
 
															+    bool read(std::string & dst) const {
														
 
															+        uint64_t size = -1;
														
 
															+        if (!read(size)) {
														
 
															+            return false;
														
 
															+        }
														
 
															+        dst.resize(size);
														
 
															+        return fread(dst.data(), 1, dst.length(), file) == dst.length();
														
 
															+    }
														
 
															+
														
 
															+    bool read(void * dst, const size_t size) const {
														
 
															+        return fread(dst, 1, size, file) == size;
														
 
															+    }
														
 
															+};
														
 
															+
														
 
															+struct gguf_context * gguf_init_empty(void) {
														
 
															+    return new gguf_context;
														
 
															+}
														
 
															+
														
 
															+template<typename T>
														
 
															+bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
														
 
															+    if (is_array) {
														
 
															+        std::vector<T> value;
														
 
															+        try {
														
 
															+            if (!gr.read(value, n)) {
														
 
															+                return false;
														
 
															+            }
														
 
															+        } catch (std::length_error &) {
														
 
															+            fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
														
 
															+            return false;
														
 
															+        } catch (std::bad_alloc &) {
														
 
															+            fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
														
 
															+            return false;
														
 
															+        }
														
 
															+        kv.emplace_back(key, value);
														
 
															+    } else {
														
 
															+        T value;
														
 
															+        if (!gr.read(value)) {
														
 
															+            return false;
														
 
															+        }
														
 
															+        kv.emplace_back(key, value);
														
 
															+    }
														
 
															+    return true;
														
 
															+}
														
 
															+
														
 
															+struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
														
 
															+    const struct gguf_reader gr(file);
														
 
															+    struct gguf_context * ctx = new gguf_context;
														
 
															+
														
 
															+    bool ok = true;
														
 
															+
														
 
															+    // file magic
														
 
															+    {
														
 
															+        std::vector<char> magic;
														
 
															+        ok = ok && gr.read(magic, 4);
														
 
															+
														
 
															+        if (!ok) {
														
 
															+            fprintf(stderr, "%s: failed to read magic\n", __func__);
														
 
															+            gguf_free(ctx);
														
 
															+            return nullptr;
														
 
															+        }
														
 
															+
														
 
															+        for (uint32_t i = 0; i < magic.size(); i++) {
														
 
															+            if (magic[i] != GGUF_MAGIC[i]) {
														
 
															+                fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
														
 
															+                gguf_free(ctx);
														
 
															+                return nullptr;
														
 
															+            }
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    // header
														
 
															+    int64_t n_kv      = 0;
														
 
															+    int64_t n_tensors = 0;
														
 
															+
														
 
															+    if (ok && gr.read(ctx->version)) {
														
 
															+        if (ctx->version == 1) {
														
 
															+            fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
														
 
															+            ok = false;
														
 
															+        }
														
 
															+        if (ctx->version > GGUF_VERSION) {
														
 
															+            fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
														
 
															+                __func__, ctx->version, GGUF_VERSION);
														
 
															+            ok = false;
														
 
															+        }
														
 
															+    } else {
														
 
															+        ok = false;
														
 
															+    }
														
 
															+
														
 
															+    if (ok && gr.read(n_tensors)) {
														
 
															+        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
														
 
															+        if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
														
 
															+            fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
														
 
															+                __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
														
 
															+            ok = false;
														
 
															+        }
														
 
															+    } else {
														
 
															+        ok = false;
														
 
															+    }
														
 
															+
														
 
															+    if (ok && gr.read(n_kv)) {
														
 
															+        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
														
 
															+        if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
														
 
															+            fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
														
 
															+                    __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
														
 
															+            ok = false;
														
 
															+        }
														
 
															+    } else {
														
 
															+        ok = false;
														
 
															+    }
														
 
															+
														
 
															+    if (!ok) {
														
 
															+        fprintf(stderr, "%s: failed to read header\n", __func__);
														
 
															+        gguf_free(ctx);
														
 
															+        return nullptr;
														
 
															+    }
														
 
															+
														
 
															+    // KV pairs
														
 
															+    {
														
 
															+        for (int64_t i = 0; ok && i < n_kv; ++i) {
														
 
															+            std::string key;
														
 
															+            gguf_type   type     = gguf_type(-1);
														
 
															+            bool        is_array = false;
														
 
															+            uint64_t    n        = 1;
														
 
															+
														
 
															+            try {
														
 
															+                ok = ok && gr.read(key);
														
 
															+            } catch (std::length_error &) {
														
 
															+                fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
														
 
															+                ok = false;
														
 
															+            } catch (std::bad_alloc &) {
														
 
															+                fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
														
 
															+                ok = false;
														
 
															+            }
														
 
															+            for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
														
 
															+                if (key == ctx->kv[j].key) {
														
 
															+                    fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
														
 
															+                    ok = false;
														
 
															+                }
														
 
															+            }
														
 
															+            if (!ok) {
														
 
															+                break;
														
 
															+            }
														
 
															+
														
 
															+            ok = ok && gr.read(type);
														
 
															+            if (type == GGUF_TYPE_ARRAY) {
														
 
															+                is_array = true;
														
 
															+                ok = ok && gr.read(type);
														
 
															+                ok = ok && gr.read(n);
														
 
															+            }
														
 
															+            if (!ok) {
														
 
															+                break;
														
 
															+            }
														
 
															+
														
 
															+            switch (type) {
														
 
															+                case GGUF_TYPE_UINT8:   ok = ok && gguf_read_emplace_helper<uint8_t>    (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_INT8:    ok = ok && gguf_read_emplace_helper<int8_t>     (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_UINT16:  ok = ok && gguf_read_emplace_helper<uint16_t>   (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_INT16:   ok = ok && gguf_read_emplace_helper<int16_t>    (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_UINT32:  ok = ok && gguf_read_emplace_helper<uint32_t>   (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_INT32:   ok = ok && gguf_read_emplace_helper<int32_t>    (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_FLOAT32: ok = ok && gguf_read_emplace_helper<float>      (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_BOOL:    ok = ok && gguf_read_emplace_helper<bool>       (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_STRING:  ok = ok && gguf_read_emplace_helper<std::string>(gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_UINT64:  ok = ok && gguf_read_emplace_helper<uint64_t>   (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_INT64:   ok = ok && gguf_read_emplace_helper<int64_t>    (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_FLOAT64: ok = ok && gguf_read_emplace_helper<double>     (gr, ctx->kv, key, is_array, n); break;
														
 
															+                case GGUF_TYPE_ARRAY:
														
 
															+                default:
														
 
															+                    {
														
 
															+                        fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
														
 
															+                        ok = false;
														
 
															+                    } break;
														
 
															+            }
														
 
															+        }
														
 
															+
														
 
															+        if (!ok) {
														
 
															+            fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
														
 
															+            gguf_free(ctx);
														
 
															+            return nullptr;
														
 
															+        }
														
 
															+        GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv);
														
 
															+
														
 
															+        const int alignment_idx = gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT);
														
 
															+        ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);
														
 
															+
														
 
															+        if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
														
 
															+            fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
														
 
															+            gguf_free(ctx);
														
 
															+            return nullptr;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    // read the tensor info
														
 
															+    for (int64_t i = 0; ok && i < n_tensors; ++i) {
														
 
															+        struct gguf_tensor_info info;
														
 
															+
														
 
															+        // tensor name
														
 
															+        {
														
 
															+            std::string name;
														
 
															+            try {
														
 
															+                ok = ok && gr.read(name);
														
 
															+            } catch (std::length_error &) {
														
 
															+                fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
														
 
															+                ok = false;
														
 
															+            } catch (std::bad_alloc &) {
														
 
															+                fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
														
 
															+                ok = false;
														
 
															+            }
														
 
															+            if (name.length() >= GGML_MAX_NAME) {
														
 
															+                fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
														
 
															+                ok = false;
														
 
															+                break;
														
 
															+            }
														
 
															+            ggml_set_name(&info.t, name.c_str());
														
 
															+
														
 
															+            // make sure there are no duplicate tensor names
														
 
															+            for (int64_t j = 0; ok && j < i; ++j) {
														
 
															+                if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
														
 
															+                    fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
														
 
															+                    ok = false;
														
 
															+                    break;
														
 
															+                }
														
 
															+            }
														
 
															+        }
														
 
															+        if (!ok) {
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        // tensor shape
														
 
															+        {
														
 
															+            uint32_t n_dims = -1;
														
 
															+            ok = ok && gr.read(n_dims);
														
 
															+            if (n_dims > GGML_MAX_DIMS) {
														
 
															+                fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
														
 
															+                    __func__, info.t.name, n_dims, GGML_MAX_DIMS);
														
 
															+                ok = false;
														
 
															+                break;
														
 
															+            }
														
 
															+            for (uint32_t j = 0; ok && j < GGML_MAX_DIMS; ++j) {
														
 
															+                info.t.ne[j] = 1;
														
 
															+                if (j < n_dims) {
														
 
															+                    ok = ok && gr.read(info.t.ne[j]);
														
 
															+                }
														
 
															+
														
 
															+                // check that all ne are non-negative
														
 
															+                if (info.t.ne[j] < 0) {
														
 
															+                    fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
														
 
															+                        __func__, info.t.name, j, info.t.ne[j]);
														
 
															+                    ok = false;
														
 
															+                    break;
														
 
															+                }
														
 
															+            }
														
 
															+
														
 
															+            // check that the total number of elements is representable
														
 
															+            if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
														
 
															+                       (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
														
 
															+                       (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
														
 
															+
														
 
															+                fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
														
 
															+                    "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
														
 
															+                    __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
														
 
															+                ok = false;
														
 
															+                break;
														
 
															+            }
														
 
															+        }
														
 
															+        if (!ok) {
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        // tensor type
														
 
															+        {
														
 
															+            ok = ok && gr.read(info.t.type);
														
 
															+
														
 
															+            // check that tensor type is within defined range
														
 
															+            if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
														
 
															+                fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
														
 
															+                    __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
														
 
															+                ok = false;
														
 
															+                break;
														
 
															+            }
														
 
															+            const size_t  type_size = ggml_type_size(info.t.type);
														
 
															+            const int64_t blck_size = ggml_blck_size(info.t.type);
														
 
															+
														
 
															+            // check that row size is divisible by block size
														
 
															+            if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
														
 
															+                fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
														
 
															+                    "not a multiple of block size (%" PRId64 ")\n",
														
 
															+                    __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
														
 
															+                ok = false;
														
 
															+                break;
														
 
															+            }
														
 
															+
														
 
															+            // calculate byte offsets given the tensor shape and type
														
 
															+            info.t.nb[0] = type_size;
														
 
															+            info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
														
 
															+            for (int j = 2; j < GGML_MAX_DIMS; ++j) {
														
 
															+                info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1];
														
 
															+            }
														
 
															+        }
														
 
															+        if (!ok) {
														
 
															+            break;
														
 
															+        }
														
 
															+
														
 
															+        // tensor data offset within buffer
														
 
															+        ok = ok && gr.read(info.offset);
														
 
															+
														
 
															+        ctx->info.push_back(info);
														
 
															+    }
														
 
															+
														
 
															+    if (!ok) {
														
 
															+        fprintf(stderr, "%s: failed to read tensor info\n", __func__);
														
 
															+        gguf_free(ctx);
														
 
															+        return nullptr;
														
 
															+    }
														
 
															+    GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
														
 
															+
														
 
															+    // we require the data section to be aligned, so take into account any padding
														
 
															+    if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
														
 
															+        fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
														
 
															+        gguf_free(ctx);
														
 
															+        return nullptr;
														
 
															+    }
														
 
															+
														
 
															+    // store the current file offset - this is where the data section starts
														
 
															+    ctx->offset = ftell(file);
														
 
															+
														
 
															+    // compute the total size of the data section, taking into account the alignment
														
 
															+    {
														
 
															+        ctx->size = 0;
														
 
															+        for (size_t i = 0; i < ctx->info.size(); ++i) {
														
 
															+            const gguf_tensor_info & ti = ctx->info[i];
														
 
															+            if (ti.offset != ctx->size) {
														
 
															+                fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
														
 
															+                    __func__, ti.t.name, ti.offset, ctx->size);
														
 
															+                fprintf(stderr, "%s: failed to read tensor data\n", __func__);
														
 
															+                gguf_free(ctx);
														
 
															+                return nullptr;
														
 
															+            }
														
 
															+            ctx->size += GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    // load the tensor data only if requested
														
 
															+    if (params.ctx != nullptr) {
														
 
															+        // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
														
 
															+        // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
														
 
															+        //   the ggml_tensor structs to the appropriate locations in the binary blob
														
 
															+
														
 
															+        // compute the exact size needed for the new ggml_context
														
 
															+        const size_t mem_size =
														
 
															+            params.no_alloc ?
														
 
															+            (n_tensors    )*ggml_tensor_overhead() :
														
 
															+            (n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
														
 
															+
														
 
															+        struct ggml_init_params pdata = {
														
 
															+            /*mem_size   =*/ mem_size,
														
 
															+            /*mem_buffer =*/ nullptr,
														
 
															+            /*no_alloc   =*/ params.no_alloc,
														
 
															+        };
														
 
															+
														
 
															+        *params.ctx = ggml_init(pdata);
														
 
															+        if (*params.ctx == nullptr) {
														
 
															+            fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
														
 
															+            gguf_free(ctx);
														
 
															+            return nullptr;
														
 
															+        }
														
 
															+
														
 
															+        struct ggml_context * ctx_data = *params.ctx;
														
 
															+
														
 
															+        struct ggml_tensor * data = nullptr;
														
 
															+
														
 
															+        if (!params.no_alloc) {
														
 
															+            data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
														
 
															+
														
 
															+            ok = ok && data != nullptr;
														
 
															+
														
 
															+            // read the binary blob with the tensor data
														
 
															+            ok = ok && gr.read(data->data, ctx->size);
														
 
															+
														
 
															+            if (!ok) {
														
 
															+                fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
														
 
															+                ggml_free(ctx_data);
														
 
															+                *params.ctx = nullptr;
														
 
															+                gguf_free(ctx);
														
 
															+                return nullptr;
														
 
															+            }
														
 
															+
														
 
															+            ctx->data = data->data;
														
 
															+        }
														
 
															+
														
 
															+        ggml_set_no_alloc(ctx_data, true);
														
 
															+
														
 
															+        // create the tensors
														
 
															+        for (size_t i = 0; i < ctx->info.size(); ++i) {
														
 
															+            const struct gguf_tensor_info & info = ctx->info[i];
														
 
															+
														
 
															+            struct ggml_tensor * cur = ggml_new_tensor(ctx_data, info.t.type, GGML_MAX_DIMS, info.t.ne);
														
 
															+
														
 
															+            ok = ok && cur != nullptr;
														
 
															+
														
 
															+            if (!ok) {
														
 
															+                break;
														
 
															+            }
														
 
															+
														
 
															+            ggml_set_name(cur, info.t.name);
														
 
															+
														
 
															+            // point the data member to the appropriate location in the binary blob using the tensor info
														
 
															+            if (!params.no_alloc) {
														
 
															+                cur->data = (char *) data->data + info.offset;
														
 
															+            }
														
 
															+        }
														
 
															+
														
 
															+        if (!ok) {
														
 
															+            fprintf(stderr, "%s: failed to create tensors\n", __func__);
														
 
															+            ggml_free(ctx_data);
														
 
															+            *params.ctx = nullptr;
														
 
															+            gguf_free(ctx);
														
 
															+            return nullptr;
														
 
															+        }
														
 
															+
														
 
															+        ggml_set_no_alloc(ctx_data, params.no_alloc);
														
 
															+    }
														
 
															+
														
 
															+    return ctx;
														
 
															+}
														
 
															+
														
 
															+// Opens `fname` in binary read mode and hands the stream to gguf_init_from_file_impl.
															+// Returns nullptr (after logging to stderr) when the file cannot be opened; otherwise
															+// returns whatever gguf_init_from_file_impl produced. The stream is closed before returning.
															+struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
															+    if (FILE * fp = ggml_fopen(fname, "rb")) {
															+        struct gguf_context * parsed = gguf_init_from_file_impl(fp, params);
															+        fclose(fp);
															+        return parsed;
															+    }
															+
															+    fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
															+    return nullptr;
															+}
														
 
															+
														
 
															+// Releases `ctx` with `delete`. Passing nullptr is allowed and does nothing.
															+void gguf_free(struct gguf_context * ctx) {
															+    // a delete-expression on a null pointer is a no-op, so no separate null guard is needed
															+    delete ctx;
															+}
														
 
															+
														
 
															+// Looks `type` up in GGUF_TYPE_NAME and returns the mapped name,
															+// or nullptr when the table has no entry for it.
															+const char * gguf_type_name(enum gguf_type type) {
															+    const auto entry = GGUF_TYPE_NAME.find(type);
															+    if (entry != GGUF_TYPE_NAME.end()) {
															+        return entry->second;
															+    }
															+    return nullptr;
															+}
														
 
															+
														
 
															+// Returns the `version` field of the context.
															+uint32_t gguf_get_version(const struct gguf_context * ctx) {
															+    const uint32_t version = ctx->version;
															+    return version;
															+}
														
 
															+
														
 
															+// Returns the `alignment` field of the context.
															+size_t gguf_get_alignment(const struct gguf_context * ctx) {
															+    const size_t alignment = ctx->alignment;
															+    return alignment;
															+}
														
 
															+
														
 
															+// Returns the `offset` field of the context
															+// (the file loader stores the start of the data section there).
															+size_t gguf_get_data_offset(const struct gguf_context * ctx) {
															+    const size_t data_offset = ctx->offset;
															+    return data_offset;
															+}
														
 
															+
														
 
															+// Returns the number of key-value pairs held by the context, as a signed 64-bit count.
															+int64_t gguf_get_n_kv(const struct gguf_context * ctx) {
															+    return static_cast<int64_t>(ctx->kv.size());
															+}
														
 
															+
														
 
															+// Linear search for `key` among the context's key-value pairs.
															+// Returns the index of the first exact match, or -1 if no pair has that key.
															+int64_t gguf_find_key(const struct gguf_context * ctx, const char * key) {
															+    const int64_t count = gguf_get_n_kv(ctx);
															+
															+    for (int64_t idx = 0; idx < count; ++idx) {
															+        const char * candidate = gguf_get_key(ctx, idx);
															+        if (strcmp(key, candidate) == 0) {
															+            return idx;
															+        }
															+    }
															+
															+    // nothing matched
															+    return -1;
															+}
														
 
															+
														
 
															+// Returns the key string of the pair at `key_id`; asserts that `key_id` is a valid index.
															+const char * gguf_get_key(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_key().c_str();
															+}
														
 
															+
														
 
															+// Returns GGUF_TYPE_ARRAY when the pair at `key_id` is flagged as an array,
															+// otherwise the pair's own type. Asserts that `key_id` is a valid index.
															+enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+
															+    const auto & entry = ctx->kv[key_id];
															+    if (entry.is_array) {
															+        return GGUF_TYPE_ARRAY;
															+    }
															+    return entry.get_type();
															+}
														
 
															+
														
 
															+// Returns the element type of the array stored at `key_id`.
															+// Asserts that `key_id` is a valid index and that the pair is flagged as an array.
															+enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].is_array);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_type();
															+}
														
 
															+
														
 
															+// Returns a pointer to the `data` buffer of the pair at `key_id`.
															+// Asserts that `key_id` is a valid index and that the pair's type is not GGUF_TYPE_STRING.
															+const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.data.data();
															+}
														
 
															+
														
 
															+// Returns the `i`-th string of the string-typed pair at `key_id`.
															+// Asserts that `key_id` is a valid index, that the pair's type is GGUF_TYPE_STRING,
															+// and that `i` addresses an existing element of `data_string`.
															+const char * gguf_get_arr_str(const struct gguf_context * ctx, int64_t key_id, size_t i) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_type() == GGUF_TYPE_STRING);
															+    // an out-of-range element index would otherwise index past the end of data_string
															+    GGML_ASSERT(i < ctx->kv[key_id].data_string.size());
															+    return ctx->kv[key_id].data_string[i].c_str();
															+}
														
 
															+
														
 
															+// Returns the number of elements held by the pair at `key_id`.
															+// Asserts that `key_id` is a valid index.
															+size_t gguf_get_arr_n(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+
															+    const auto & entry = ctx->kv[key_id];
															+
															+    // strings live in data_string, so the count is simply its length
															+    if (entry.type == GGUF_TYPE_STRING) {
															+        return entry.data_string.size();
															+    }
															+
															+    // every other type: size of `data` divided by the per-element size, which must divide evenly
															+    const size_t type_size = gguf_type_size(entry.type);
															+    GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0);
															+    return entry.data.size() / type_size;
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as uint8_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<uint8_t>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as int8_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+int8_t gguf_get_val_i8(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<int8_t>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as uint16_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<uint16_t>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as int16_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+int16_t gguf_get_val_i16(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<int16_t>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as uint32_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<uint32_t>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as int32_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+int32_t gguf_get_val_i32(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<int32_t>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as float.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+float gguf_get_val_f32(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<float>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as uint64_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<uint64_t>();
															+}
														
 
															+
														
 
															+// Reads the pair at `key_id` as int64_t.
															+// Asserts that `key_id` is a valid index and that the pair's get_ne() is 1.
															+int64_t gguf_get_val_i64(const struct gguf_context * ctx, int64_t key_id) {
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
															+
															+    const auto & entry = ctx->kv[key_id];
															+    return entry.get_val<int64_t>();
															+}
														
 
															+
														
 
															+double gguf_get_val_f64(const struct gguf_context * ctx, int64_t key_id) {
														
 
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
														
 
															+    return ctx->kv[key_id].get_val<double>();
														
 
															+}
														
 
															+
														
 
															+bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id) {
														
 
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
														
 
															+    return ctx->kv[key_id].get_val<bool>();
														
 
															+}
														
 
															+
														
 
															+const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) {
														
 
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
														
 
															+    return ctx->kv[key_id].get_val<std::string>().c_str();
														
 
															+}
														
 
															+
														
 
															+const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id) {
														
 
															+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
														
 
															+    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
														
 
															+    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
														
 
															+    return ctx->kv[key_id].data.data();
														
 
															+}
														
 
															+
														
 
															+int64_t gguf_get_n_tensors(const struct gguf_context * ctx) {
														
 
															+    return ctx->info.size();
														
 
															+}
														
 
															+
														
 
															+int64_t gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
														
 
															+    // return -1 if tensor not found
														
 
															+    int64_t tensor_id = -1;
														
 
															+
														
 
															+    const int64_t n_tensors = gguf_get_n_tensors(ctx);
														
 
															+
														
 
															+    for (int64_t i = 0; i < n_tensors; ++i) {
														
 
															+        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
														
 
															+            tensor_id = i;
														
 
															+            break;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    return tensor_id;
														
 
															+}
														
 
															+
														
 
															+size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id) {
														
 
															+    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
														
 
															+    return ctx->info[tensor_id].offset;
														
 
															+}
														
 
															+
														
 
															+const char * gguf_get_tensor_name(const struct gguf_context * ctx, int64_t tensor_id) {
														
 
															+    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
														
 
															+    return ctx->info[tensor_id].t.name;
														
 
															+}
														
 
															+
														
 
															+enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int64_t tensor_id) {
														
 
															+    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
														
 
															+    return ctx->info[tensor_id].t.type;
														
 
															+}
														
 
															+
														
 
															+size_t gguf_get_tensor_size(const struct gguf_context * ctx, int64_t tensor_id) {
														
 
															+    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
														
 
															+    return ggml_nbytes(&ctx->info[tensor_id].t);
														
 
															+}
														
 
															+
														
 
															+int64_t gguf_remove_key(struct gguf_context * ctx, const char * key) {
														
 
															+    const int64_t key_id = gguf_find_key(ctx, key);
														
 
															+    if (key_id >= 0) {
														
 
															+        ctx->kv.erase(ctx->kv.begin() + key_id);
														
 
															+    }
														
 
															+    return key_id;
														
 
															+}
														
 
															+
														
 
															+template<typename T>
														
 
															+static void gguf_check_reserved_keys(const std::string & key, const T val) {
														
 
															+    if (key == GGUF_KEY_GENERAL_ALIGNMENT) {
														
 
															+        if constexpr (std::is_same<T, uint32_t>::value) {
														
 
															+            GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
														
 
															+        } else {
														
 
															+            GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
														
 
															+        }
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, val);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
														
 
															+    gguf_check_reserved_keys(key, val);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+    ctx->kv.emplace_back(key, std::string(val));
														
 
															+}
														
 
															+
														
 
															+void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n) {
														
 
															+    gguf_check_reserved_keys(key, data);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+
														
 
															+    const size_t nbytes = n*gguf_type_size(type);
														
 
															+    std::vector<int8_t> tmp(nbytes);
														
 
															+    if (!tmp.empty()) {
														
 
															+        memcpy(tmp.data(), data, nbytes);
														
 
															+    }
														
 
															+    ctx->kv.emplace_back(key, tmp);
														
 
															+    ctx->kv.back().cast(type);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, size_t n) {
														
 
															+    gguf_check_reserved_keys(key, data);
														
 
															+    gguf_remove_key(ctx, key);
														
 
															+
														
 
															+    std::vector<std::string> tmp(n);
														
 
															+    for (size_t i = 0; i < n; ++i) {
														
 
															+        tmp[i] = data[i];
														
 
															+    }
														
 
															+    ctx->kv.emplace_back(key, tmp);
														
 
															+}
														
 
															+
														
 
															+// set or add KV pairs from another context
														
 
															+void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src) {
														
 
															+    const int64_t n_kv = gguf_get_n_kv(src);
														
 
															+    for (int64_t i = 0; i < n_kv; ++i) {
														
 
															+        const struct gguf_kv & kv = src->kv[i];
														
 
															+
														
 
															+        if (!kv.is_array) {
														
 
															+            switch (kv.get_type()) {
														
 
															+                case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>());             break;
														
 
															+                case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, kv.get_key().c_str(), kv.get_val<int8_t>());              break;
														
 
															+                case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>());            break;
														
 
															+                case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>());             break;
														
 
															+                case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>());            break;
														
 
															+                case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>());             break;
														
 
															+                case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>());               break;
														
 
															+                case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>());            break;
														
 
															+                case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>());             break;
														
 
															+                case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>());              break;
														
 
															+                case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>());                break;
														
 
															+                case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
														
 
															+                case GGUF_TYPE_ARRAY:
														
 
															+                default: GGML_ABORT("invalid type");
														
 
															+            }
														
 
															+            continue;
														
 
															+        }
														
 
															+
														
 
															+        const size_t ne = kv.get_ne();
														
 
															+
														
 
															+        switch (kv.get_type()) {
														
 
															+            case GGUF_TYPE_UINT8:
														
 
															+            case GGUF_TYPE_INT8:
														
 
															+            case GGUF_TYPE_UINT16:
														
 
															+            case GGUF_TYPE_INT16:
														
 
															+            case GGUF_TYPE_UINT32:
														
 
															+            case GGUF_TYPE_INT32:
														
 
															+            case GGUF_TYPE_FLOAT32:
														
 
															+            case GGUF_TYPE_UINT64:
														
 
															+            case GGUF_TYPE_INT64:
														
 
															+            case GGUF_TYPE_FLOAT64:
														
 
															+            case GGUF_TYPE_BOOL: {
														
 
															+                gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
														
 
															+            } break;
														
 
															+            case GGUF_TYPE_STRING: {
														
 
															+                std::vector<const char *> tmp(ne);
														
 
															+                for (size_t j = 0; j < ne; ++j) {
														
 
															+                    tmp[j] = kv.data_string[j].c_str();
														
 
															+                }
														
 
															+                gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
														
 
															+            } break;
														
 
															+            case GGUF_TYPE_ARRAY:
														
 
															+            default: GGML_ABORT("invalid type");
														
 
															+        }
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+void gguf_add_tensor(
														
 
															+             struct gguf_context * ctx,
														
 
															+        const struct ggml_tensor * tensor) {
														
 
															+    GGML_ASSERT(tensor);
														
 
															+    if (gguf_find_tensor(ctx, tensor->name) != -1) {
														
 
															+        GGML_ABORT("duplicate tensor name: %s", tensor->name);
														
 
															+    }
														
 
															+
														
 
															+    struct gguf_tensor_info ti;
														
 
															+    ti.t = *tensor;
														
 
															+    ti.offset = ctx->info.empty() ? 0 :
														
 
															+        ctx->info.back().offset + GGML_PAD(ggml_nbytes(&ctx->info.back().t), ctx->alignment);
														
 
															+    ctx->info.push_back(ti);
														
 
															+}
														
 
															+
														
 
															+void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
														
 
															+    const int64_t tensor_id = gguf_find_tensor(ctx, name);
														
 
															+    if (tensor_id < 0) {
														
 
															+        GGML_ABORT("tensor not found: %s", name);
														
 
															+    }
														
 
															+    struct ggml_tensor * tensor = &ctx->info[tensor_id].t;
														
 
															+    const size_t  type_size = ggml_type_size(type);
														
 
															+    const int64_t blck_size = ggml_blck_size(type);
														
 
															+
														
 
															+    tensor->type = type;
														
 
															+    GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
														
 
															+
														
 
															+    tensor->nb[0] = type_size;
														
 
															+    tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
														
 
															+    for (int i = 2; i < GGML_MAX_DIMS; i++) {
														
 
															+        tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1];
														
 
															+    }
														
 
															+
														
 
															+    // update offsets
														
 
															+    const int64_t n_tensors = gguf_get_n_tensors(ctx);
														
 
															+    for (int64_t i = tensor_id + 1; i < n_tensors; ++i) {
														
 
															+        ctx->info[i].offset = ctx->info[i - 1].offset + GGML_PAD(ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment);
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data) {
														
 
															+    const int64_t tensor_id = gguf_find_tensor(ctx, name);
														
 
															+    if (tensor_id < 0) {
														
 
															+        GGML_ABORT("tensor not found: %s", name);
														
 
															+    }
														
 
															+
														
 
															+    ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
														
 
															+}
														
 
															+
														
 
															+struct gguf_writer {
														
 
															+    std::vector<int8_t> & buf;
														
 
															+
														
 
															+    gguf_writer(std::vector<int8_t> & buf) : buf(buf) {}
														
 
															+
														
 
															+    template <typename T>
														
 
															+    void write(const T & val) const {
														
 
															+        for (size_t i = 0; i < sizeof(val); ++i) {
														
 
															+            buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    void write(const std::vector<int8_t> & val) const {
														
 
															+        buf.insert(buf.end(), val.begin(), val.end());
														
 
															+    }
														
 
															+
														
 
															+    void write(const bool & val) const {
														
 
															+        const int8_t val8 = val ? 1 : 0;
														
 
															+        write(val8);
														
 
															+    }
														
 
															+
														
 
															+    void write(const std::string & val) const {
														
 
															+        {
														
 
															+            const uint64_t n = val.length();
														
 
															+            write(n);
														
 
															+        }
														
 
															+        for (size_t i = 0; i < val.length(); ++i) {
														
 
															+            buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    void write(const char * val) const {
														
 
															+        write(std::string(val));
														
 
															+    }
														
 
															+
														
 
															+    void write(const enum ggml_type & val) const {
														
 
															+        write(int32_t(val));
														
 
															+    }
														
 
															+
														
 
															+    void write(const enum gguf_type & val) const {
														
 
															+        write(int32_t(val));
														
 
															+    }
														
 
															+
														
 
															+    void write(const struct gguf_kv & kv) const {
														
 
															+        const uint64_t ne = kv.get_ne();
														
 
															+
														
 
															+        write(kv.get_key());
														
 
															+
														
 
															+        if (kv.is_array) {
														
 
															+            write(GGUF_TYPE_ARRAY);
														
 
															+            write(kv.get_type());
														
 
															+            write(ne);
														
 
															+        } else {
														
 
															+            write(kv.get_type());
														
 
															+        }
														
 
															+
														
 
															+        switch (kv.get_type()) {
														
 
															+            case GGUF_TYPE_UINT8:
														
 
															+            case GGUF_TYPE_INT8:
														
 
															+            case GGUF_TYPE_UINT16:
														
 
															+            case GGUF_TYPE_INT16:
														
 
															+            case GGUF_TYPE_UINT32:
														
 
															+            case GGUF_TYPE_INT32:
														
 
															+            case GGUF_TYPE_FLOAT32:
														
 
															+            case GGUF_TYPE_UINT64:
														
 
															+            case GGUF_TYPE_INT64:
														
 
															+            case GGUF_TYPE_FLOAT64: {
														
 
															+                write(kv.data);
														
 
															+            } break;
														
 
															+            case GGUF_TYPE_BOOL: {
														
 
															+                for (size_t i = 0; i < ne; ++i) {
														
 
															+                    write(kv.get_val<bool>(i));
														
 
															+                }
														
 
															+            } break;
														
 
															+            case GGUF_TYPE_STRING: {
														
 
															+                for (size_t i = 0; i < ne; ++i) {
														
 
															+                    write(kv.get_val<std::string>(i));
														
 
															+                }
														
 
															+            } break;
														
 
															+            case GGUF_TYPE_ARRAY:
														
 
															+            default: GGML_ABORT("invalid type");
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    void write_tensor_meta(const struct gguf_tensor_info & info) const {
														
 
															+        write(info.t.name);
														
 
															+
														
 
															+        const uint32_t n_dims = ggml_n_dims(&info.t);
														
 
															+        write(n_dims);
														
 
															+
														
 
															+        for (uint32_t j = 0; j < n_dims; ++j) {
														
 
															+            write(info.t.ne[j]);
														
 
															+        }
														
 
															+        write(info.t.type);
														
 
															+        write(info.offset);
														
 
															+    }
														
 
															+
														
 
															+    void pad(const size_t alignment) const {
														
 
															+        while (buf.size() % alignment != 0) {
														
 
															+            const int8_t zero = 0;
														
 
															+            write(zero);
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const {
														
 
															+        GGML_ASSERT(buf.size() - offset_data == info.offset);
														
 
															+
														
 
															+        GGML_ASSERT(ggml_is_contiguous(&info.t));
														
 
															+        const size_t offset = buf.size();
														
 
															+        const size_t nbytes = ggml_nbytes(&info.t);
														
 
															+
														
 
															+        buf.resize(offset + nbytes);
														
 
															+        if (info.t.buffer) {
														
 
															+            ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes);
														
 
															+        } else {
														
 
															+            GGML_ASSERT(info.t.data);
														
 
															+            memcpy(buf.data() + offset, info.t.data, nbytes);
														
 
															+        }
														
 
															+
														
 
															+        pad(alignment);
														
 
															+    }
														
 
															+};
														
 
															+
														
 
															+void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
														
 
															+    const struct gguf_writer gw(buf);
														
 
															+
														
 
															+    const int64_t n_kv      = gguf_get_n_kv(ctx);
														
 
															+    const int64_t n_tensors = gguf_get_n_tensors(ctx);
														
 
															+
														
 
															+    // write header
														
 
															+    gw.write(GGUF_MAGIC[0]);
														
 
															+    gw.write(GGUF_MAGIC[1]);
														
 
															+    gw.write(GGUF_MAGIC[2]);
														
 
															+    gw.write(GGUF_MAGIC[3]);
														
 
															+    gw.write(ctx->version);
														
 
															+    gw.write(n_tensors);
														
 
															+    gw.write(n_kv);
														
 
															+
														
 
															+    // write key-value pairs
														
 
															+    for (int64_t i = 0; i < n_kv; ++i) {
														
 
															+        gw.write(ctx->kv[i]);
														
 
															+    }
														
 
															+
														
 
															+    // write tensor info
														
 
															+    for (int64_t i = 0; i < n_tensors; ++i) {
														
 
															+        gw.write_tensor_meta(ctx->info[i]);
														
 
															+    }
														
 
															+
														
 
															+    // we require the data section to be aligned
														
 
															+    gw.pad(ctx->alignment);
														
 
															+
														
 
															+    if (only_meta) {
														
 
															+        return;
														
 
															+    }
														
 
															+
														
 
															+    const size_t offset_data = gw.buf.size();
														
 
															+
														
 
															+    // write tensor data
														
 
															+    for (int64_t i = 0; i < n_tensors; ++i) {
														
 
															+        gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
														
 
															+    FILE * file = ggml_fopen(fname, "wb");
														
 
															+
														
 
															+    if (!file) {
														
 
															+        fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
														
 
															+        return false;
														
 
															+    }
														
 
															+
														
 
															+    std::vector<int8_t> buf;
														
 
															+    gguf_write_to_buf(ctx, buf, only_meta);
														
 
															+    const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size();
														
 
															+    fclose(file);
														
 
															+    return ok;
														
 
															+}
														
 
															+
														
 
															+size_t gguf_get_meta_size(const struct gguf_context * ctx) {
														
 
															+    // only return size
														
 
															+    std::vector<int8_t> buf;
														
 
															+    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
														
 
															+    return buf.size();
														
 
															+}
														
 
															+
														
 
															+void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
														
 
															+    std::vector<int8_t> buf;
														
 
															+    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
														
 
															+    memcpy(data, buf.data(), buf.size());
														
 
															+}
														
--- a/src/llama-impl.cpp
+++ b/src/llama-impl.cpp
@@ -1,5 +1,6 @@
 
															 #include "llama-impl.h"
														
 
															+#include "gguf.h"
														
 
															 #include "llama.h"
														
 
															 #include <cinttypes>
														
@@ -138,7 +139,7 @@ std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
 
															             {
														
 
															                 const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
														
 
															                 int arr_n = gguf_get_arr_n(ctx_gguf, i);
														
 
															-                const void * data = gguf_get_arr_data(ctx_gguf, i);
														
 
															+                const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
														
 
															                 std::stringstream ss;
														
 
															                 ss << "[";
														
 
															                 for (int j = 0; j < arr_n; j++) {
														
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -18,7 +18,7 @@ const char * llama_file_version_name(llama_fver version) {
 
															 }
														
 
															 namespace GGUFMeta {
														
 
															-    template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int)>
														
 
															+    template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int64_t)>
														
 
															     struct GKV_Base_Type {
														
 
															         static constexpr gguf_type gt = gt_;
														
@@ -60,10 +60,11 @@ namespace GGUFMeta {
 
															         public:
														
 
															         static constexpr gguf_type gt = GGUF_TYPE_ARRAY;
														
 
															         static ArrayInfo getter(const gguf_context *ctx, const int k) {
														
 
															+            const enum gguf_type arr_type = gguf_get_arr_type(ctx, k);
														
 
															             return ArrayInfo {
														
 
															-                gguf_get_arr_type(ctx, k),
														
 
															+                arr_type,
														
 
															                 size_t(gguf_get_arr_n(ctx, k)),
														
 
															-                gguf_get_arr_data(ctx, k),
														
 
															+                arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx, k),
														
 
															             };
														
 
															         }
														
 
															     };
														
@@ -553,7 +554,7 @@ llama_model_loader::llama_model_loader(const std::string & fname, bool use_mmap,
 
															             const enum gguf_type type   = gguf_get_kv_type(meta.get(), i);
														
 
															             const std::string type_name =
														
 
															                 type == GGUF_TYPE_ARRAY
														
 
															-                ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(meta.get(), i)), gguf_get_arr_n(meta.get(), i))
														
 
															+                ? format("%s[%s,%zu]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(meta.get(), i)), gguf_get_arr_n(meta.get(), i))
														
 
															                 : gguf_type_name(type);
														
 
															             std::string value          = gguf_kv_to_str(meta.get(), i);
														
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -875,7 +875,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
 
															         // update the gguf meta data as we go
														
 
															         gguf_set_tensor_type(ctx_outs[cur_split].get(), name.c_str(), new_type);
														
 
															-        gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data, new_size);
														
 
															+        GGML_ASSERT(gguf_get_tensor_size(ctx_outs[cur_split].get(), gguf_find_tensor(ctx_outs[cur_split].get(), name.c_str())) == new_size);
														
 
															+        gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data);
														
 
															         // write tensor data + padding
														
 
															         fout.write((const char *) new_data, new_size);
														
--- a/tests/test-gguf.cpp
+++ b/tests/test-gguf.cpp
@@ -15,66 +15,71 @@ constexpr int offset_has_tensors = 2000;
 
															 constexpr int offset_has_data    = 3000;
														
 
															 enum handcrafted_file_type {
														
 
															-    HANDCRAFTED_HEADER_BAD_MAGIC          =  10,
														
 
															-    HANDCRAFTED_HEADER_BAD_VERSION_1      =  20,
														
 
															-    HANDCRAFTED_HEADER_BAD_VERSION_FUTURE =  30,
														
 
															-    HANDCRAFTED_HEADER_BAD_N_TENSORS      =  40,
														
 
															-    HANDCRAFTED_HEADER_BAD_N_KV           =  50,
														
 
															-    HANDCRAFTED_HEADER_EMPTY              = 800,
														
 
															-
														
 
															-    HANDCRAFTED_KV_BAD_KEY_SIZE           =  10 + offset_has_kv,
														
 
															-    HANDCRAFTED_KV_BAD_TYPE               =  20 + offset_has_kv,
														
 
															-    HANDCRAFTED_KV_BAD_VALUE_SIZE         =  30 + offset_has_kv,
														
 
															-    HANDCRAFTED_KV_DUPLICATE_KEY          =  40 + offset_has_kv,
														
 
															-    HANDCRAFTED_KV_SUCCESS                = 800 + offset_has_kv,
														
 
															-
														
 
															-    HANDCRAFTED_TENSORS_BAD_NAME_SIZE     =  10 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_BAD_N_DIMS        =  20 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_BAD_SHAPE         =  30 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_NE_TOO_BIG        =  40 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_BAD_TYPE          =  50 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_BAD_OFFSET        =  60 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_DUPLICATE_NAME    =  70 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_BAD_ALIGNMENT     =  80 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_SUCCESS           = 800 + offset_has_tensors,
														
 
															-    HANDCRAFTED_TENSORS_CUSTOM_ALIGN      = 810 + offset_has_tensors,
														
 
															-
														
 
															-    HANDCRAFTED_DATA_NOT_ENOUGH_DATA      =  10 + offset_has_data,
														
 
															-    HANDCRAFTED_DATA_BAD_ALIGNMENT        =  20 + offset_has_data,
														
 
															-    HANDCRAFTED_DATA_SUCCESS              = 800 + offset_has_data,
														
 
															-    HANDCRAFTED_DATA_CUSTOM_ALIGN         = 810 + offset_has_data,
														
 
															+    HANDCRAFTED_HEADER_BAD_MAGIC           =  10,
														
 
															+    HANDCRAFTED_HEADER_BAD_VERSION_1       =  20,
														
 
															+    HANDCRAFTED_HEADER_BAD_VERSION_FUTURE  =  30,
														
 
															+    HANDCRAFTED_HEADER_BAD_N_TENSORS       =  40,
														
 
															+    HANDCRAFTED_HEADER_BAD_N_KV            =  50,
														
 
															+    HANDCRAFTED_HEADER_EMPTY               = 800,
														
 
															+
														
 
															+    HANDCRAFTED_KV_BAD_KEY_SIZE            =  10 + offset_has_kv,
														
 
															+    HANDCRAFTED_KV_BAD_TYPE                =  20 + offset_has_kv,
														
 
															+    // HANDCRAFTED_KV_BAD_VALUE_SIZE          =  30 + offset_has_kv, // removed because it can result in allocations > 1 TB (default sanitizer limit)
														
 
															+    HANDCRAFTED_KV_DUPLICATE_KEY           =  40 + offset_has_kv,
														
 
															+    HANDCRAFTED_KV_BAD_ALIGN               =  50 + offset_has_kv,
														
 
															+    HANDCRAFTED_KV_SUCCESS                 = 800 + offset_has_kv,
														
 
															+
														
 
															+    HANDCRAFTED_TENSORS_BAD_NAME_SIZE      =  10 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_BAD_N_DIMS         =  20 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_BAD_SHAPE          =  30 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_NE_TOO_BIG         =  40 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_BAD_TYPE           =  50 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_BAD_OFFSET         =  60 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_DUPLICATE_NAME     =  70 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_BAD_ALIGN          =  75 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN =  80 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_SUCCESS            = 800 + offset_has_tensors,
														
 
															+    HANDCRAFTED_TENSORS_CUSTOM_ALIGN       = 810 + offset_has_tensors,
														
 
															+
														
 
															+    HANDCRAFTED_DATA_NOT_ENOUGH_DATA       =  10 + offset_has_data,
														
 
															+    HANDCRAFTED_DATA_BAD_ALIGN             =  15 + offset_has_data,
														
 
															+    HANDCRAFTED_DATA_INCONSISTENT_ALIGN    =  20 + offset_has_data,
														
 
															+    HANDCRAFTED_DATA_SUCCESS               = 800 + offset_has_data,
														
 
															+    HANDCRAFTED_DATA_CUSTOM_ALIGN          = 810 + offset_has_data,
														
 
															 };
														
 
															 std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) {
														
 
															     switch (hft) {
														
 
															-        case HANDCRAFTED_HEADER_BAD_MAGIC:          return "HEADER_BAD_MAGIC";
														
 
															-        case HANDCRAFTED_HEADER_BAD_VERSION_1:      return "HEADER_BAD_VERSION_1";
														
 
															-        case HANDCRAFTED_HEADER_BAD_VERSION_FUTURE: return "HEADER_BAD_VERSION_FUTURE";
														
 
															-        case HANDCRAFTED_HEADER_BAD_N_KV:           return "HEADER_BAD_N_KV";
														
 
															-        case HANDCRAFTED_HEADER_BAD_N_TENSORS:      return "HEADER_BAD_N_TENSORS";
														
 
															-        case HANDCRAFTED_HEADER_EMPTY:              return "HEADER_EMPTY";
														
 
															-
														
 
															-        case HANDCRAFTED_KV_BAD_KEY_SIZE:           return "KV_BAD_KEY_SIZE";
														
 
															-        case HANDCRAFTED_KV_BAD_TYPE:               return "KV_BAD_TYPE";
														
 
															-        case HANDCRAFTED_KV_BAD_VALUE_SIZE:         return "KV_BAD_VALUE_SIZE";
														
 
															-        case HANDCRAFTED_KV_DUPLICATE_KEY:          return "KV_DUPLICATE_KEY";
														
 
															-        case HANDCRAFTED_KV_SUCCESS:                return "KV_RANDOM_KV";
														
 
															-
														
 
															-        case HANDCRAFTED_TENSORS_BAD_NAME_SIZE:     return "TENSORS_BAD_NAME_SIZE";
														
 
															-        case HANDCRAFTED_TENSORS_BAD_N_DIMS:        return "TENSORS_BAD_N_DIMS";
														
 
															-        case HANDCRAFTED_TENSORS_BAD_SHAPE:         return "TENSORS_BAD_SHAPE";
														
 
															-        case HANDCRAFTED_TENSORS_NE_TOO_BIG:        return "TENSORS_NE_TOO_BIG";
														
 
															-        case HANDCRAFTED_TENSORS_BAD_TYPE:          return "TENSORS_BAD_TYPE";
														
 
															-        case HANDCRAFTED_TENSORS_BAD_OFFSET:        return "TENSORS_BAD_OFFSET";
														
 
															-        case HANDCRAFTED_TENSORS_DUPLICATE_NAME:    return "TENSORS_DUPLICATE_NAME";
														
 
															-        case HANDCRAFTED_TENSORS_BAD_ALIGNMENT:     return "TENSORS_BAD_ALIGNMENT";
														
 
															-        case HANDCRAFTED_TENSORS_SUCCESS:           return "TENSORS_SUCCESS";
														
 
															-        case HANDCRAFTED_TENSORS_CUSTOM_ALIGN:      return "TENSORS_CUSTOM_ALIGN";
														
 
															-
														
 
															-        case HANDCRAFTED_DATA_NOT_ENOUGH_DATA:      return "DATA_NOT_ENOUGH_DATA";
														
 
															-        case HANDCRAFTED_DATA_BAD_ALIGNMENT:        return "DATA_BAD_ALIGNMENT";
														
 
															-        case HANDCRAFTED_DATA_SUCCESS:              return "DATA_SUCCESS";
														
 
															-        case HANDCRAFTED_DATA_CUSTOM_ALIGN:         return "DATA_CUSTOM_ALIGN";
														
 
															+        case HANDCRAFTED_HEADER_BAD_MAGIC:           return "HEADER_BAD_MAGIC";
														
 
															+        case HANDCRAFTED_HEADER_BAD_VERSION_1:       return "HEADER_BAD_VERSION_1";
														
 
															+        case HANDCRAFTED_HEADER_BAD_VERSION_FUTURE:  return "HEADER_BAD_VERSION_FUTURE";
														
 
															+        case HANDCRAFTED_HEADER_BAD_N_KV:            return "HEADER_BAD_N_KV";
														
 
															+        case HANDCRAFTED_HEADER_BAD_N_TENSORS:       return "HEADER_BAD_N_TENSORS";
														
 
															+        case HANDCRAFTED_HEADER_EMPTY:               return "HEADER_EMPTY";
														
 
															+
														
 
															+        case HANDCRAFTED_KV_BAD_KEY_SIZE:            return "KV_BAD_KEY_SIZE";
														
 
															+        case HANDCRAFTED_KV_BAD_TYPE:                return "KV_BAD_TYPE";
														
 
															+        case HANDCRAFTED_KV_DUPLICATE_KEY:           return "KV_DUPLICATE_KEY";
														
 
															+        case HANDCRAFTED_KV_BAD_ALIGN:               return "KV_BAD_ALIGN";
														
 
															+        case HANDCRAFTED_KV_SUCCESS:                 return "KV_RANDOM_KV";
														
 
															+
														
 
															+        case HANDCRAFTED_TENSORS_BAD_NAME_SIZE:      return "TENSORS_BAD_NAME_SIZE";
														
 
															+        case HANDCRAFTED_TENSORS_BAD_N_DIMS:         return "TENSORS_BAD_N_DIMS";
														
 
															+        case HANDCRAFTED_TENSORS_BAD_SHAPE:          return "TENSORS_BAD_SHAPE";
														
 
															+        case HANDCRAFTED_TENSORS_NE_TOO_BIG:         return "TENSORS_NE_TOO_BIG";
														
 
															+        case HANDCRAFTED_TENSORS_BAD_TYPE:           return "TENSORS_BAD_TYPE";
														
 
															+        case HANDCRAFTED_TENSORS_BAD_OFFSET:         return "TENSORS_BAD_OFFSET";
														
 
															+        case HANDCRAFTED_TENSORS_DUPLICATE_NAME:     return "TENSORS_DUPLICATE_NAME";
														
 
															+        case HANDCRAFTED_TENSORS_BAD_ALIGN:          return "TENSORS_BAD_ALIGN";
														
 
															+        case HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN: return "TENSORS_INCONSISTENT_ALIGN";
														
 
															+        case HANDCRAFTED_TENSORS_SUCCESS:            return "TENSORS_SUCCESS";
														
 
															+        case HANDCRAFTED_TENSORS_CUSTOM_ALIGN:       return "TENSORS_CUSTOM_ALIGN";
														
 
															+
														
 
															+        case HANDCRAFTED_DATA_NOT_ENOUGH_DATA:       return "DATA_NOT_ENOUGH_DATA";
														
 
															+        case HANDCRAFTED_DATA_BAD_ALIGN:             return "DATA_BAD_ALIGN";
														
 
															+        case HANDCRAFTED_DATA_INCONSISTENT_ALIGN:    return "DATA_INCONSISTENT_ALIGN";
														
 
															+        case HANDCRAFTED_DATA_SUCCESS:               return "DATA_SUCCESS";
														
 
															+        case HANDCRAFTED_DATA_CUSTOM_ALIGN:          return "DATA_CUSTOM_ALIGN";
														
 
															     }
														
 
															     GGML_ABORT("fatal error");
														
 
															 }
														
@@ -140,31 +145,41 @@ std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::mt19937
 
															     return kv_types;
														
 
															 }
														
 
															-static void helper_write(const void * data, const size_t nbytes, FILE * file) {
														
 
															+template <typename T> // overload for a single value: writes the sizeof(val) bytes of val's object representation to file
														
 
															+static void helper_write(FILE * file, const T & val) {
														
 
															+    GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val)); // the fwrite call sits inside GGML_ASSERT, which checks that all bytes were written
														
 
															+}
														
 
															+
														
 
															+static void helper_write(FILE * file, const void * data, const size_t nbytes) { // overload for an arbitrary buffer: writes nbytes bytes starting at data to file
														
 
															     GGML_ASSERT(fwrite(data, 1, nbytes, file) == nbytes); // same pattern: the write itself is the asserted expression
														
 
															 }
														
 
															 static FILE * get_handcrafted_file(const unsigned int seed, const enum handcrafted_file_type hft, const int extra_bytes = 0) {
														
 
															     FILE * file = tmpfile();
														
 
															+    if (!file) {
														
 
															+        return file;
														
 
															+    }
														
 
															+
														
 
															     std::mt19937 rng(seed);
														
 
															+    uint32_t alignment = GGUF_DEFAULT_ALIGNMENT;
														
 
															     if (hft == HANDCRAFTED_HEADER_BAD_MAGIC) {
														
 
															         const char bad_magic[4] = {'F', 'U', 'G', 'G'};
														
 
															-        helper_write(bad_magic, sizeof(bad_magic), file);
														
 
															+        helper_write(file, bad_magic, sizeof(bad_magic));
														
 
															     } else {
														
 
															-        helper_write(GGUF_MAGIC, 4, file);
														
 
															+        helper_write(file, GGUF_MAGIC, 4);
														
 
															     }
														
 
															     if (hft == HANDCRAFTED_HEADER_BAD_VERSION_1) {
														
 
															         const uint32_t version = 1;
														
 
															-        helper_write(&version, sizeof(version), file);
														
 
															+        helper_write(file, version);
														
 
															     } else if (hft == HANDCRAFTED_HEADER_BAD_VERSION_FUTURE) {
														
 
															         const uint32_t version = GGUF_VERSION + 1;
														
 
															-        helper_write(&version, sizeof(version), file);
														
 
															+        helper_write(file, version);
														
 
															     } else {
														
 
															         const uint32_t version = GGUF_VERSION;
														
 
															-        helper_write(&version, sizeof(version), file);
														
 
															+        helper_write(file, version);
														
 
															     }
														
 
															     std::vector<tensor_config_t> tensor_configs;
														
@@ -174,10 +189,10 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
 
															     if (hft == HANDCRAFTED_HEADER_BAD_N_TENSORS) {
														
 
															         const uint64_t n_tensors = -1;
														
 
															-        helper_write(&n_tensors, sizeof(n_tensors), file);
														
 
															+        helper_write(file, n_tensors);
														
 
															     } else {
														
 
															         const uint64_t n_tensors = tensor_configs.size();
														
 
															-        helper_write(&n_tensors, sizeof(n_tensors), file);
														
 
															+        helper_write(file, n_tensors);
														
 
															     }
														
 
															     std::vector<std::pair<enum gguf_type, enum gguf_type>> kv_types;
														
@@ -186,41 +201,49 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
 
															     }
														
 
															     {
														
 
															         uint64_t n_kv = kv_types.size();
														
 
															-        if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
														
 
															+        if (hft == HANDCRAFTED_KV_BAD_ALIGN      ||
														
 
															+            hft == HANDCRAFTED_TENSORS_BAD_ALIGN || hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN ||
														
 
															+            hft == HANDCRAFTED_DATA_BAD_ALIGN    || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
														
 
															+
														
 
															             n_kv += 1;
														
 
															         } else if (hft == HANDCRAFTED_HEADER_BAD_N_KV) {
														
 
															             n_kv = -1;
														
 
															         }
														
 
															-        helper_write(&n_kv, sizeof(n_kv), file);
														
 
															+        helper_write(file, n_kv);
														
 
															     }
														
 
															     if (hft < offset_has_kv) {
														
 
															+        while (ftell(file) % alignment != 0) {
														
 
															+            const char pad = 0;
														
 
															+            helper_write(file, pad);
														
 
															+        }
														
 
															+
														
 
															         for (int i = 0; i < extra_bytes; ++i) {
														
 
															             const char tmp = 0;
														
 
															-            helper_write(&tmp, sizeof(tmp), file);
														
 
															+            helper_write(file, tmp);
														
 
															         }
														
 
															         rewind(file);
														
 
															         return file;
														
 
															     }
														
 
															     for (int i = 0; i < int(kv_types.size()); ++i) {
														
 
															-        const enum gguf_type type     = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? -1 : kv_types[i].first);
														
 
															-        const enum gguf_type type_arr = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? -1 : kv_types[i].second);
														
 
															+        const enum gguf_type type     = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types[i].first);
														
 
															+        const enum gguf_type type_arr = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types[i].second);
														
 
															         const std::string key = "my_key_" + std::to_string((hft == HANDCRAFTED_KV_DUPLICATE_KEY ? i/2 : i));
														
 
															         if (hft == HANDCRAFTED_KV_BAD_KEY_SIZE) {
														
 
															             const uint64_t n = -1;
														
 
															-            helper_write(&n, sizeof(n), file);
														
 
															+            helper_write(file, n);
														
 
															         } else {
														
 
															             const uint64_t n = key.length();
														
 
															-            helper_write(&n, sizeof(n), file);
														
 
															+            helper_write(file, n);
														
 
															         }
														
 
															-        helper_write(key.data(), key.length(), file);
														
 
															+        helper_write(file, key.data(), key.length());
														
 
															         {
														
 
															             const int32_t type32 = int32_t(type);
														
 
															-            helper_write(&type32, sizeof(type32), file);
														
 
															+            helper_write(file, type32);
														
 
															         }
														
 
															         uint32_t data[16];
														
@@ -233,69 +256,67 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
 
															         if (type == GGUF_TYPE_STRING) {
														
 
															             const uint64_t n = rng() % sizeof(data);
														
 
															-            helper_write(&n,   sizeof(n), file);
														
 
															-            helper_write(data,        n,  file);
														
 
															+            helper_write(file, n);
														
 
															+            helper_write(file, data, n);
														
 
															             continue;
														
 
															         }
														
 
															         if (type == GGUF_TYPE_ARRAY) {
														
 
															             {
														
 
															                 const int32_t type32 = int32_t(type_arr);
														
 
															-                helper_write(&type32, sizeof(type32), file);
														
 
															+                helper_write(file, type32);
														
 
															             }
														
 
															             if (type_arr == GGUF_TYPE_STRING) {
														
 
															                 const uint64_t nstr = rng() % (16 + 1);
														
 
															-                helper_write(&nstr, sizeof(nstr), file);
														
 
															+                helper_write(file, nstr);
														
 
															                 for (uint64_t istr = 0; istr < nstr; ++istr) {
														
 
															                     const uint64_t n = rng() % (sizeof(uint32_t) + 1);
														
 
															-                    helper_write(&n,          sizeof(n), file);
														
 
															-                    helper_write(&data[istr],        n,  file);
														
 
															+                    helper_write(file, n);
														
 
															+                    helper_write(file, &data[istr], n);
														
 
															                 }
														
 
															                 continue;
														
 
															             }
														
 
															             const size_t type_size = gguf_type_size(type_arr);
														
 
															             const uint64_t n = (rng() % sizeof(data)) / type_size;
														
 
															-            helper_write(&n,    sizeof(n),   file);
														
 
															-            helper_write(&data, n*type_size, file);
														
 
															+            helper_write(file, n);
														
 
															+            helper_write(file, &data, n*type_size);
														
 
															             continue;
														
 
															         }
														
 
															-        size_t type_size = hft == HANDCRAFTED_KV_BAD_TYPE ? 1 : gguf_type_size(type);
														
 
															-        if (hft == HANDCRAFTED_KV_BAD_VALUE_SIZE) {
														
 
															-            type_size += rng() % 3;
														
 
															-        }
														
 
															-        helper_write(data, type_size, file);
														
 
															+        helper_write(file, data, hft == HANDCRAFTED_KV_BAD_TYPE ? 1 : gguf_type_size(type));
														
 
															     }
														
 
															-    if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
														
 
															-        const std::string key = "general.alignment";
														
 
															-        {
														
 
															-            const uint64_t n = key.length();
														
 
															-            helper_write(&n, sizeof(n), file);
														
 
															-        }
														
 
															-        helper_write(key.data(), key.length(), file);
														
 
															+    if (hft == HANDCRAFTED_KV_BAD_ALIGN      ||
														
 
															+        hft == HANDCRAFTED_TENSORS_BAD_ALIGN || hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN ||
														
 
															+        hft == HANDCRAFTED_DATA_BAD_ALIGN    || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
														
 
															+
														
 
															+        const uint64_t n = strlen(GGUF_KEY_GENERAL_ALIGNMENT);
														
 
															+        helper_write(file, n);
														
 
															+        helper_write(file, GGUF_KEY_GENERAL_ALIGNMENT, n);
														
 
															         const int32_t type = gguf_type(GGUF_TYPE_UINT32);
														
 
															-        helper_write(&type, sizeof(type), file);
														
 
															+        helper_write(file, type);
														
 
															-        const uint32_t alignment = GGUF_DEFAULT_ALIGNMENT + 1;
														
 
															-        helper_write(&alignment, sizeof(alignment), file);
														
 
															+        alignment = expect_context_not_null(hft) ? 1 : 13;
														
 
															+        helper_write(file, alignment);
														
 
															     }
														
 
															     if (hft < offset_has_tensors) {
														
 
															+        while (ftell(file) % alignment != 0) {
														
 
															+            const char pad = 0;
														
 
															+            helper_write(file, pad);
														
 
															+        }
														
 
															+
														
 
															         for (int i = 0; i < extra_bytes; ++i) {
														
 
															             const char tmp = 0;
														
 
															-            helper_write(&tmp, sizeof(tmp), file);
														
 
															+            helper_write(file, tmp);
														
 
															         }
														
 
															         rewind(file);
														
 
															         return file;
														
 
															     }
														
 
															-    uint32_t alignment = GGUF_DEFAULT_ALIGNMENT;
														
 
															-    if (hft == HANDCRAFTED_TENSORS_BAD_ALIGNMENT || hft == HANDCRAFTED_DATA_BAD_ALIGNMENT) {
														
 
															-        alignment -= 1;
														
 
															-    } else if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
														
 
															-        alignment += 1;
														
 
															+    if (hft == HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN || hft == HANDCRAFTED_DATA_INCONSISTENT_ALIGN) {
														
 
															+        alignment = 1;
														
 
															     }
														
 
															     uint64_t offset = 0;
														
@@ -313,9 +334,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
 
															         }
														
 
															         {
														
 
															             const uint64_t n = name.length();
														
 
															-            helper_write(&n, sizeof(n), file);
														
 
															+            helper_write(file, n);
														
 
															         }
														
 
															-        helper_write(name.data(), name.length(), file);
														
 
															+        helper_write(file, name.data(), name.length());
														
 
															         uint32_t n_dims = hft == HANDCRAFTED_TENSORS_NE_TOO_BIG ? 2 : 1;
														
 
															         for (int i = GGML_MAX_DIMS-1; i >= 1; --i) {
														
@@ -326,35 +347,35 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
 
															         }
														
 
															         if (hft == HANDCRAFTED_TENSORS_BAD_N_DIMS) {
														
 
															             const uint32_t n_dims_bad = GGML_MAX_DIMS + 1;
														
 
															-            helper_write(&n_dims_bad, sizeof(n_dims_bad), file);
														
 
															+            helper_write(file, n_dims_bad);
														
 
															         } else {
														
 
															-            helper_write(&n_dims,     sizeof(n_dims),     file);
														
 
															+            helper_write(file, n_dims);
														
 
															         }
														
 
															         if (hft == HANDCRAFTED_TENSORS_BAD_SHAPE) {
														
 
															             for (uint32_t j = 0; j < n_dims; ++j) {
														
 
															                 const int64_t bad_dim = -1;
														
 
															-                helper_write(&bad_dim, sizeof(bad_dim), file);
														
 
															+                helper_write(file, bad_dim);
														
 
															             }
														
 
															         } else if (hft == HANDCRAFTED_TENSORS_NE_TOO_BIG){
														
 
															             for (uint32_t j = 0; j < n_dims; ++j) {
														
 
															                 const int64_t big_dim = 4*int64_t(INT32_MAX);
														
 
															-                helper_write(&big_dim, sizeof(big_dim), file);
														
 
															+                helper_write(file, big_dim);
														
 
															             }
														
 
															         } else {
														
 
															-            helper_write(shape.data(), n_dims*sizeof(int64_t), file);
														
 
															+            helper_write(file, shape.data(), n_dims*sizeof(int64_t));
														
 
															         }
														
 
															         {
														
 
															-            const int32_t type32 = hft == HANDCRAFTED_TENSORS_BAD_TYPE ? -1 : int32_t(type);
														
 
															-            helper_write(&type32, sizeof(type32), file);
														
 
															+            const int32_t type32 = hft == HANDCRAFTED_TENSORS_BAD_TYPE ? GGML_TYPE_COUNT : int32_t(type);
														
 
															+            helper_write(file, type32);
														
 
															         }
														
 
															         if (hft == HANDCRAFTED_TENSORS_BAD_OFFSET) {
														
 
															             const uint64_t bad_offset = -1;
														
 
															-            helper_write(&bad_offset, sizeof(bad_offset), file);
														
 
															+            helper_write(file, bad_offset);
														
 
															         } else {
														
 
															-            helper_write(&offset, sizeof(offset), file);
														
 
															+            helper_write(file, offset);
														
 
															         }
														
 
															         int64_t ne = shape[0];
														
@@ -364,12 +385,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
 
															         offset += GGML_PAD(ggml_row_size(type, ne), alignment);
														
 
															     }
														
 
															-    const uint32_t alignment_overshoot = ftell(file) % alignment;
														
 
															-    if (alignment_overshoot != 0) {
														
 
															-        for (size_t i = alignment_overshoot; i < alignment; ++i) {
														
 
															-            const char pad = 0;
														
 
															-            helper_write(&pad, sizeof(pad), file);
														
 
															-        }
														
 
															+    while (ftell(file) % alignment != 0) {
														
 
															+        const char pad = 0;
														
 
															+        helper_write(file, pad);
														
 
															     }
														
 
															     if (hft >= offset_has_data) {
														
@@ -380,13 +398,13 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
 
															         }
														
 
															         for (uint64_t i = 0; i < nbytes; ++i) {
														
 
															             const uint8_t random_byte = i % 256;
														
 
															-            helper_write(&random_byte, sizeof(random_byte), file);
														
 
															+            helper_write(file, random_byte);
														
 
															         }
														
 
															     }
														
 
															     for (int i = 0; i < extra_bytes; ++i) {
														
 
															         const char tmp = 0;
														
 
															-        helper_write(&tmp, sizeof(tmp), file);
														
 
															+        helper_write(file, tmp);
														
 
															     }
														
 
															     rewind(file);
														
 
															     return file;
														
@@ -505,6 +523,16 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
 
															             }
														
 
															             const char * data_gguf = reinterpret_cast<const char *>(gguf_get_arr_data(gguf_ctx, id));
														
 
															+
														
 
															+            if (type_arr == GGUF_TYPE_BOOL) {
														
 
															+                for (size_t arr_i = 0; arr_i < arr_n; ++arr_i) {
														
 
															+                    if (bool(data8[arr_i]) != bool(data_gguf[arr_i])) {
														
 
															+                        ok = false;
														
 
															+                    }
														
 
															+                }
														
 
															+                continue;
														
 
															+            }
														
 
															+
														
 
															             if (!std::equal(data8, data8 + arr_n*type_size, data_gguf)) {
														
 
															                 ok = false;
														
 
															             }
														
@@ -512,12 +540,20 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
 
															         }
														
 
															         const char * data_gguf = reinterpret_cast<const char *>(gguf_get_val_data(gguf_ctx, id));
														
 
															+
														
 
															+        if (type == GGUF_TYPE_BOOL) {
														
 
															+            if (bool(*data8) != bool(*data_gguf)) {
														
 
															+                ok = false;
														
 
															+            }
														
 
															+            continue;
														
 
															+        }
														
 
															+
														
 
															         if (!std::equal(data8, data8 + gguf_type_size(type), data_gguf)) {
														
 
															             ok = false;
														
 
															         }
														
 
															     }
														
 
															-    const uint32_t expected_alignment = alignment_defined ? GGUF_DEFAULT_ALIGNMENT + 1 : GGUF_DEFAULT_ALIGNMENT;
														
 
															+    const uint32_t expected_alignment = alignment_defined ? 1 : GGUF_DEFAULT_ALIGNMENT;
														
 
															     if (gguf_get_alignment(gguf_ctx) != expected_alignment) {
														
 
															         ok = false;
														
 
															     }
														
@@ -539,7 +575,7 @@ static bool handcrafted_check_tensors(const gguf_context * gguf_ctx, const unsig
 
															     bool ok = true;
														
 
															-    const int id_alignment = gguf_find_key(gguf_ctx, "general.alignment");
														
 
															+    const int id_alignment = gguf_find_key(gguf_ctx, GGUF_KEY_GENERAL_ALIGNMENT);
														
 
															     const uint32_t alignment = id_alignment >= 0 ? gguf_get_val_u32(gguf_ctx, id_alignment) : GGUF_DEFAULT_ALIGNMENT;
														
 
															     uint64_t expected_offset = 0;
														
@@ -607,7 +643,7 @@ static bool handcrafted_check_tensor_data(const gguf_context * gguf_ctx, const u
 
															         std::vector<uint8_t> data(size);
														
 
															         GGML_ASSERT(fseek(file, gguf_get_data_offset(gguf_ctx) + offset, SEEK_SET) == 0);
														
 
															-        GGML_ASSERT(fread(data.data(), 1, size, file) == size);
														
 
															+        GGML_ASSERT(fread(data.data(), 1, data.size(), file) == data.size());
														
 
															         for (size_t j = 0; j < size; ++j) {
														
 
															             const uint8_t expected_byte = (j + offset) % 256;
														
@@ -627,15 +663,15 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
 
															     const std::vector<handcrafted_file_type> hfts = {
														
 
															         HANDCRAFTED_HEADER_BAD_MAGIC,
														
 
															         HANDCRAFTED_HEADER_BAD_VERSION_1,
														
 
															-        // HANDCRAFTED_FILE_TYPE_BAD_VERSION_FUTURE, // FIXME
														
 
															+        HANDCRAFTED_HEADER_BAD_VERSION_FUTURE,
														
 
															         HANDCRAFTED_HEADER_BAD_N_KV,
														
 
															         HANDCRAFTED_HEADER_BAD_N_TENSORS,
														
 
															         HANDCRAFTED_HEADER_EMPTY,
														
 
															         HANDCRAFTED_KV_BAD_KEY_SIZE,
														
 
															         HANDCRAFTED_KV_BAD_TYPE,
														
 
															-        // HANDCRAFTED_KV_BAD_VALUE_SIZE, // FIXME sanitizer limit
														
 
															-        // HANDCRAFTED_FILE_TYPE_DUPLICATE_KEY, // FIXME
														
 
															+        HANDCRAFTED_KV_DUPLICATE_KEY,
														
 
															+        HANDCRAFTED_KV_BAD_ALIGN,
														
 
															         HANDCRAFTED_KV_SUCCESS,
														
 
															         HANDCRAFTED_TENSORS_BAD_NAME_SIZE,
														
@@ -643,14 +679,16 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
 
															         HANDCRAFTED_TENSORS_BAD_SHAPE,
														
 
															         HANDCRAFTED_TENSORS_NE_TOO_BIG,
														
 
															         HANDCRAFTED_TENSORS_BAD_TYPE,
														
 
															-        // HANDCRAFTED_TENSORS_BAD_OFFSET, // FIXME
														
 
															+        HANDCRAFTED_TENSORS_BAD_OFFSET,
														
 
															         HANDCRAFTED_TENSORS_DUPLICATE_NAME,
														
 
															-        // HANDCRAFTED_TENSORS_BAD_ALIGNMENT, // FIXME
														
 
															+        HANDCRAFTED_TENSORS_BAD_ALIGN,
														
 
															+        HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN,
														
 
															         HANDCRAFTED_TENSORS_SUCCESS,
														
 
															         HANDCRAFTED_TENSORS_CUSTOM_ALIGN,
														
 
															         HANDCRAFTED_DATA_NOT_ENOUGH_DATA,
														
 
															-        // HANDCRAFTED_DATA_BAD_ALIGNMENT, // FIXME
														
 
															+        HANDCRAFTED_DATA_BAD_ALIGN,
														
 
															+        HANDCRAFTED_DATA_INCONSISTENT_ALIGN,
														
 
															         HANDCRAFTED_DATA_SUCCESS,
														
 
															         HANDCRAFTED_DATA_CUSTOM_ALIGN,
														
 
															     };
														
@@ -674,6 +712,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
 
															             /*no_alloc =*/ false,
														
 
															             /*ctx      =*/ hft >= offset_has_data ? &ctx : nullptr,
														
 
															         };
														
 
															+
														
 
															         struct gguf_context * gguf_ctx = gguf_init_from_file_impl(file, gguf_params);
														
 
															         if (expect_context_not_null(hft)) {
														
@@ -689,7 +728,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
 
															         }
														
 
															         ntest++;
														
 
															-        if (false && hft >= offset_has_data && !expect_context_not_null(hft)) { // FIXME
														
 
															+        if (hft >= offset_has_data && !expect_context_not_null(hft)) {
														
 
															             printf("%s:   - no_dangling_ggml_context_pointer: ", __func__);
														
 
															             if (ctx) {
														
 
															                 printf("\033[1;31mFAIL\033[0m\n");
														
@@ -700,23 +739,6 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
 
															             ntest++;
														
 
															         }
														
 
															-        if (false && expect_context_not_null(hft)) { // FIXME
														
 
															-            FILE * file_eb = get_handcrafted_file(seed, hft, /*extra_bytes =*/ 1);
														
 
															-            struct gguf_context * gguf_ctx_eb = gguf_init_from_file_impl(file_eb, gguf_params);
														
 
															-
														
 
															-            printf("%s:   - context_null_with_extra_bytes: ", __func__);
														
 
															-            if (gguf_ctx_eb) {
														
 
															-                printf("\033[1;31mFAIL\033[0m\n");
														
 
															-            } else {
														
 
															-                printf("\033[1;32mOK\033[0m\n");
														
 
															-                npass++;
														
 
															-            }
														
 
															-            ntest++;
														
 
															-
														
 
															-            gguf_free(gguf_ctx_eb);
														
 
															-            fclose(file_eb);
														
 
															-        }
														
 
															-
														
 
															         const bool alignment_defined = hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN;
														
 
															         if (expect_context_not_null(hft)) {
														
@@ -763,14 +785,15 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
 
															             ntest++;
														
 
															         }
														
 
															+        fclose(file);
														
 
															         if (gguf_ctx) {
														
 
															             ggml_free(ctx);
														
 
															             gguf_free(gguf_ctx);
														
 
															         }
														
 
															-        fclose(file);
														
 
															         printf("\n");
														
 
															     }
														
 
															+
														
 
															     return std::make_pair(npass, ntest);
														
 
															 }
														
@@ -789,10 +812,6 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t
 
															         const std::string key = "my_key_" + std::to_string(rng() % 1024);
														
 
															         const enum gguf_type type = gguf_type(rng() % GGUF_TYPE_COUNT);
														
 
															-        if (type == GGUF_TYPE_STRING || type == GGUF_TYPE_ARRAY) {
														
 
															-            continue; // FIXME memory leak
														
 
															-        }
														
 
															-
														
 
															         switch (type) {
														
 
															             case GGUF_TYPE_UINT8:   gguf_set_val_u8  (gguf_ctx, key.c_str(), rng() % (1 <<  7));             break;
														
 
															             case GGUF_TYPE_INT8:    gguf_set_val_i8  (gguf_ctx, key.c_str(), rng() % (1 <<  7) - (1 <<  6)); break;
														
@@ -826,6 +845,9 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t
 
															                         std::vector<uint32_t> random_data((nbytes + sizeof(uint32_t) - 1) / sizeof(uint32_t));
														
 
															                         for (size_t j = 0; j < random_data.size(); ++j) {
														
 
															                             random_data[j] = rng();
														
 
															+                            if (type_arr == GGUF_TYPE_BOOL) {
														
 
															+                                random_data[j] &= 0x01010101; // the sanitizer complains if booleans are not 0 or 1
														
 
															+                            }
														
 
															                         }
														
 
															                         gguf_set_arr_data(gguf_ctx, key.c_str(), type_arr, random_data.data(), ne);
														
 
															                     } break;
														
@@ -928,6 +950,17 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other
 
															                 continue;
														
 
															             }
														
 
															+            if (type_arr == GGUF_TYPE_BOOL) {
														
 
															+                const int8_t * data       = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx,   id));
														
 
															+                const int8_t * data_other = reinterpret_cast<const int8_t *>(gguf_get_arr_data(other, idx_other));
														
 
															+                for (int arr_i = 0; arr_i < arr_n; ++arr_i) {
														
 
															+                    if (bool(data[arr_i]) != bool(data_other[arr_i])) {
														
 
															+                        ok = false;
														
 
															+                    }
														
 
															+                }
														
 
															+                continue;
														
 
															+            }
														
 
															+
														
 
															             if (type_arr == GGUF_TYPE_STRING) {
														
 
															                 for (int arr_i = 0; arr_i < arr_n; ++arr_i) {
														
 
															                     const std::string str       = gguf_get_arr_str(ctx,   id,       arr_i);
														
@@ -939,8 +972,8 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other
 
															                 continue;
														
 
															             }
														
 
															-            const char * data       = reinterpret_cast<const char *>(gguf_get_arr_data(ctx,   id));
														
 
															-            const char * data_other = reinterpret_cast<const char *>(gguf_get_arr_data(other, idx_other));
														
 
															+            const int8_t * data       = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx,   id));
														
 
															+            const int8_t * data_other = reinterpret_cast<const int8_t *>(gguf_get_arr_data(other, idx_other));
														
 
															             if (!std::equal(data, data + arr_n*gguf_type_size(type_arr), data_other)) {
														
 
															                 ok = false;
														
 
															             }
														
@@ -1028,21 +1061,6 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml
 
															 }
														
 
															 static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta) {
														
 
															-    FILE * file = tmpfile();
														
 
															-#ifdef _WIN32
														
 
															-    if (!file) {
														
 
															-        printf("%s: failed to create tmpfile(), needs elevated privileges on Windows");
														
 
															-        printf("%s: skipping tests");
														
 
															-        return std::make_pair(0, 0);
														
 
															-    }
														
 
															-#else
														
 
															-    GGML_ASSERT(file);
														
 
															-#endif // _WIN32
														
 
															-
														
 
															-    if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) {
														
 
															-        return std::make_pair(0, 0); // FIXME
														
 
															-    }
														
 
															-
														
 
															     ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
														
 
															     printf("%s: device=%s, backend=%s, only_meta=%s\n",
														
 
															         __func__, ggml_backend_dev_description(dev), ggml_backend_name(backend), only_meta ? "yes" : "no");
														
@@ -1060,10 +1078,24 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
 
															         bbuf       = result.buffer;
														
 
															     }
														
 
															-    struct gguf_buf gbuf = gguf_buf_init(16 * 1024);
														
 
															-    gguf_write_to_buf(gguf_ctx_0, &gbuf, only_meta);
														
 
															-    helper_write(gbuf.data, gbuf.offset, file);
														
 
															-    rewind(file);
														
 
															+    FILE * file = tmpfile();
														
 
															+
														
 
															+#ifdef _WIN32
														
 
															+    if (!file) {
														
 
															+        printf("%s: failed to create tmpfile(), needs elevated privileges on Windows");
														
 
															+        printf("%s: skipping tests");
														
 
															+        return std::make_pair(0, 0);
														
 
															+    }
														
 
															+#else
														
 
															+    GGML_ASSERT(file);
														
 
															+#endif // _WIN32
														
 
															+
														
 
															+    {
														
 
															+        std::vector<int8_t> buf;
														
 
															+        gguf_write_to_buf(gguf_ctx_0, buf, only_meta);
														
 
															+        GGML_ASSERT(fwrite(buf.data(), 1, buf.size(), file) == buf.size());
														
 
															+        rewind(file);
														
 
															+    }
														
 
															     struct ggml_context * ctx_1 = nullptr;
														
 
															     struct gguf_init_params gguf_params = {
														
@@ -1151,9 +1183,8 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
 
															     ggml_free(ctx_1);
														
 
															     gguf_free(gguf_ctx_0);
														
 
															     gguf_free(gguf_ctx_1);
														
 
															-    gguf_buf_free(gbuf);
														
 
															     ggml_backend_free(backend);
														
 
															-    GGML_ASSERT(fclose(file) == 0);
														
 
															+    fclose(file);
														
 
															     printf("\n");
														
 
															     return std::make_pair(npass, ntest);