llama : add gguf_remove_key + remove split meta during quantize (#6591)

* Remove split metadata when quantizing model shards

* Find metadata key by enum

* Correct loop range for gguf_remove_key and code format

* Free kv memory

---------

Co-authored-by: z5269887 <z5269887@unsw.edu.au>
jiez, 1 year ago
commit 91c736015b
3 changed files with 47 additions and 25 deletions

  1. ggml.c (+40 -25)
  2. ggml.h (+3 -0)
  3. llama.cpp (+4 -0)

+ 40 - 25
ggml.c

@@ -20550,6 +20550,32 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
     return ok;
 }
 
+static void gguf_free_kv(struct gguf_kv * kv) {
+    if (kv->key.data) {
+        GGML_FREE(kv->key.data);
+    }
+
+    if (kv->type == GGUF_TYPE_STRING) {
+        if (kv->value.str.data) {
+            GGML_FREE(kv->value.str.data);
+        }
+    }
+
+    if (kv->type == GGUF_TYPE_ARRAY) {
+        if (kv->value.arr.data) {
+            if (kv->value.arr.type == GGUF_TYPE_STRING) {
+                for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
+                    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
+                    if (str->data) {
+                        GGML_FREE(str->data);
+                    }
+                }
+            }
+            GGML_FREE(kv->value.arr.data);
+        }
+    }
+}
+
 struct gguf_context * gguf_init_empty(void) {
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
 
@@ -20899,31 +20925,7 @@ void gguf_free(struct gguf_context * ctx) {
     if (ctx->kv) {
         // free string memory - not great..
         for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
-            struct gguf_kv * kv = &ctx->kv[i];
-
-            if (kv->key.data) {
-                GGML_FREE(kv->key.data);
-            }
-
-            if (kv->type == GGUF_TYPE_STRING) {
-                if (kv->value.str.data) {
-                    GGML_FREE(kv->value.str.data);
-                }
-            }
-
-            if (kv->type == GGUF_TYPE_ARRAY) {
-                if (kv->value.arr.data) {
-                    if (kv->value.arr.type == GGUF_TYPE_STRING) {
-                        for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
-                            struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
-                            if (str->data) {
-                                GGML_FREE(str->data);
-                            }
-                        }
-                    }
-                    GGML_FREE(kv->value.arr.data);
-                }
-            }
+            gguf_free_kv(&ctx->kv[i]);
         }
 
         GGML_FREE(ctx->kv);
@@ -21148,6 +21150,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
     return n_kv;
 }
 
+void gguf_remove_key(struct gguf_context * ctx, const char * key) {
+    const int idx = gguf_find_key(ctx, key);
+    if (idx >= 0) {
+        const int n_kv = gguf_get_n_kv(ctx);
+        gguf_free_kv(&ctx->kv[idx]);
+        for (int i = idx; i < n_kv-1; ++i) {
+            ctx->kv[i] = ctx->kv[i+1];
+        }
+        ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
+        ctx->header.n_kv--;
+    }
+}
+
 void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
     const int idx = gguf_get_or_add_key(ctx, key);
 
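A minimal sketch of the new call in isolation, assuming it is compiled against this ggml.h; the key names other than "split.count" are arbitrary demo values, and "split.count" is only assumed to follow the split metadata convention used further below:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct gguf_context * ctx = gguf_init_empty();

        // String values are heap copies owned by the context, so removing
        // an entry must release them via gguf_free_kv before the kv array
        // is compacted and shrunk with realloc.
        gguf_set_val_str(ctx, "general.name", "demo");
        gguf_set_val_u16(ctx, "split.count", 2);

        gguf_remove_key(ctx, "general.name");   // frees the owned string, shifts later entries down
        gguf_remove_key(ctx, "no.such.key");    // no-op: gguf_find_key returns -1

        printf("n_kv after removal: %d\n", gguf_get_n_kv(ctx)); // expect 1

        gguf_free(ctx);
        return 0;
    }

Since removal shrinks ctx->kv with realloc and decrements header.n_kv, gguf_get_n_kv stays in step with the compacted array.
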
+ 3 - 0
ggml.h

@@ -2289,6 +2289,9 @@ extern "C" {
     GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
     GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
 
+    // removes key if it exists
+    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
+
     // overrides existing values or adds a new one
     GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
     GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);

+ 4 - 0
llama.cpp

@@ -13535,6 +13535,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     gguf_set_kv     (ctx_out, ml.meta);
     gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
     gguf_set_val_u32(ctx_out, "general.file_type", ftype);
+    // Remove split metadata
+    gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str());
+    gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str());
+    gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str());
 
     if (params->kv_overrides) {
         const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;
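
Quantization merges the input shards into a single output file, so the split bookkeeping copied over wholesale by gguf_set_kv would be stale; the three removals above prune it. A small verification sketch, with two assumptions: the output filename is hypothetical, and the literal key names "split.no", "split.count", and "split.tensors.count" are assumed to be what LLM_KV_SPLIT_NO, LLM_KV_SPLIT_COUNT, and LLM_KV_SPLIT_TENSORS_COUNT expand to:

    #include <assert.h>
    #include <stddef.h>
    #include "ggml.h"

    int main(void) {
        // no_alloc: read metadata only, do not allocate tensor data
        struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
        struct gguf_context * ctx = gguf_init_from_file("model-quantized.gguf", params);
        assert(ctx != NULL);

        // a merged, quantized file should carry no split metadata
        assert(gguf_find_key(ctx, "split.no")            < 0);
        assert(gguf_find_key(ctx, "split.count")         < 0);
        assert(gguf_find_key(ctx, "split.tensors.count") < 0);

        gguf_free(ctx);
        return 0;
    }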