1 سال پیش · 7ce2c77f88
--- a/ggml.c
+++ b/ggml.c
@@ -470,6 +470,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 
															         .type_size                = sizeof(int32_t),
														
 
															         .is_quantized             = false,
														
 
															     },
														
 
															+    [GGML_TYPE_I64] = {
														
 
															+        .type_name                = "i64",
														
 
															+        .blck_size                = 1,
														
 
															+        .type_size                = sizeof(int64_t),
														
 
															+        .is_quantized             = false,
														
 
															+    },
														
 
															+    [GGML_TYPE_F64] = {
														
 
															+        .type_name                = "f64",
														
 
															+        .blck_size                = 1,
														
 
															+        .type_size                = sizeof(double),
														
 
															+        .is_quantized             = false,
														
 
															+        .nrows                    = 1,
														
 
															+    },
														
 
															     [GGML_TYPE_F32] = {
														
 
															         .type_name                = "f32",
														
 
															         .blck_size                = 1,
														
@@ -12418,6 +12431,8 @@ static void ggml_compute_forward_alibi(
 
															         case GGML_TYPE_I8:
														
 
															         case GGML_TYPE_I16:
														
 
															         case GGML_TYPE_I32:
														
 
															+        case GGML_TYPE_I64:
														
 
															+        case GGML_TYPE_F64:
														
 
															         case GGML_TYPE_COUNT:
														
 
															             {
														
 
															                 GGML_ASSERT(false);
														
@@ -12504,6 +12519,8 @@ static void ggml_compute_forward_clamp(
 
															         case GGML_TYPE_I8:
														
 
															         case GGML_TYPE_I16:
														
 
															         case GGML_TYPE_I32:
														
 
															+        case GGML_TYPE_I64:
														
 
															+        case GGML_TYPE_F64:
														
 
															         case GGML_TYPE_COUNT:
														
 
															             {
														
 
															                 GGML_ASSERT(false);
														
--- a/ggml.h
+++ b/ggml.h
@@ -366,6 +366,8 @@ extern "C" {
 
															         GGML_TYPE_I8      = 24,
														
 
															         GGML_TYPE_I16     = 25,
														
 
															         GGML_TYPE_I32     = 26,
														
 
															+        GGML_TYPE_I64     = 27,
														
 
															+        GGML_TYPE_F64     = 28,
														
 
															         GGML_TYPE_COUNT,
														
 
															     };
														
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -665,6 +665,8 @@ class GGMLQuantizationType(IntEnum):
 
															     I8      = 24
														
 
															     I16     = 25
														
 
															     I32     = 26
														
 
															+    I64     = 27
														
 
															+    F64     = 28
														
 
															 class GGUFEndian(IntEnum):
														
@@ -734,6 +736,8 @@ GGML_QUANT_SIZES = {
 
															     GGMLQuantizationType.I8:      (1, 1),
														
 
															     GGMLQuantizationType.I16:     (1, 2),
														
 
															     GGMLQuantizationType.I32:     (1, 4),
														
 
															+    GGMLQuantizationType.I64:     (1, 8),
														
 
															+    GGMLQuantizationType.F64:     (1, 8),
														
 
															 }
														
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -242,12 +242,15 @@ class GGUFReader:
 
															             n_bytes = n_elems * type_size // block_size
														
 
															             data_offs = int(start_offs + offset_tensor[0])
														
 
															             item_type: npt.DTypeLike
														
 
															-            if ggml_type == GGMLQuantizationType.F32:
														
 
															+            if ggml_type == GGMLQuantizationType.F16:
														
 
															+                item_count = n_elems
														
 
															+                item_type = np.float16
														
 
															+            elif ggml_type == GGMLQuantizationType.F32:
														
 
															                 item_count = n_elems
														
 
															                 item_type = np.float32
														
 
															-            elif ggml_type == GGMLQuantizationType.F16:
														
 
															+            elif ggml_type == GGMLQuantizationType.F64:
														
 
															                 item_count = n_elems
														
 
															-                item_type = np.float16
														
 
															+                item_type = np.float64
														
 
															             elif ggml_type == GGMLQuantizationType.I8:
														
 
															                 item_count = n_elems
														
 
															                 item_type = np.int8
														
@@ -257,6 +260,9 @@ class GGUFReader:
 
															             elif ggml_type == GGMLQuantizationType.I32:
														
 
															                 item_count = n_elems
														
 
															                 item_type = np.int32
														
 
															+            elif ggml_type == GGMLQuantizationType.I64:
														
 
															+                item_count = n_elems
														
 
															+                item_type = np.int64
														
 
															             else:
														
 
															                 item_count = n_bytes
														
 
															                 item_type = np.uint8
														
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -204,18 +204,22 @@ class GGUFWriter:
 
															         for i in range(n_dims):
														
 
															             self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
														
 
															         if raw_dtype is None:
														
 
															-            if tensor_dtype == np.float32:
														
 
															-                dtype = GGMLQuantizationType.F32
														
 
															-            elif tensor_dtype == np.float16:
														
 
															+            if tensor_dtype == np.float16:
														
 
															                 dtype = GGMLQuantizationType.F16
														
 
															+            elif tensor_dtype == np.float32:
														
 
															+                dtype = GGMLQuantizationType.F32
														
 
															+            elif tensor_dtype == np.float64:
														
 
															+                dtype = GGMLQuantizationType.F64
														
 
															             elif tensor_dtype == np.int8:
														
 
															                 dtype = GGMLQuantizationType.I8
														
 
															             elif tensor_dtype == np.int16:
														
 
															                 dtype = GGMLQuantizationType.I16
														
 
															             elif tensor_dtype == np.int32:
														
 
															                 dtype = GGMLQuantizationType.I32
														
 
															+            elif tensor_dtype == np.int64:
														
 
															+                dtype = GGMLQuantizationType.I64
														
 
															             else:
														
 
															-                raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
														
 
															+                raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
														
 
															         else:
														
 
															             dtype = raw_dtype
														
 
															         self.ti_data += self._pack("I", dtype)