1 год назад · 7ce2c77f88
--- a/ggml.c
+++ b/ggml.c
@@ -470,6 +470,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 
				         .type_size                = sizeof(int32_t),
			
 
				         .is_quantized             = false,
			
 
				     },
			
 
				+    [GGML_TYPE_I64] = {
			
 
				+        .type_name                = "i64",
			
 
				+        .blck_size                = 1,
			
 
				+        .type_size                = sizeof(int64_t),
			
 
				+        .is_quantized             = false,
			
 
				+    },
			
 
				+    [GGML_TYPE_F64] = {
			
 
				+        .type_name                = "f64",
			
 
				+        .blck_size                = 1,
			
 
				+        .type_size                = sizeof(double),
			
 
				+        .is_quantized             = false,
			
 
				+        .nrows                    = 1,
			
 
				+    },
			
 
				     [GGML_TYPE_F32] = {
			
 
				         .type_name                = "f32",
			
 
				         .blck_size                = 1,
			
@@ -12418,6 +12431,8 @@ static void ggml_compute_forward_alibi(
 
				         case GGML_TYPE_I8:
			
 
				         case GGML_TYPE_I16:
			
 
				         case GGML_TYPE_I32:
			
 
				+        case GGML_TYPE_I64:
			
 
				+        case GGML_TYPE_F64:
			
 
				         case GGML_TYPE_COUNT:
			
 
				             {
			
 
				                 GGML_ASSERT(false);
			
@@ -12504,6 +12519,8 @@ static void ggml_compute_forward_clamp(
 
				         case GGML_TYPE_I8:
			
 
				         case GGML_TYPE_I16:
			
 
				         case GGML_TYPE_I32:
			
 
				+        case GGML_TYPE_I64:
			
 
				+        case GGML_TYPE_F64:
			
 
				         case GGML_TYPE_COUNT:
			
 
				             {
			
 
				                 GGML_ASSERT(false);
			
--- a/ggml.h
+++ b/ggml.h
@@ -366,6 +366,8 @@ extern "C" {
 
				         GGML_TYPE_I8      = 24,
			
 
				         GGML_TYPE_I16     = 25,
			
 
				         GGML_TYPE_I32     = 26,
			
 
				+        GGML_TYPE_I64     = 27,
			
 
				+        GGML_TYPE_F64     = 28,
			
 
				         GGML_TYPE_COUNT,
			
 
				     };
			
 
				 
			
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -665,6 +665,8 @@ class GGMLQuantizationType(IntEnum):
 
				     I8      = 24
			
 
				     I16     = 25
			
 
				     I32     = 26
			
 
				+    I64     = 27
			
 
				+    F64     = 28
			
 
				 
			
 
				 
			
 
				 class GGUFEndian(IntEnum):
			
@@ -734,6 +736,8 @@ GGML_QUANT_SIZES = {
 
				     GGMLQuantizationType.I8:      (1, 1),
			
 
				     GGMLQuantizationType.I16:     (1, 2),
			
 
				     GGMLQuantizationType.I32:     (1, 4),
			
 
				+    GGMLQuantizationType.I64:     (1, 8),
			
 
				+    GGMLQuantizationType.F64:     (1, 8),
			
 
				 }
			
 
				 
			
 
				 
			
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -242,12 +242,15 @@ class GGUFReader:
 
				             n_bytes = n_elems * type_size // block_size
			
 
				             data_offs = int(start_offs + offset_tensor[0])
			
 
				             item_type: npt.DTypeLike
			
 
				-            if ggml_type == GGMLQuantizationType.F32:
			
 
				+            if ggml_type == GGMLQuantizationType.F16:
			
 
				+                item_count = n_elems
			
 
				+                item_type = np.float16
			
 
				+            elif ggml_type == GGMLQuantizationType.F32:
			
 
				                 item_count = n_elems
			
 
				                 item_type = np.float32
			
 
				-            elif ggml_type == GGMLQuantizationType.F16:
			
 
				+            elif ggml_type == GGMLQuantizationType.F64:
			
 
				                 item_count = n_elems
			
 
				-                item_type = np.float16
			
 
				+                item_type = np.float64
			
 
				             elif ggml_type == GGMLQuantizationType.I8:
			
 
				                 item_count = n_elems
			
 
				                 item_type = np.int8
			
@@ -257,6 +260,9 @@ class GGUFReader:
 
				             elif ggml_type == GGMLQuantizationType.I32:
			
 
				                 item_count = n_elems
			
 
				                 item_type = np.int32
			
 
				+            elif ggml_type == GGMLQuantizationType.I64:
			
 
				+                item_count = n_elems
			
 
				+                item_type = np.int64
			
 
				             else:
			
 
				                 item_count = n_bytes
			
 
				                 item_type = np.uint8
			
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -204,18 +204,22 @@ class GGUFWriter:
 
				         for i in range(n_dims):
			
 
				             self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
			
 
				         if raw_dtype is None:
			
 
				-            if tensor_dtype == np.float32:
			
 
				-                dtype = GGMLQuantizationType.F32
			
 
				-            elif tensor_dtype == np.float16:
			
 
				+            if tensor_dtype == np.float16:
			
 
				                 dtype = GGMLQuantizationType.F16
			
 
				+            elif tensor_dtype == np.float32:
			
 
				+                dtype = GGMLQuantizationType.F32
			
 
				+            elif tensor_dtype == np.float64:
			
 
				+                dtype = GGMLQuantizationType.F64
			
 
				             elif tensor_dtype == np.int8:
			
 
				                 dtype = GGMLQuantizationType.I8
			
 
				             elif tensor_dtype == np.int16:
			
 
				                 dtype = GGMLQuantizationType.I16
			
 
				             elif tensor_dtype == np.int32:
			
 
				                 dtype = GGMLQuantizationType.I32
			
 
				+            elif tensor_dtype == np.int64:
			
 
				+                dtype = GGMLQuantizationType.I64
			
 
				             else:
			
 
				-                raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
			
 
				+                raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
			
 
				         else:
			
 
				             dtype = raw_dtype
			
 
				         self.ti_data += self._pack("I", dtype)