4 miesięcy temu · 64387f6e95
--- a/gguf-py/gguf/scripts/gguf_convert_endian.py
+++ b/gguf-py/gguf/scripts/gguf_convert_endian.py
@@ -19,6 +19,61 @@ import gguf
 
															 logger = logging.getLogger("gguf-convert-endian")
														
 
															+def byteswap_q4_0(tensor, block_offs):
														
 
															+    # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
														
 
															+
														
 
															+    # Byte-Swap f16 sized delta field
														
 
															+    delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
														
 
															+    delta.byteswap(inplace=True)
														
 
															+
														
 
															+
														
 
															+def byteswap_q8_0(tensor, block_offs):
														
 
															+    # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
														
 
															+
														
 
															+    # Byte-Swap f16 sized delta field
														
 
															+    delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
														
 
															+    delta.byteswap(inplace=True)
														
 
															+
														
 
															+
														
 
															+def byteswap_q4_k(tensor, block_offs):
														
 
															+    # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
														
 
															+
														
 
															+    # Byte-Swap f16 sized fields
														
 
															+    delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
														
 
															+    delta.byteswap(inplace=True)
														
 
															+
														
 
															+    delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
														
 
															+    delta.byteswap(inplace=True)
														
 
															+
														
 
															+
														
 
															+def byteswap_q6_k(tensor, block_offs):
														
 
															+    # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
														
 
															+
														
 
															+    # Byte-Swap f16 sized field
														
 
															+    delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
														
 
															+    delta.byteswap(inplace=True)
														
 
															+
														
 
															+
														
 
															+byteswap_tensors = {
														
 
															+    gguf.GGMLQuantizationType.Q4_0: {
														
 
															+        "block_size": 18, # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
														
 
															+        "byteswap_func": byteswap_q4_0,
														
 
															+    },
														
 
															+    gguf.GGMLQuantizationType.Q8_0: {
														
 
															+        "block_size": 34, # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
														
 
															+        "byteswap_func": byteswap_q8_0,
														
 
															+    },
														
 
															+    gguf.GGMLQuantizationType.Q4_K: {
														
 
															+        "block_size": 144, # 144 bytes = 2 * <f16 delta scaling factor> + 140 * <int8 quant>
														
 
															+        "byteswap_func": byteswap_q4_k,
														
 
															+    },
														
 
															+    gguf.GGMLQuantizationType.Q6_K: {
														
 
															+        "block_size": 210, # 210 bytes = <f16 delta scaling factor> + 208 * <int8 quant>
														
 
															+        "byteswap_func": byteswap_q6_k,
														
 
															+    },
														
 
															+}
														
 
															+
														
 
															+
														
 
															 def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
														
 
															     file_endian = reader.endianess.name
														
 
															     if reader.byte_order == 'S':
														
@@ -32,13 +87,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
 
															         sys.exit(0)
														
 
															     logger.info("* Checking tensors for conversion compatibility")
														
 
															     for tensor in reader.tensors:
														
 
															-        if tensor.tensor_type not in (
														
 
															-            gguf.GGMLQuantizationType.F32,
														
 
															-            gguf.GGMLQuantizationType.F16,
														
 
															-            gguf.GGMLQuantizationType.Q8_0,
														
 
															-            gguf.GGMLQuantizationType.Q4_K,
														
 
															-            gguf.GGMLQuantizationType.Q6_K,
														
 
															-        ):
														
 
															+        if tensor.tensor_type not in byteswap_tensors and \
														
 
															+           tensor.tensor_type not in (
														
 
															+                gguf.GGMLQuantizationType.F32,
														
 
															+                gguf.GGMLQuantizationType.F16,
														
 
															+           ):
														
 
															             raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
														
 
															     logger.info(f"* Preparing to convert from {file_endian} to {order}")
														
 
															     if args.dry_run:
														
@@ -72,78 +125,29 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
 
															             part.byteswap(inplace=True)
														
 
															         # Byte-swap tensor data if necessary
														
 
															-        if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
														
 
															-            # Handle Q8_0 tensor blocks (block_q8_0)
														
 
															-            # Specific handling of block_q8_0 is required.
														
 
															-            # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
														
 
															-
														
 
															-            block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
														
 
															-
														
 
															-            n_blocks = len(tensor.data) // block_size
														
 
															-            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
														
 
															-                block_offs = block_num * block_size
														
 
															-
														
 
															-                # Byte-Swap f16 sized delta field
														
 
															-                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
														
 
															-                delta.byteswap(inplace=True)
														
 
															-
														
 
															-                # Byte-Swap Q8 weights
														
 
															-                if block_num % 100000 == 0:
														
 
															-                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
														
 
															-
														
 
															-        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
														
 
															-            # Handle Q4_K tensor blocks (block_q4_k)
														
 
															-            # Specific handling of block_q4_k is required.
														
 
															-            # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
														
 
															-
														
 
															+        if tensor.tensor_type in byteswap_tensors:
														
 
															             # first flatten structure
														
 
															+            oldshape = tensor.data.shape
														
 
															             newshape = 1
														
 
															             for i in tensor.data.shape:
														
 
															                 newshape *= i
														
 
															             tensor.data.resize(newshape)
														
 
															-            block_size = 144
														
 
															-            n_blocks = len(tensor.data) // block_size
														
 
															-            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
														
 
															-                block_offs = block_num * block_size
														
 
															-
														
 
															-                # Byte-Swap f16 sized fields
														
 
															-                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
														
 
															-                delta.byteswap(inplace=True)
														
 
															-
														
 
															-                delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
														
 
															-                delta.byteswap(inplace=True)
														
 
															-
														
 
															-                # Byte-Swap
														
 
															-                if block_num % 100000 == 0:
														
 
															-                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
														
 
															-
														
 
															-        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
														
 
															-            # Handle Q6_K tensor blocks (block_q6_k)
														
 
															-            # Specific handling of block_q6_k is required.
														
 
															-            # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
														
 
															-
														
 
															-            # first flatten structure
														
 
															-            newshape = 1
														
 
															-            for i in tensor.data.shape:
														
 
															-                newshape *= i
														
 
															-
														
 
															-            tensor.data.resize(newshape)
														
 
															+            block_size    = byteswap_tensors[tensor.tensor_type]["block_size"]
														
 
															+            byteswap_func = byteswap_tensors[tensor.tensor_type]["byteswap_func"]
														
 
															-            block_size = 210
														
 
															             n_blocks = len(tensor.data) // block_size
														
 
															             for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
														
 
															                 block_offs = block_num * block_size
														
 
															-                # Byte-Swap f16 sized field
														
 
															-                delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
														
 
															-                delta.byteswap(inplace=True)
														
 
															+                byteswap_func(tensor, block_offs)
														
 
															-                # Byte-Swap
														
 
															                 if block_num % 100000 == 0:
														
 
															                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
														
 
															+            # restore old shape in case it's ever used
														
 
															+            tensor.data.resize(oldshape)
														
 
															         else:
														
 
															             # Handle other tensor types
														
 
															             tensor.data.byteswap(inplace=True)