Просмотр исходного кода

gguf-py : add support for endian conversion of BF16 data (#16594)

BF16 requires special handling in this script:
it is 2-byte data, but the default view is 1-byte.
Switch to the correct view before attempting to byteswap.

With this change, correctly byteswapping models like
Meta-Llama-3-8B-Instruct-bf16-GGUF
should be possible.
Aleksei Nikiforov 3 месяцев назад
Родитель
Коммит
7adc79c032
1 изменённый файл: 6 добавлено и 0 удалено
  1. 6 0
      gguf-py/gguf/scripts/gguf_convert_endian.py

+ 6 - 0
gguf-py/gguf/scripts/gguf_convert_endian.py

@@ -91,6 +91,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
            tensor.tensor_type not in (
            tensor.tensor_type not in (
                 gguf.GGMLQuantizationType.F32,
                 gguf.GGMLQuantizationType.F32,
                 gguf.GGMLQuantizationType.F16,
                 gguf.GGMLQuantizationType.F16,
+                gguf.GGMLQuantizationType.BF16,
            ):
            ):
             raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
             raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
     logger.info(f"* Preparing to convert from {file_endian} to {order}")
     logger.info(f"* Preparing to convert from {file_endian} to {order}")
@@ -148,6 +149,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
 
 
             # restore old shape in case it's ever used
             # restore old shape in case it's ever used
             tensor.data.resize(oldshape)
             tensor.data.resize(oldshape)
+        elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16:
+            # Special case for BF16
+            # It is 2-bytes data, but by default view loads it as 1-byte data.
+            # Change to correct view before byteswapping.
+            tensor.data.view(dtype=np.uint16).byteswap(inplace=True)
         else:
         else:
             # Handle other tensor types
             # Handle other tensor types
             tensor.data.byteswap(inplace=True)
             tensor.data.byteswap(inplace=True)