gguf_convert_endian.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import logging
  4. import argparse
  5. import os
  6. import sys
  7. from tqdm import tqdm
  8. from pathlib import Path
  9. import numpy as np
# Necessary to load the local gguf package.
# Unless the NO_LOCAL_GGUF environment variable is set, and provided a
# 'gguf-py' directory exists three directory levels above this file, the
# directory two levels above this file is placed at the front of sys.path so
# that the `import gguf` below searches it first.
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

import gguf

# Module-level logger used by the functions in this script.
logger = logging.getLogger("gguf-convert-endian")
  15. def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
  16. if np.uint32(1) == np.uint32(1).newbyteorder("<"):
  17. # Host is little endian
  18. host_endian = "little"
  19. swapped_endian = "big"
  20. else:
  21. # Sorry PDP or other weird systems that don't use BE or LE.
  22. host_endian = "big"
  23. swapped_endian = "little"
  24. if reader.byte_order == "S":
  25. file_endian = swapped_endian
  26. else:
  27. file_endian = host_endian
  28. order = host_endian if args.order == "native" else args.order
  29. logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
  30. if file_endian == order:
  31. logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
  32. sys.exit(0)
  33. logger.info("* Checking tensors for conversion compatibility")
  34. for tensor in reader.tensors:
  35. if tensor.tensor_type not in (
  36. gguf.GGMLQuantizationType.F32,
  37. gguf.GGMLQuantizationType.F16,
  38. gguf.GGMLQuantizationType.Q8_0,
  39. ):
  40. raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
  41. logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
  42. if args.dry_run:
  43. return
  44. logger.warning("*** Warning *** Warning *** Warning **")
  45. logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
  46. if order != host_endian:
  47. logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
  48. logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
  49. logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
  50. response = input("YES, I am sure> ")
  51. if response != "YES":
  52. logger.warning("You didn't enter YES. Okay then, see ya!")
  53. sys.exit(0)
  54. logger.info(f"* Converting fields ({len(reader.fields)})")
  55. for idx, field in enumerate(reader.fields.values()):
  56. logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
  57. for part in field.parts:
  58. part.byteswap(inplace=True)
  59. logger.info(f"* Converting tensors ({len(reader.tensors)})")
  60. for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
  61. log_message = (
  62. f"Converting tensor {repr(tensor.name)}, "
  63. f"type={tensor.tensor_type.name}, "
  64. f"elements={tensor.n_elements} "
  65. )
  66. # Byte-swap each part of the tensor's field
  67. for part in tensor.field.parts:
  68. part.byteswap(inplace=True)
  69. # Byte-swap tensor data if necessary
  70. if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
  71. # Handle Q8_0 tensor blocks (block_q8_0)
  72. # Specific handling of block_q8_0 is required.
  73. # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
  74. block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
  75. n_blocks = len(tensor.data) // block_size
  76. for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
  77. block_offs = block_num * block_size
  78. # Byte-Swap f16 sized delta field
  79. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  80. delta.byteswap(inplace=True)
  81. # Byte-Swap Q8 weights
  82. if block_num % 100000 == 0:
  83. inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
  84. else:
  85. # Handle other tensor types
  86. tensor.data.byteswap(inplace=True)
  87. pbar.set_description(log_message)
  88. logger.info("* Completion")
  89. def main() -> None:
  90. parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
  91. parser.add_argument(
  92. "model", type=str,
  93. help="GGUF format model filename",
  94. )
  95. parser.add_argument(
  96. "order", type=str, choices=['big', 'little', 'native'],
  97. help="Requested byte order",
  98. )
  99. parser.add_argument(
  100. "--dry-run", action="store_true",
  101. help="Don't actually change anything",
  102. )
  103. parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
  104. args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
  105. logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
  106. logger.info(f'* Loading: {args.model}')
  107. reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
  108. convert_byteorder(reader, args)
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()