gguf_convert_endian.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import logging
  4. import argparse
  5. import os
  6. import sys
  7. from tqdm import tqdm
  8. from pathlib import Path
  9. import numpy as np
  10. # Necessary to load the local gguf package
  11. if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
  12. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  13. import gguf
  14. logger = logging.getLogger("gguf-convert-endian")
  15. def byteswap_noop(tensor, block_offs):
  16. # this function is used when byteswapping is not needed
  17. pass
  18. def byteswap_q4_0(tensor, block_offs):
  19. # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
  20. # Byte-Swap f16 sized delta field
  21. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  22. delta.byteswap(inplace=True)
  23. def byteswap_q8_0(tensor, block_offs):
  24. # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
  25. # Byte-Swap f16 sized delta field
  26. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  27. delta.byteswap(inplace=True)
  28. def byteswap_q4_k(tensor, block_offs):
  29. # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
  30. # Byte-Swap f16 sized fields
  31. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  32. delta.byteswap(inplace=True)
  33. delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
  34. delta.byteswap(inplace=True)
  35. def byteswap_q6_k(tensor, block_offs):
  36. # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
  37. # Byte-Swap f16 sized field
  38. delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
  39. delta.byteswap(inplace=True)
  40. byteswap_tensors = {
  41. gguf.GGMLQuantizationType.Q4_0: byteswap_q4_0,
  42. gguf.GGMLQuantizationType.Q8_0: byteswap_q8_0,
  43. gguf.GGMLQuantizationType.Q4_K: byteswap_q4_k,
  44. gguf.GGMLQuantizationType.Q6_K: byteswap_q6_k,
  45. gguf.GGMLQuantizationType.MXFP4: byteswap_noop,
  46. }
  47. def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
  48. file_endian = reader.endianess.name
  49. if reader.byte_order == 'S':
  50. host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
  51. else:
  52. host_endian = file_endian
  53. order = host_endian if args.order == "native" else args.order.upper()
  54. logger.info(f"* Host is {host_endian} endian, GGUF file seems to be {file_endian} endian")
  55. if file_endian == order:
  56. logger.info(f"* File is already {order} endian. Nothing to do.")
  57. sys.exit(0)
  58. logger.info("* Checking tensors for conversion compatibility")
  59. for tensor in reader.tensors:
  60. if tensor.tensor_type not in byteswap_tensors and \
  61. tensor.tensor_type not in (
  62. gguf.GGMLQuantizationType.F32,
  63. gguf.GGMLQuantizationType.F16,
  64. gguf.GGMLQuantizationType.BF16,
  65. ):
  66. raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
  67. logger.info(f"* Preparing to convert from {file_endian} to {order}")
  68. if args.dry_run:
  69. return
  70. logger.warning("*** Warning *** Warning *** Warning **")
  71. logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
  72. if order != host_endian:
  73. logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
  74. logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
  75. logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
  76. response = input("YES, I am sure> ")
  77. if response != "YES":
  78. logger.warning("You didn't enter YES. Okay then, see ya!")
  79. sys.exit(0)
  80. logger.info(f"* Converting fields ({len(reader.fields)})")
  81. for idx, field in enumerate(reader.fields.values()):
  82. logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
  83. for part in field.parts:
  84. part.byteswap(inplace=True)
  85. logger.info(f"* Converting tensors ({len(reader.tensors)})")
  86. for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
  87. log_message = (
  88. f"Converting tensor {repr(tensor.name)}, "
  89. f"type={tensor.tensor_type.name}, "
  90. f"elements={tensor.n_elements} "
  91. )
  92. # Byte-swap each part of the tensor's field
  93. for part in tensor.field.parts:
  94. part.byteswap(inplace=True)
  95. # Byte-swap tensor data if necessary
  96. if tensor.tensor_type in byteswap_tensors:
  97. # first flatten structure
  98. oldshape = tensor.data.shape
  99. newshape = 1
  100. for i in tensor.data.shape:
  101. newshape *= i
  102. tensor.data.resize(newshape)
  103. block_size = gguf.constants.GGML_QUANT_SIZES[tensor.tensor_type][1]
  104. byteswap_func = byteswap_tensors[tensor.tensor_type]
  105. n_blocks = len(tensor.data) // block_size
  106. for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
  107. block_offs = block_num * block_size
  108. byteswap_func(tensor, block_offs)
  109. if block_num % 100000 == 0:
  110. inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
  111. # restore old shape in case it's ever used
  112. tensor.data.resize(oldshape)
  113. elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16:
  114. # Special case for BF16
  115. # It is 2-bytes data, but by default view loads it as 1-byte data.
  116. # Change to correct view before byteswapping.
  117. tensor.data.view(dtype=np.uint16).byteswap(inplace=True)
  118. else:
  119. # Handle other tensor types
  120. tensor.data.byteswap(inplace=True)
  121. pbar.set_description(log_message)
  122. logger.info("* Completion")
  123. def main() -> None:
  124. parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
  125. parser.add_argument(
  126. "model", type=str,
  127. help="GGUF format model filename",
  128. )
  129. parser.add_argument(
  130. "order", type=str, choices=['big', 'little', 'native'],
  131. help="Requested byte order",
  132. )
  133. parser.add_argument(
  134. "--dry-run", action="store_true",
  135. help="Don't actually change anything",
  136. )
  137. parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
  138. args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
  139. logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
  140. logger.info(f'* Loading: {args.model}')
  141. reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
  142. convert_byteorder(reader, args)
  143. if __name__ == "__main__":
  144. main()