gguf_convert_endian.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import logging
  4. import argparse
  5. import os
  6. import sys
  7. from tqdm import tqdm
  8. from pathlib import Path
  9. import numpy as np
  10. # Necessary to load the local gguf package
  11. if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
  12. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  13. import gguf
  14. logger = logging.getLogger("gguf-convert-endian")
  15. def byteswap_q4_0(tensor, block_offs):
  16. # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
  17. # Byte-Swap f16 sized delta field
  18. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  19. delta.byteswap(inplace=True)
  20. def byteswap_q8_0(tensor, block_offs):
  21. # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
  22. # Byte-Swap f16 sized delta field
  23. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  24. delta.byteswap(inplace=True)
  25. def byteswap_q4_k(tensor, block_offs):
  26. # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
  27. # Byte-Swap f16 sized fields
  28. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  29. delta.byteswap(inplace=True)
  30. delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
  31. delta.byteswap(inplace=True)
  32. def byteswap_q6_k(tensor, block_offs):
  33. # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
  34. # Byte-Swap f16 sized field
  35. delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
  36. delta.byteswap(inplace=True)
  37. byteswap_tensors = {
  38. gguf.GGMLQuantizationType.Q4_0: {
  39. "block_size": 18, # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
  40. "byteswap_func": byteswap_q4_0,
  41. },
  42. gguf.GGMLQuantizationType.Q8_0: {
  43. "block_size": 34, # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
  44. "byteswap_func": byteswap_q8_0,
  45. },
  46. gguf.GGMLQuantizationType.Q4_K: {
  47. "block_size": 144, # 144 bytes = 2 * <f16 delta scaling factor> + 140 * <int8 quant>
  48. "byteswap_func": byteswap_q4_k,
  49. },
  50. gguf.GGMLQuantizationType.Q6_K: {
  51. "block_size": 210, # 210 bytes = <f16 delta scaling factor> + 208 * <int8 quant>
  52. "byteswap_func": byteswap_q6_k,
  53. },
  54. }
  55. def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
  56. file_endian = reader.endianess.name
  57. if reader.byte_order == 'S':
  58. host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
  59. else:
  60. host_endian = file_endian
  61. order = host_endian if args.order == "native" else args.order.upper()
  62. logger.info(f"* Host is {host_endian} endian, GGUF file seems to be {file_endian} endian")
  63. if file_endian == order:
  64. logger.info(f"* File is already {order} endian. Nothing to do.")
  65. sys.exit(0)
  66. logger.info("* Checking tensors for conversion compatibility")
  67. for tensor in reader.tensors:
  68. if tensor.tensor_type not in byteswap_tensors and \
  69. tensor.tensor_type not in (
  70. gguf.GGMLQuantizationType.F32,
  71. gguf.GGMLQuantizationType.F16,
  72. gguf.GGMLQuantizationType.BF16,
  73. ):
  74. raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
  75. logger.info(f"* Preparing to convert from {file_endian} to {order}")
  76. if args.dry_run:
  77. return
  78. logger.warning("*** Warning *** Warning *** Warning **")
  79. logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
  80. if order != host_endian:
  81. logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
  82. logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
  83. logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
  84. response = input("YES, I am sure> ")
  85. if response != "YES":
  86. logger.warning("You didn't enter YES. Okay then, see ya!")
  87. sys.exit(0)
  88. logger.info(f"* Converting fields ({len(reader.fields)})")
  89. for idx, field in enumerate(reader.fields.values()):
  90. logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
  91. for part in field.parts:
  92. part.byteswap(inplace=True)
  93. logger.info(f"* Converting tensors ({len(reader.tensors)})")
  94. for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
  95. log_message = (
  96. f"Converting tensor {repr(tensor.name)}, "
  97. f"type={tensor.tensor_type.name}, "
  98. f"elements={tensor.n_elements} "
  99. )
  100. # Byte-swap each part of the tensor's field
  101. for part in tensor.field.parts:
  102. part.byteswap(inplace=True)
  103. # Byte-swap tensor data if necessary
  104. if tensor.tensor_type in byteswap_tensors:
  105. # first flatten structure
  106. oldshape = tensor.data.shape
  107. newshape = 1
  108. for i in tensor.data.shape:
  109. newshape *= i
  110. tensor.data.resize(newshape)
  111. block_size = byteswap_tensors[tensor.tensor_type]["block_size"]
  112. byteswap_func = byteswap_tensors[tensor.tensor_type]["byteswap_func"]
  113. n_blocks = len(tensor.data) // block_size
  114. for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
  115. block_offs = block_num * block_size
  116. byteswap_func(tensor, block_offs)
  117. if block_num % 100000 == 0:
  118. inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
  119. # restore old shape in case it's ever used
  120. tensor.data.resize(oldshape)
  121. elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16:
  122. # Special case for BF16
  123. # It is 2-bytes data, but by default view loads it as 1-byte data.
  124. # Change to correct view before byteswapping.
  125. tensor.data.view(dtype=np.uint16).byteswap(inplace=True)
  126. else:
  127. # Handle other tensor types
  128. tensor.data.byteswap(inplace=True)
  129. pbar.set_description(log_message)
  130. logger.info("* Completion")
  131. def main() -> None:
  132. parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
  133. parser.add_argument(
  134. "model", type=str,
  135. help="GGUF format model filename",
  136. )
  137. parser.add_argument(
  138. "order", type=str, choices=['big', 'little', 'native'],
  139. help="Requested byte order",
  140. )
  141. parser.add_argument(
  142. "--dry-run", action="store_true",
  143. help="Don't actually change anything",
  144. )
  145. parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
  146. args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
  147. logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
  148. logger.info(f'* Loading: {args.model}')
  149. reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
  150. convert_byteorder(reader, args)
  151. if __name__ == "__main__":
  152. main()