# gguf_convert_endian.py
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import logging
  4. import argparse
  5. import os
  6. import sys
  7. from tqdm import tqdm
  8. from pathlib import Path
  9. import numpy as np
# Necessary to load the local gguf package
# NOTE(review): the existence check looks four levels up for 'gguf-py' but the path
# inserted is only three levels up — presumably the package root; confirm against
# the repository layout.
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import gguf

# Module-level logger; its level is configured in main() via logging.basicConfig.
logger = logging.getLogger("gguf-convert-endian")
  15. def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
  16. file_endian = reader.endianess.name
  17. if reader.byte_order == 'S':
  18. host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
  19. else:
  20. host_endian = file_endian
  21. order = host_endian if args.order == "native" else args.order.upper()
  22. logger.info(f"* Host is {host_endian} endian, GGUF file seems to be {file_endian} endian")
  23. if file_endian == order:
  24. logger.info(f"* File is already {order} endian. Nothing to do.")
  25. sys.exit(0)
  26. logger.info("* Checking tensors for conversion compatibility")
  27. for tensor in reader.tensors:
  28. if tensor.tensor_type not in (
  29. gguf.GGMLQuantizationType.F32,
  30. gguf.GGMLQuantizationType.F16,
  31. gguf.GGMLQuantizationType.Q8_0,
  32. gguf.GGMLQuantizationType.Q4_K,
  33. gguf.GGMLQuantizationType.Q6_K,
  34. ):
  35. raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
  36. logger.info(f"* Preparing to convert from {file_endian} to {order}")
  37. if args.dry_run:
  38. return
  39. logger.warning("*** Warning *** Warning *** Warning **")
  40. logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
  41. if order != host_endian:
  42. logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
  43. logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
  44. logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
  45. response = input("YES, I am sure> ")
  46. if response != "YES":
  47. logger.warning("You didn't enter YES. Okay then, see ya!")
  48. sys.exit(0)
  49. logger.info(f"* Converting fields ({len(reader.fields)})")
  50. for idx, field in enumerate(reader.fields.values()):
  51. logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
  52. for part in field.parts:
  53. part.byteswap(inplace=True)
  54. logger.info(f"* Converting tensors ({len(reader.tensors)})")
  55. for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
  56. log_message = (
  57. f"Converting tensor {repr(tensor.name)}, "
  58. f"type={tensor.tensor_type.name}, "
  59. f"elements={tensor.n_elements} "
  60. )
  61. # Byte-swap each part of the tensor's field
  62. for part in tensor.field.parts:
  63. part.byteswap(inplace=True)
  64. # Byte-swap tensor data if necessary
  65. if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
  66. # Handle Q8_0 tensor blocks (block_q8_0)
  67. # Specific handling of block_q8_0 is required.
  68. # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
  69. block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
  70. n_blocks = len(tensor.data) // block_size
  71. for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
  72. block_offs = block_num * block_size
  73. # Byte-Swap f16 sized delta field
  74. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  75. delta.byteswap(inplace=True)
  76. # Byte-Swap Q8 weights
  77. if block_num % 100000 == 0:
  78. inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
  79. elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
  80. # Handle Q4_K tensor blocks (block_q4_k)
  81. # Specific handling of block_q4_k is required.
  82. # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
  83. # first flatten structure
  84. newshape = 1
  85. for i in tensor.data.shape:
  86. newshape *= i
  87. tensor.data.resize(newshape)
  88. block_size = 144
  89. n_blocks = len(tensor.data) // block_size
  90. for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
  91. block_offs = block_num * block_size
  92. # Byte-Swap f16 sized fields
  93. delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
  94. delta.byteswap(inplace=True)
  95. delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
  96. delta.byteswap(inplace=True)
  97. # Byte-Swap
  98. if block_num % 100000 == 0:
  99. inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
  100. elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
  101. # Handle Q6_K tensor blocks (block_q6_k)
  102. # Specific handling of block_q6_k is required.
  103. # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
  104. # first flatten structure
  105. newshape = 1
  106. for i in tensor.data.shape:
  107. newshape *= i
  108. tensor.data.resize(newshape)
  109. block_size = 210
  110. n_blocks = len(tensor.data) // block_size
  111. for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
  112. block_offs = block_num * block_size
  113. # Byte-Swap f16 sized field
  114. delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
  115. delta.byteswap(inplace=True)
  116. # Byte-Swap
  117. if block_num % 100000 == 0:
  118. inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
  119. else:
  120. # Handle other tensor types
  121. tensor.data.byteswap(inplace=True)
  122. pbar.set_description(log_message)
  123. logger.info("* Completion")
  124. def main() -> None:
  125. parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
  126. parser.add_argument(
  127. "model", type=str,
  128. help="GGUF format model filename",
  129. )
  130. parser.add_argument(
  131. "order", type=str, choices=['big', 'little', 'native'],
  132. help="Requested byte order",
  133. )
  134. parser.add_argument(
  135. "--dry-run", action="store_true",
  136. help="Don't actually change anything",
  137. )
  138. parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
  139. args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
  140. logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
  141. logger.info(f'* Loading: {args.model}')
  142. reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
  143. convert_byteorder(reader, args)
  144. if __name__ == "__main__":
  145. main()