gguf_dump.py

#!/usr/bin/env python3
from __future__ import annotations

import logging
import argparse
import os
import sys
from pathlib import Path
from typing import Any

import numpy as np

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader, GGUFValueType, ReaderTensor  # noqa: E402

logger = logging.getLogger("gguf-dump")

def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
    host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
    if reader.byte_order == 'S':
        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
    else:
        file_endian = host_endian
    return (host_endian, file_endian)

# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    host_endian, file_endian = get_file_host_endian(reader)
    print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')  # noqa: NP100
    print(f'* Dumping {len(reader.fields)} key/value pair(s)')  # noqa: NP100
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        log_message = f'  {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
            elif field.types[0] in reader.gguf_scalar_to_np:
                log_message += ' = {0}'.format(field.parts[-1][0])
        print(log_message)  # noqa: NP100

    if args.no_tensors:
        return

    print(f'* Dumping {len(reader.tensors)} tensor(s)')  # noqa: NP100
    for n, tensor in enumerate(reader.tensors, 1):
        prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f'  {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')  # noqa: NP100

def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
    import json
    host_endian, file_endian = get_file_host_endian(reader)
    metadata: dict[str, Any] = {}
    tensors: dict[str, Any] = {}
    result = {
        "filename": args.model,
        "endian": file_endian,
        "metadata": metadata,
        "tensors": tensors,
    }
    for idx, field in enumerate(reader.fields.values()):
        curr: dict[str, Any] = {
            "index": idx,
            "type": field.types[0].name if field.types else 'UNKNOWN',
            "offset": field.offset,
        }
        metadata[field.name] = curr
        if field.types[:1] == [GGUFValueType.ARRAY]:
            curr["array_types"] = [t.name for t in field.types][1:]
            if not args.json_array:
                continue
            itype = field.types[-1]
            if itype == GGUFValueType.STRING:
                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
            else:
                curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
        elif field.types[0] == GGUFValueType.STRING:
            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
        else:
            curr["value"] = field.parts[-1].tolist()[0]
    if not args.no_tensors:
        for idx, tensor in enumerate(reader.tensors):
            tensors[tensor.name] = {
                "index": idx,
                "shape": tensor.shape.tolist(),
                "type": tensor.tensor_type.name,
                "offset": tensor.field.offset,
            }
    json.dump(result, sys.stdout)

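# Rough shape of the JSON emitted above (illustrative values, not verbatim output):
#   {
#     "filename": "<model path>",
#     "endian": "LITTLE",
#     "metadata": {"<key>": {"index": 0, "type": "STRING", "offset": 0, "value": "..."}, ...},
#     "tensors":  {"<tensor name>": {"index": 0, "shape": [4096, 32000], "type": "F16", "offset": 0}, ...}
#   }
# Array-typed fields additionally carry "array_types", and their "value" list is only
# included when --json-array is passed.
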
def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]):
    # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957

    # Alignment Utility Function
    def strAlign(padding: int, alignMode: str | None, strVal: str):
        if alignMode == 'center':
            return strVal.center(padding)
        elif alignMode == 'right':
            return strVal.rjust(padding - 1) + ' '
        elif alignMode == 'left':
            return ' ' + strVal.ljust(padding - 1)
        else:  # default left
            return ' ' + strVal.ljust(padding - 1)

    def dashAlign(padding: int, alignMode: str | None):
        if alignMode == 'center':
            return ':' + '-' * (padding - 2) + ':'
        elif alignMode == 'right':
            return '-' * (padding - 1) + ':'
        elif alignMode == 'left':
            return ':' + '-' * (padding - 1)
        else:  # default left
            return '-' * padding

    # Calculate Padding For Each Column Based On Header and Data Length
    rowsPadding = {}
    for index, columnEntry in enumerate(header_map):
        padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2
        headerPadCount = len(columnEntry['header_name']) + 2
        rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount

    # Render Markdown Header
    rows = []
    rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map)))
    rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map)))

    # Render Tabular Data
    for item in data:
        rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map)))

    # Convert Tabular String Rows Into String
    tableString = ""
    for row in rows:
        tableString += f'|{row}|\n'

    return tableString

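# Minimal illustration of the helper above (hypothetical header_map/data, not used by
# the dump itself):
#
#   demo_header_map = [
#       {'key_name': 'name', 'header_name': 'Name', 'align': 'left'},
#       {'key_name': 'size', 'header_name': 'Size', 'align': 'right'},
#   ]
#   demo_rows = [{'name': 'token_embd.weight', 'size': 4096}]
#   print(markdown_table_with_alignment_support(demo_header_map, demo_rows))
#
# This prints a pipe-delimited Markdown table whose separator row uses ':' markers to
# encode the per-column alignment requested in header_map.
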
def element_count_rounded_notation(count: int) -> str:
    if count > 1e15:
        # Quadrillion
        scaled_amount = count * 1e-15
        scale_suffix = "Q"
    elif count > 1e12:
        # Trillions
        scaled_amount = count * 1e-12
        scale_suffix = "T"
    elif count > 1e9:
        # Billions
        scaled_amount = count * 1e-9
        scale_suffix = "B"
    elif count > 1e6:
        # Millions
        scaled_amount = count * 1e-6
        scale_suffix = "M"
    elif count > 1e3:
        # Thousands
        scaled_amount = count * 1e-3
        scale_suffix = "K"
    else:
        # Under Thousands
        scaled_amount = count
        scale_suffix = ""
    return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}"

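# A few sample values implied by the thresholds above:
#   element_count_rounded_notation(350)           -> '350'
#   element_count_rounded_notation(1_234_567)     -> '~1M'
#   element_count_rounded_notation(7_000_000_000) -> '~7B'
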
def translate_tensor_name(name):
    words = name.split(".")

    # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names
    abbreviation_dictionary = {
        'token_embd': 'Token embedding',
        'pos_embd': 'Position embedding',
        'output_norm': 'Output normalization',
        'output': 'Output',
        'attn_norm': 'Attention normalization',
        'attn_norm_2': 'Attention normalization',
        'attn_qkv': 'Attention query-key-value',
        'attn_q': 'Attention query',
        'attn_k': 'Attention key',
        'attn_v': 'Attention value',
        'attn_output': 'Attention output',
        'ffn_norm': 'Feed-forward network normalization',
        'ffn_up': 'Feed-forward network "up"',
        'ffn_gate': 'Feed-forward network "gate"',
        'ffn_down': 'Feed-forward network "down"',
        'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models',
        'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models',
        'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models',
        'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models',
        'ssm_in': 'State space model input projections',
        'ssm_conv1d': 'State space model rolling/shift',
        'ssm_x': 'State space model selective parametrization',
        'ssm_a': 'State space model state compression',
        'ssm_d': 'State space model skip connection',
        'ssm_dt': 'State space model time step',
        'ssm_out': 'State space model output projection',
        'blk': 'Block',
        'enc': 'Encoder',
        'dec': 'Decoder',
    }

    expanded_words = []
    for word in words:
        word_norm = word.strip().lower()
        if word_norm in abbreviation_dictionary:
            expanded_words.append(abbreviation_dictionary[word_norm].title())
        else:
            expanded_words.append(word.title())

    return ' '.join(expanded_words)

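# Example expansions produced by translate_tensor_name() given the dictionary above:
#   'token_embd.weight'   -> 'Token Embedding Weight'
#   'blk.0.attn_q.weight' -> 'Block 0 Attention Query Weight'
#   'output_norm.bias'    -> 'Output Normalization Bias'
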
def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    host_endian, file_endian = get_file_host_endian(reader)
    markdown_content = ""
    markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n'
    markdown_content += f'- Endian: {file_endian} endian\n'
    markdown_content += '\n'
    markdown_content += '## Key Value Metadata Store\n\n'
    markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n'
    markdown_content += '\n'

    kv_dump_table: list[dict[str, str | int]] = []

    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        total_elements = len(field.data)
        value = ""
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                value = repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60])
            elif curr_type in reader.gguf_scalar_to_np:
                value = str(field.parts[-1][0])
        else:
            if field.types[0] == GGUFValueType.ARRAY:
                curr_type = field.types[1]
                if curr_type == GGUFValueType.STRING:
                    render_element = min(5, total_elements)
                    for element_pos in range(render_element):
                        value += repr(str(bytes(field.parts[-1 - element_pos]), encoding='utf-8')[:5]) + (", " if total_elements > 1 else "")
                elif curr_type in reader.gguf_scalar_to_np:
                    render_element = min(7, total_elements)
                    for element_pos in range(render_element):
                        value += str(field.parts[-1 - element_pos][0]) + (", " if total_elements > 1 else "")
                value = f'[ {value}{" ..." if total_elements > 1 else ""} ]'
        kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})

    kv_dump_table_header_map = [
        {'key_name':'n', 'header_name':'POS', 'align':'right'},
        {'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'},
        {'key_name':'total_elements', 'header_name':'Count', 'align':'right'},
        {'key_name':'field_name', 'header_name':'Key', 'align':'left'},
        {'key_name':'value', 'header_name':'Value', 'align':'left'},
    ]

    markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table)
    markdown_content += "\n"

    if not args.no_tensors:
        # Group tensors by their prefix and maintain order
        tensor_prefix_order: list[str] = []
        tensor_name_to_key: dict[str, int] = {}
        tensor_groups: dict[str, list[ReaderTensor]] = {}
        total_elements = sum(tensor.n_elements for tensor in reader.tensors)

        # Parsing Tensors Record
        for key, tensor in enumerate(reader.tensors):
            tensor_components = tensor.name.split('.')

            # Classify Tensor Group
            tensor_group_name = "base"
            if tensor_components[0] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
            elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}"
            elif tensor_components[0] in ['enc', 'dec']:
                tensor_group_name = f"{tensor_components[0]}"

            # Check if new Tensor Group
            if tensor_group_name not in tensor_groups:
                tensor_groups[tensor_group_name] = []
                tensor_prefix_order.append(tensor_group_name)

            # Record Tensor and Tensor Position
            tensor_groups[tensor_group_name].append(tensor)
            tensor_name_to_key[tensor.name] = key

        # Tensors Mapping Dump
        markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n'
        markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n'
        markdown_content += '\n'

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n"

        markdown_content += "\n"
        markdown_content += "### Tensor Data Offset\n"
        markdown_content += '\n'
        markdown_content += 'This table contains the offset and data segment relative to start of file\n'
        markdown_content += '\n'

        tensor_mapping_table: list[dict[str, str | int]] = []
        for key, tensor in enumerate(reader.tensors):
            data_offset_pretty = '{0:#16x}'.format(tensor.data_offset)
            data_size_pretty = '{0:#16x}'.format(tensor.n_bytes)
            tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty})

        tensors_mapping_table_header_map = [
            {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
            {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
            {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'},
            {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'},
        ]

        markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table)
        markdown_content += "\n"

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            group_percentage = group_elements / total_elements * 100
            markdown_content += f"### <a name=\"{group.replace('.', '_')}\">{translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements</a>\n\n"

            # Precalculate column sizing for visual consistency
            prettify_element_est_count_size: int = 1
            prettify_element_count_size: int = 1
            prettify_dimension_max_widths: dict[int, int] = {}
            for tensor in tensors:
                prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements))))
                prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements)))
                for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))):
                    prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i, 1), len(str(dimension_size)))

            # Generate Tensor Layer Table Content
            tensor_dump_table: list[dict[str, str | int]] = []
            for tensor in tensors:
                human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)"))
                pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))))
                element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})"
                element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}"
                type_name_string = f"{tensor.tensor_type.name}"
                tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string})

            tensor_dump_table_header_map = [
                {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
                {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
                {'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'},
                {'key_name':'element_count', 'header_name':'Elements', 'align':'left'},
                {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'},
                {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'},
            ]

            markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table)
            markdown_content += "\n"
            markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n"
            markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
            markdown_content += "\n\n"

    print(markdown_content)  # noqa: NP100

def main() -> None:
    parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
    parser.add_argument("--json", action="store_true", help="Produce JSON output")
    parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
    parser.add_argument("--data-offset", action="store_true", help="Start of data offset")
    parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field")
    parser.add_argument("--markdown", action="store_true", help="Produce markdown output")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    if not args.json and not args.markdown and not args.data_offset and not args.data_alignment:
        logger.info(f'* Loading: {args.model}')

    reader = GGUFReader(args.model, 'r')

    if args.json:
        dump_metadata_json(reader, args)
    elif args.markdown:
        dump_markdown_metadata(reader, args)
    elif args.data_offset:
        print(reader.data_offset)  # noqa: NP100
    elif args.data_alignment:
        print(reader.alignment)  # noqa: NP100
    else:
        dump_metadata(reader, args)


if __name__ == '__main__':
    main()
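
# Typical invocations (the model filename below is purely illustrative):
#
#   python3 gguf_dump.py model.gguf                     # plain key/value + tensor listing
#   python3 gguf_dump.py --no-tensors model.gguf        # metadata only
#   python3 gguf_dump.py --markdown model.gguf          # Markdown report with tensor group tables
#   python3 gguf_dump.py --json --json-array model.gguf > dump.json
#   python3 gguf_dump.py --data-offset model.gguf       # print start-of-tensor-data offset only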