#!/usr/bin/env python3
from __future__ import annotations

import logging
import argparse
import os
import re
import sys
from pathlib import Path
from typing import Any

import numpy as np

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader, GGUFValueType, ReaderTensor  # noqa: E402

logger = logging.getLogger("gguf-dump")
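
# Example invocations (derived from the argument parser in main(); the model path is illustrative):
#   python3 gguf_dump.py model.gguf                      # plain-text key/value and tensor dump
#   python3 gguf_dump.py --markdown model.gguf           # markdown report
#   python3 gguf_dump.py --json --json-array model.gguf  # JSON dump including full array values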


def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
    host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
    if reader.byte_order == 'S':
        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
    else:
        file_endian = host_endian
    return (host_endian, file_endian)


# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
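# (As a rough guide only: field.parts holds the raw numpy slices that make up the
# field -- key length, key bytes, type ids and value bytes -- while field.data holds
# the indexes into field.parts that point at the actual value(s).)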
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    host_endian, file_endian = get_file_host_endian(reader)
    print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')  # noqa: NP100
    print(f'* Dumping {len(reader.fields)} key/value pair(s)')  # noqa: NP100
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
            elif field.types[0] in reader.gguf_scalar_to_np:
                log_message += ' = {0}'.format(field.parts[-1][0])
        print(log_message)  # noqa: NP100

    if args.no_tensors:
        return

    print(f'* Dumping {len(reader.tensors)} tensor(s)')  # noqa: NP100
    for n, tensor in enumerate(reader.tensors, 1):
        prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')  # noqa: NP100


def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
    import json
    host_endian, file_endian = get_file_host_endian(reader)
    metadata: dict[str, Any] = {}
    tensors: dict[str, Any] = {}
    result = {
        "filename": args.model,
        "endian": file_endian,
        "metadata": metadata,
        "tensors": tensors,
    }
    for idx, field in enumerate(reader.fields.values()):
        curr: dict[str, Any] = {
            "index": idx,
            "type": field.types[0].name if field.types else 'UNKNOWN',
            "offset": field.offset,
        }
        metadata[field.name] = curr
        if field.types[:1] == [GGUFValueType.ARRAY]:
            curr["array_types"] = [t.name for t in field.types][1:]
            if not args.json_array:
                continue
            itype = field.types[-1]
            if itype == GGUFValueType.STRING:
                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
            else:
                curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
        elif field.types[0] == GGUFValueType.STRING:
            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
        else:
            curr["value"] = field.parts[-1].tolist()[0]
    if not args.no_tensors:
        for idx, tensor in enumerate(reader.tensors):
            tensors[tensor.name] = {
                "index": idx,
                "shape": tensor.shape.tolist(),
                "type": tensor.tensor_type.name,
                "offset": tensor.field.offset,
            }
    json.dump(result, sys.stdout)


def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]):
    # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957

    # Alignment Utility Function
    def strAlign(padding: int, alignMode: str | None, strVal: str):
        if alignMode == 'center':
            return strVal.center(padding)
        elif alignMode == 'right':
            return strVal.rjust(padding - 1) + ' '
        elif alignMode == 'left':
            return ' ' + strVal.ljust(padding - 1)
        else:  # default left
            return ' ' + strVal.ljust(padding - 1)

    def dashAlign(padding: int, alignMode: str | None):
        if alignMode == 'center':
            return ':' + '-' * (padding - 2) + ':'
        elif alignMode == 'right':
            return '-' * (padding - 1) + ':'
        elif alignMode == 'left':
            return ':' + '-' * (padding - 1)
        else:  # default left
            return '-' * (padding)

    # Calculate Padding For Each Column Based On Header and Data Length
    rowsPadding = {}
    for index, columnEntry in enumerate(header_map):
        padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2
        headerPadCount = len(columnEntry['header_name']) + 2
        rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount

    # Render Markdown Header
    rows = []
    rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map)))
    rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map)))

    # Render Tabular Data
    for item in data:
        rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map)))

    # Convert Tabular String Rows Into String
    tableString = ""
    for row in rows:
        tableString += f'|{row}|\n'

    return tableString
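
# A small illustrative example (hypothetical column names):
#
#   markdown_table_with_alignment_support(
#       [{'key_name': 'name', 'header_name': 'Name', 'align': 'left'},
#        {'key_name': 'size', 'header_name': 'Size', 'align': 'right'}],
#       [{'name': 'token_embd.weight', 'size': 262144000}])
#
# returns a Markdown table string roughly like:
#
#   | Name              |      Size |
#   |:------------------|----------:|
#   | token_embd.weight | 262144000 |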


def element_count_rounded_notation(count: int) -> str:
    if count > 1e15 :
        # Quadrillion
        scaled_amount = count * 1e-15
        scale_suffix = "Q"
    elif count > 1e12 :
        # Trillions
        scaled_amount = count * 1e-12
        scale_suffix = "T"
    elif count > 1e9 :
        # Billions
        scaled_amount = count * 1e-9
        scale_suffix = "B"
    elif count > 1e6 :
        # Millions
        scaled_amount = count * 1e-6
        scale_suffix = "M"
    elif count > 1e3 :
        # Thousands
        scaled_amount = count * 1e-3
        scale_suffix = "K"
    else:
        # Under Thousands
        scaled_amount = count
        scale_suffix = ""
    return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}"
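
# For example: element_count_rounded_notation(7241732096) returns "~7B",
# and element_count_rounded_notation(512) returns "512".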


def translate_tensor_name(name):
    words = name.split(".")

    # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names
    abbreviation_dictionary = {
        'token_embd': 'Token embedding',
        'pos_embd': 'Position embedding',
        'output_norm': 'Output normalization',
        'output': 'Output',
        'attn_norm': 'Attention normalization',
        'attn_norm_2': 'Attention normalization',
        'attn_qkv': 'Attention query-key-value',
        'attn_q': 'Attention query',
        'attn_k': 'Attention key',
        'attn_v': 'Attention value',
        'attn_output': 'Attention output',
        'ffn_norm': 'Feed-forward network normalization',
        'ffn_up': 'Feed-forward network "up"',
        'ffn_gate': 'Feed-forward network "gate"',
        'ffn_down': 'Feed-forward network "down"',
        'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models',
        'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models',
        'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models',
        'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models',
        'ssm_in': 'State space model input projections',
        'ssm_conv1d': 'State space model rolling/shift',
        'ssm_x': 'State space model selective parametrization',
        'ssm_a': 'State space model state compression',
        'ssm_d': 'State space model skip connection',
        'ssm_dt': 'State space model time step',
        'ssm_out': 'State space model output projection',
        'blk': 'Block',
        'enc': 'Encoder',
        'dec': 'Decoder',
    }

    expanded_words = []
    for word in words:
        word_norm = word.strip().lower()
        if word_norm in abbreviation_dictionary:
            expanded_words.append(abbreviation_dictionary[word_norm].title())
        else:
            expanded_words.append(word.title())
    return ' '.join(expanded_words)
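
# For example: translate_tensor_name("blk.0.attn_q.weight") returns
# "Block 0 Attention Query Weight".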


def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    host_endian, file_endian = get_file_host_endian(reader)
    markdown_content = ""
    markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n'
    markdown_content += f'- Endian: {file_endian} endian\n'
    markdown_content += '\n'
    markdown_content += '## Key Value Metadata Store\n\n'
    markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n'
    markdown_content += '\n'

    kv_dump_table: list[dict[str, str | int]] = []
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        def escape_markdown_inline_code(value_string):
            # Find the longest contiguous sequence of backticks in the string then
            # wrap string with appropriate number of backticks required to escape it
            max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0)
            inline_code_marker = '`' * (max_backticks + 1)

            # If the string starts or ends with a backtick, add a space at the beginning and end
            if value_string.startswith('`') or value_string.endswith('`'):
                value_string = f" {value_string} "

            return f"{inline_code_marker}{value_string}{inline_code_marker}"

        total_elements = len(field.data)
        value = ""
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                truncate_length = 60
                value_string = str(bytes(field.parts[-1]), encoding='utf-8')
                if len(value_string) > truncate_length:
                    head = escape_markdown_inline_code(value_string[:truncate_length // 2])
                    tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
                    value = "{head}...{tail}".format(head=head, tail=tail)
                else:
                    value = escape_markdown_inline_code(value_string)
            elif curr_type in reader.gguf_scalar_to_np:
                value = str(field.parts[-1][0])
        else:
            if field.types[0] == GGUFValueType.ARRAY:
                curr_type = field.types[1]
                array_elements = []

                if curr_type == GGUFValueType.STRING:
                    render_element = min(5, total_elements)
                    for element_pos in range(render_element):
                        truncate_length = 30
                        value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
                        if len(value_string) > truncate_length:
                            head = escape_markdown_inline_code(value_string[:truncate_length // 2])
                            tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
                            value = "{head}...{tail}".format(head=head, tail=tail)
                        else:
                            value = escape_markdown_inline_code(value_string)
                        array_elements.append(value)

                elif curr_type in reader.gguf_scalar_to_np:
                    render_element = min(7, total_elements)
                    for element_pos in range(render_element):
                        array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))

                value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'

        kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})

    kv_dump_table_header_map = [
        {'key_name':'n', 'header_name':'POS', 'align':'right'},
        {'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'},
        {'key_name':'total_elements', 'header_name':'Count', 'align':'right'},
        {'key_name':'field_name', 'header_name':'Key', 'align':'left'},
        {'key_name':'value', 'header_name':'Value', 'align':'left'},
    ]

    markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table)

    markdown_content += "\n"

    if not args.no_tensors:
        # Group tensors by their prefix and maintain order
        tensor_prefix_order: list[str] = []
        tensor_name_to_key: dict[str, int] = {}
        tensor_groups: dict[str, list[ReaderTensor]] = {}
        total_elements = sum(tensor.n_elements for tensor in reader.tensors)

        # Parsing Tensors Record
        for key, tensor in enumerate(reader.tensors):
            tensor_components = tensor.name.split('.')

            # Classify Tensor Group
            tensor_group_name = "base"
            if tensor_components[0] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
            elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}"
            elif tensor_components[0] in ['enc', 'dec']:
                tensor_group_name = f"{tensor_components[0]}"

            # Check if new Tensor Group
            if tensor_group_name not in tensor_groups:
                tensor_groups[tensor_group_name] = []
                tensor_prefix_order.append(tensor_group_name)

            # Record Tensor and Tensor Position
            tensor_groups[tensor_group_name].append(tensor)
            tensor_name_to_key[tensor.name] = key

        # Tensors Mapping Dump
        markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n'
        markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n'
        markdown_content += '\n'

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n"

        markdown_content += "\n"

        markdown_content += "### Tensor Data Offset\n"
        markdown_content += '\n'
        markdown_content += 'This table contains the offset and data segment relative to start of file\n'
        markdown_content += '\n'

        tensor_mapping_table: list[dict[str, str | int]] = []
        for key, tensor in enumerate(reader.tensors):
            data_offset_pretty = '{0:#16x}'.format(tensor.data_offset)
            data_size_pretty = '{0:#16x}'.format(tensor.n_bytes)
            tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty})

        tensors_mapping_table_header_map = [
            {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
            {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
            {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'},
            {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'},
        ]

        markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table)
        markdown_content += "\n"

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            group_percentage = group_elements / total_elements * 100
            markdown_content += f"### <a name=\"{group.replace('.', '_')}\">{translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements</a>\n\n"

            # Precalculate column sizing for visual consistency
            prettify_element_est_count_size: int = 1
            prettify_element_count_size: int = 1
            prettify_dimension_max_widths: dict[int, int] = {}
            for tensor in tensors:
                prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements))))
                prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements)))
                for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))):
                    prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i, 1), len(str(dimension_size)))

            # Generate Tensor Layer Table Content
            tensor_dump_table: list[dict[str, str | int]] = []
            for tensor in tensors:
                human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)"))
                pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))))
                element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})"
                element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}"
                type_name_string = f"{tensor.tensor_type.name}"
                tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string})

            tensor_dump_table_header_map = [
                {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
                {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
                {'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'},
                {'key_name':'element_count', 'header_name':'Elements', 'align':'left'},
                {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'},
                {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'},
            ]

            markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table)

            markdown_content += "\n"
            markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n"
            markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
            markdown_content += "\n\n"

    print(markdown_content)  # noqa: NP100


def main() -> None:
    parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
    parser.add_argument("--json", action="store_true", help="Produce JSON output")
    parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
    parser.add_argument("--data-offset", action="store_true", help="Start of data offset")
    parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field")
    parser.add_argument("--markdown", action="store_true", help="Produce markdown output")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    if not args.json and not args.markdown and not args.data_offset and not args.data_alignment:
        logger.info(f'* Loading: {args.model}')

    reader = GGUFReader(args.model, 'r')

    if args.json:
        dump_metadata_json(reader, args)
    elif args.markdown:
        dump_markdown_metadata(reader, args)
    elif args.data_offset:
        print(reader.data_offset)  # noqa: NP100
    elif args.data_alignment:
        print(reader.alignment)  # noqa: NP100
    else:
        dump_metadata(reader, args)


if __name__ == '__main__':
    main()