gguf_hash.py 3.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import uuid
  4. import hashlib
  5. import logging
  6. import argparse
  7. import os
  8. import sys
  9. from pathlib import Path
  10. from tqdm import tqdm
  11. # Necessary to load the local gguf package
  12. if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
  13. sys.path.insert(0, str(Path(__file__).parent.parent))
  14. from gguf import GGUFReader # noqa: E402
  15. logger = logging.getLogger("gguf-hash")
  16. # UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
  17. UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
  18. # For more information about what field.parts and field.data represent,
  19. # please see the comments in the modify_gguf.py example.
  20. def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar) -> None:
  21. sha1 = hashlib.sha1()
  22. uuidv5_sha1 = hashlib.sha1()
  23. uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)
  24. # Total Weight Calculation For Progress Bar
  25. total_weights = 0
  26. for n, tensor in enumerate(reader.tensors, 1):
  27. # We don't need these
  28. if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
  29. continue
  30. # Calculate Tensor Volume
  31. sum_weights_in_tensor = 1
  32. for dim in tensor.shape:
  33. sum_weights_in_tensor *= dim
  34. total_weights += sum_weights_in_tensor
  35. # Hash Progress Bar
  36. bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar)
  37. # Hashing Process
  38. for n, tensor in enumerate(reader.tensors, 1):
  39. # We don't need these
  40. if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
  41. continue
  42. # Progressbar
  43. sum_weights_in_tensor = 1
  44. for dim in tensor.shape:
  45. sum_weights_in_tensor *= dim
  46. bar.update(sum_weights_in_tensor)
  47. sha1_layer = hashlib.sha1()
  48. sha1_layer.update(tensor.data.data)
  49. sha1.update(tensor.data.data)
  50. uuidv5_sha1.update(tensor.data.data)
  51. print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
  52. # Flush Hash Progress Bar
  53. bar.close()
  54. # Display Hash Output
  55. print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100
  56. print("UUIDv5 {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100
  57. def main() -> None:
  58. parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
  59. parser.add_argument("model", type=str, help="GGUF format model filename")
  60. parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
  61. parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
  62. args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
  63. logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
  64. reader = GGUFReader(args.model, 'r')
  65. gguf_hash(reader, args.model, not args.progressbar)
  66. if __name__ == '__main__':
  67. main()