gguf_hash.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import uuid
  4. import hashlib
  5. import logging
  6. import argparse
  7. import os
  8. import sys
  9. from pathlib import Path
  10. from tqdm import tqdm
  11. # Necessary to load the local gguf package
  12. if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
  13. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  14. from gguf import GGUFReader # noqa: E402
# Module-level logger for this script, registered under the name "gguf-hash".
logger = logging.getLogger("gguf-hash")

# Namespace UUID whose raw bytes are fed into the SHA-1 that gguf_hash() later
# turns into the model UUID.
# NOTE(review): the commented-out expression below presumably shows how the
# literal was derived; the equality is not checked anywhere in this file.
# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
  18. # For more information about what field.parts and field.data represent,
  19. # please see the comments in the modify_gguf.py example.
  20. def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None:
  21. sha1 = hashlib.sha1()
  22. sha256 = hashlib.sha256()
  23. uuidv5_sha1 = hashlib.sha1()
  24. uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)
  25. # Total Weight Calculation For Progress Bar
  26. total_weights = 0
  27. for n, tensor in enumerate(reader.tensors, 1):
  28. # We don't need these
  29. if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
  30. continue
  31. # Calculate Tensor Volume
  32. sum_weights_in_tensor = 1
  33. for dim in tensor.shape:
  34. sum_weights_in_tensor *= dim
  35. total_weights += sum_weights_in_tensor
  36. # Hash Progress Bar
  37. bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar)
  38. # Hashing Process
  39. for tensor in reader.tensors:
  40. # We don't need these
  41. if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
  42. continue
  43. # Progressbar
  44. sum_weights_in_tensor = 1
  45. for dim in tensor.shape:
  46. sum_weights_in_tensor *= dim
  47. bar.update(sum_weights_in_tensor)
  48. if not no_layer:
  49. sha1_layer = hashlib.sha1()
  50. sha1_layer.update(tensor.data.data)
  51. print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
  52. sha256_layer = hashlib.sha256()
  53. sha256_layer.update(tensor.data.data)
  54. print("sha256 {0} {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
  55. sha1.update(tensor.data.data)
  56. sha256.update(tensor.data.data)
  57. uuidv5_sha1.update(tensor.data.data)
  58. # Flush Hash Progress Bar
  59. bar.close()
  60. # Display Hash Output
  61. print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100
  62. print("sha256 {0} {1}".format(sha256.hexdigest(), filename)) # noqa: NP100
  63. print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100
  64. def main() -> None:
  65. parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
  66. parser.add_argument("model", type=str, help="GGUF format model filename")
  67. parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash")
  68. parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
  69. parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
  70. args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
  71. logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
  72. reader = GGUFReader(args.model, 'r')
  73. gguf_hash(reader, args.model, not args.progressbar, args.no_layer)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()