gguf-hash.cpp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. #include "ggml.h"
  2. #include <cstdlib> /* abort() */
  3. #include <cstddef>
  4. #include <cstdio>
  5. #include <string>
  6. #include <stdexcept>
  7. #include <algorithm>
  8. #include <cstring>
  9. #include <sstream>
  10. #include <fstream>
  11. #ifdef __cplusplus
  12. extern "C" {
  13. #endif
  14. #include "xxhash/xxhash.h"
  15. #include "sha1/sha1.h"
  16. #include "sha256/sha256.h"
  17. #ifdef __cplusplus
  18. }
  19. #endif
// Namespace UUID used when deriving UUIDv5 model identifiers. It is the result of the Python expression:
//   uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
// The same namespace UUID spelled as its 16 raw bytes, as a comma-separated list suitable for an array initializer.
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
// Hash-type labels. Each one is the first column of a printed result line and of a manifest entry.
#define HASH_TYPE_SHA256_STR "sha256"
#define HASH_TYPE_SHA1_STR "sha1"
#define HASH_TYPE_XXH64_STR "xxh64"
#define HASH_TYPE_UUID_STR "uuid"
  27. typedef enum {
  28. HASH_EXIT_SUCCESS = 0, // All hash has been generated or validated
  29. HASH_EXIT_FAILURE = 1, // Generic Failure
  30. HASH_EXIT_MISMATCH = 2, // Hash mismatched during validation
  31. HASH_EXIT_MANIFEST_MISSING_ENTRY = 3, // Hash attempted validation but missing entry in manifest
  32. HASH_EXIT_MANIFEST_UNKNOWN_HASH = 4, // Manifest is present, but we do not know any hash format within it
  33. HASH_EXIT_MANIFEST_FILE_ERROR = 5 // Manifest is either missing or not a known format
  34. } hash_exit_code_t;
  35. typedef enum {
  36. HASH_MANIFEST_NOT_FOUND,
  37. HASH_MANIFEST_MISMATCH,
  38. HASH_MANIFEST_OK,
  39. } hash_manifest_result_t;
  40. struct hash_params {
  41. std::string input;
  42. bool xxh64 = false;
  43. bool sha1 = false;
  44. bool sha256 = false;
  45. bool uuid = false;
  46. bool no_layer = false;
  47. bool manifest_is_usable = false;
  48. std::string manifest_file;
  49. };
  50. struct manifest_check_params {
  51. bool xxh64 = false;
  52. bool sha1 = false;
  53. bool sha256 = false;
  54. bool uuid = false;
  55. };
  56. static char const * hash_manifest_result_to_str(hash_manifest_result_t value) {
  57. switch (value) {
  58. case HASH_MANIFEST_NOT_FOUND: return "Not Found";
  59. case HASH_MANIFEST_MISMATCH: return "Mismatch";
  60. case HASH_MANIFEST_OK: return "Ok";
  61. }
  62. return "?";
  63. }
  64. static char const * hash_exit_code_to_str(hash_exit_code_t value) {
  65. switch (value) {
  66. case HASH_EXIT_SUCCESS: return "Success";
  67. case HASH_EXIT_FAILURE: return "Failure";
  68. case HASH_EXIT_MISMATCH: return "Mismatch";
  69. case HASH_EXIT_MANIFEST_MISSING_ENTRY: return "Manifest Missing Entry";
  70. case HASH_EXIT_MANIFEST_UNKNOWN_HASH: return "Manifest Unknown Hash";
  71. case HASH_EXIT_MANIFEST_FILE_ERROR: return "Manifest File Error";
  72. }
  73. return "?";
  74. }
  75. static void hash_print_usage(const char * executable) {
  76. const hash_params default_params;
  77. printf("\n");
  78. printf("usage: %s [options] GGUF_IN\n", executable);
  79. printf("\n");
  80. printf("Hash a GGUF file");
  81. printf("\n");
  82. printf("options:\n");
  83. printf(" -h, --help show this help message and exit\n");
  84. printf(" --xxh64 use xxh64 hash\n");
  85. printf(" --sha1 use sha1 hash\n");
  86. printf(" --sha256 use sha256 hash\n");
  87. printf(" --all use all hash\n");
  88. printf(" --no-layer exclude per layer hash\n");
  89. printf(" --uuid generate UUIDv5 ID\n");
  90. printf(" -c, --check <manifest> verify against a manifest\n");
  91. printf("\n");
  92. }
  93. static void hash_params_parse_ex(int argc, const char ** argv, hash_params & params) {
  94. std::string arg;
  95. bool invalid_param = false;
  96. const std::string arg_prefix = "--";
  97. int arg_idx = 1;
  98. for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
  99. arg = argv[arg_idx];
  100. if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
  101. std::replace(arg.begin(), arg.end(), '_', '-');
  102. }
  103. bool arg_found = false;
  104. if (arg == "-h" || arg == "--help") {
  105. hash_print_usage(argv[0]);
  106. exit(0);
  107. }
  108. if (arg == "--xxh64") {
  109. arg_found = true;
  110. params.xxh64 = true;
  111. }
  112. if (arg == "--sha1") {
  113. arg_found = true;
  114. params.sha1 = true;
  115. }
  116. if (arg == "--uuid") {
  117. arg_found = true;
  118. params.uuid = true;
  119. }
  120. if (arg == "--sha256") {
  121. arg_found = true;
  122. params.sha256 = true;
  123. }
  124. if (arg == "--all") {
  125. arg_found = true;
  126. params.sha256 = true;
  127. params.sha1 = true;
  128. params.xxh64 = true;
  129. }
  130. if (arg == "--no-layer") {
  131. arg_found = true;
  132. params.no_layer = true;
  133. }
  134. if (arg == "-c" || arg == "--check") {
  135. if (++arg_idx >= argc) {
  136. invalid_param = true;
  137. break;
  138. }
  139. arg_found = true;
  140. params.manifest_file = argv[arg_idx];
  141. }
  142. if (!arg_found) {
  143. throw std::invalid_argument("error: unknown argument: " + arg);
  144. }
  145. }
  146. if (invalid_param) {
  147. throw std::invalid_argument("error: invalid parameter for argument:" + arg);
  148. }
  149. if (argc - arg_idx < 1) {
  150. throw std::invalid_argument("error: bad arguments");
  151. }
  152. params.input = argv[arg_idx++];
  153. }
  154. static bool hash_params_parse(int argc, const char ** argv, hash_params & params) {
  155. bool result = true;
  156. try {
  157. hash_params_parse_ex(argc, argv, params);
  158. }
  159. catch (const std::invalid_argument & ex) {
  160. fprintf(stderr, "%s\n", ex.what());
  161. hash_print_usage(argv[0]);
  162. exit(EXIT_FAILURE);
  163. }
  164. return result;
  165. }
  166. static bool manifest_type(const std::string & manifest_file, manifest_check_params & manifest_check) {
  167. if (manifest_file.empty()) {
  168. return false;
  169. }
  170. std::ifstream file(manifest_file);
  171. if (!file.is_open()) {
  172. return false;
  173. }
  174. std::string manifest_entry_line;
  175. while (getline(file, manifest_entry_line)) {
  176. // hash_type_str hash_str tensor_name
  177. // e.g. 'xxh64 f66e9cd66a4396a0 test.gguf:tensor_0'
  178. std::istringstream line_stream(manifest_entry_line);
  179. std::string file_hash_type;
  180. if (line_stream >> file_hash_type) {
  181. if (file_hash_type == HASH_TYPE_SHA256_STR) {
  182. manifest_check.sha256 = true;
  183. } else if (file_hash_type == HASH_TYPE_SHA1_STR) {
  184. manifest_check.sha1 = true;
  185. } else if (file_hash_type == HASH_TYPE_XXH64_STR) {
  186. manifest_check.xxh64 = true;
  187. } else if (file_hash_type == HASH_TYPE_UUID_STR) {
  188. manifest_check.uuid = true;
  189. }
  190. }
  191. }
  192. return true;
  193. }
  194. static hash_manifest_result_t manifest_verify(const std::string& manifest_file, const std::string& hash_type_str, const std::string& hash_str, const std::string& tensor_name) {
  195. if (manifest_file.empty()) {
  196. return HASH_MANIFEST_NOT_FOUND;
  197. }
  198. std::ifstream file(manifest_file);
  199. if (!file.is_open()) {
  200. return HASH_MANIFEST_NOT_FOUND;
  201. }
  202. std::string manifest_entry_line;
  203. while (getline(file, manifest_entry_line)) {
  204. std::istringstream line_stream(manifest_entry_line);
  205. std::string file_hash_type;
  206. std::string file_hash;
  207. std::string file_tensor_name;
  208. if (line_stream >> file_hash_type >> file_hash >> file_tensor_name) {
  209. // Line parsed. Check hash validity
  210. if (file_hash_type != hash_type_str) {
  211. continue;
  212. }
  213. if (file_tensor_name != tensor_name) {
  214. continue;
  215. }
  216. return (file_hash == hash_str) ? HASH_MANIFEST_OK : HASH_MANIFEST_MISMATCH;
  217. }
  218. }
  219. return HASH_MANIFEST_NOT_FOUND;
  220. }
  221. static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
  222. // Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
  223. // Assumes that digest was processed correctly with the expected namespace
  224. for (int i = 0; i < 16; i++) {
  225. uuid[i] = sha1_digest[i];
  226. }
  227. // Set bits corresponding to UUID ver 5
  228. uuid[ 6] &= ~(0xF << 4);
  229. uuid[ 6] |= (5 << 4);
  230. // Set bits corresponding to UUID variant 0b10XX
  231. uuid[ 8] &= ~(0xc << 4);
  232. uuid[ 8] |= (0x8 << 4);
  233. }
  234. static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
  235. const std::string & fname = hash_params.input;
  236. struct ggml_context * ctx_data = NULL;
  237. struct gguf_init_params params = {
  238. /*.no_alloc = */ false,
  239. /*.ctx = */ &ctx_data,
  240. };
  241. // xxh64 init
  242. XXH64_state_t* xxh64_model_hash_state = NULL;
  243. if (hash_params.xxh64) {
  244. xxh64_model_hash_state = XXH64_createState();
  245. if (xxh64_model_hash_state==NULL) {
  246. abort();
  247. }
  248. XXH64_hash_t const seed = 0;
  249. if (XXH64_reset(xxh64_model_hash_state, seed) == XXH_ERROR) {
  250. abort();
  251. }
  252. }
  253. // sha1 init
  254. SHA1_CTX sha1_model_hash_ctx;
  255. if (hash_params.sha1) {
  256. SHA1Init(&sha1_model_hash_ctx);
  257. }
  258. // sha256 init
  259. sha256_t sha256_model_hash_ctx;
  260. if (hash_params.sha256) {
  261. sha256_init(&sha256_model_hash_ctx);
  262. }
  263. // sha1 for uuid init
  264. SHA1_CTX sha1_for_uuid_ctx;
  265. if (hash_params.uuid) {
  266. unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
  267. SHA1Init(&sha1_for_uuid_ctx);
  268. SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
  269. }
  270. struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
  271. const int n_tensors = gguf_get_n_tensors(ctx);
  272. bool tensor_layer_in_manifest = false;
  273. bool model_in_manifest = false;
  274. bool tensor_layer_has_mismatch = false;
  275. bool model_has_mismatch = false;
  276. for (int i = 0; i < n_tensors; ++i) {
  277. const char * name = gguf_get_tensor_name(ctx, i);
  278. struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
  279. auto n_bytes = ggml_nbytes(cur);
  280. auto *raw_data = cur->data;
  281. const std::string tensor_layer_name = fname + ":" + name;
  282. if (hash_params.xxh64) {
  283. if (!hash_params.no_layer) {
  284. // Per Layer Hash
  285. XXH64_hash_t hash = XXH64(raw_data, n_bytes, 0);
  286. char hex_result[17];
  287. for (int offset = 0; offset < 8; offset++) {
  288. unsigned int shift_bits_by = (8 * (8 - offset - 1));
  289. snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
  290. }
  291. if (hash_params.manifest_is_usable) {
  292. hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name);
  293. switch (verify_result) {
  294. case HASH_MANIFEST_NOT_FOUND:
  295. break;
  296. case HASH_MANIFEST_MISMATCH:
  297. tensor_layer_in_manifest = true;
  298. tensor_layer_has_mismatch = true;
  299. break;
  300. case HASH_MANIFEST_OK:
  301. tensor_layer_in_manifest = true;
  302. break;
  303. }
  304. printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
  305. } else {
  306. printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str());
  307. }
  308. }
  309. // Overall Model Hash
  310. if (XXH64_update(xxh64_model_hash_state, raw_data, n_bytes) == XXH_ERROR) abort();
  311. }
  312. if (hash_params.sha1) {
  313. if (!hash_params.no_layer) {
  314. // Per Layer Hash
  315. char result[21]; // sha1 outputs 20 bytes
  316. SHA1( result, (const char *)raw_data, n_bytes);
  317. char hex_result[41] = {0};
  318. for (int offset = 0; offset < 20; offset++) {
  319. snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
  320. }
  321. if (hash_params.manifest_is_usable) {
  322. hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name);
  323. switch (verify_result) {
  324. case HASH_MANIFEST_NOT_FOUND:
  325. break;
  326. case HASH_MANIFEST_MISMATCH:
  327. tensor_layer_in_manifest = true;
  328. tensor_layer_has_mismatch = true;
  329. break;
  330. case HASH_MANIFEST_OK:
  331. tensor_layer_in_manifest = true;
  332. break;
  333. }
  334. printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
  335. } else {
  336. printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str());
  337. }
  338. }
  339. // Overall Model Hash
  340. SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
  341. }
  342. if (hash_params.sha256) {
  343. if (!hash_params.no_layer) {
  344. // Per Layer Hash
  345. unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
  346. sha256_hash((unsigned char*) result, (const unsigned char *)raw_data, n_bytes);
  347. char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
  348. for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
  349. snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
  350. }
  351. if (hash_params.manifest_is_usable) {
  352. hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name);
  353. switch (verify_result) {
  354. case HASH_MANIFEST_NOT_FOUND:
  355. break;
  356. case HASH_MANIFEST_MISMATCH:
  357. tensor_layer_in_manifest = true;
  358. tensor_layer_has_mismatch = true;
  359. break;
  360. case HASH_MANIFEST_OK:
  361. tensor_layer_in_manifest = true;
  362. break;
  363. }
  364. printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
  365. } else {
  366. printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str());
  367. }
  368. }
  369. // Overall Model Hash
  370. sha256_update( &sha256_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
  371. }
  372. if (hash_params.uuid) {
  373. SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)raw_data, n_bytes);
  374. }
  375. }
  376. if (hash_params.xxh64) {
  377. XXH64_hash_t const hash = XXH64_digest(xxh64_model_hash_state);
  378. char hex_result[17];
  379. for (int offset = 0; offset < 8; offset++) {
  380. unsigned int shift_bits_by = (8 * (8 - offset - 1));
  381. snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
  382. }
  383. if (hash_params.manifest_is_usable) {
  384. hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, fname);
  385. switch (verify_result) {
  386. case HASH_MANIFEST_NOT_FOUND:
  387. break;
  388. case HASH_MANIFEST_MISMATCH:
  389. model_in_manifest = true;
  390. model_has_mismatch = true;
  391. break;
  392. case HASH_MANIFEST_OK:
  393. model_in_manifest = true;
  394. break;
  395. }
  396. printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
  397. } else {
  398. printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str());
  399. }
  400. }
  401. if (hash_params.sha1) {
  402. unsigned char result[21];
  403. SHA1Final(result, &sha1_model_hash_ctx);
  404. char hex_result[41];
  405. for (int offset = 0; offset < 20; offset++) {
  406. snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
  407. }
  408. if (hash_params.manifest_is_usable) {
  409. hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, fname);
  410. switch (verify_result) {
  411. case HASH_MANIFEST_NOT_FOUND:
  412. break;
  413. case HASH_MANIFEST_MISMATCH:
  414. model_in_manifest = true;
  415. model_has_mismatch = true;
  416. break;
  417. case HASH_MANIFEST_OK:
  418. model_in_manifest = true;
  419. break;
  420. }
  421. printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
  422. } else {
  423. printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str());
  424. }
  425. }
  426. if (hash_params.sha256) {
  427. unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
  428. sha256_final( &sha256_model_hash_ctx, result);
  429. char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
  430. for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
  431. snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
  432. }
  433. if (hash_params.manifest_is_usable) {
  434. hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, fname);
  435. switch (verify_result) {
  436. case HASH_MANIFEST_NOT_FOUND:
  437. break;
  438. case HASH_MANIFEST_MISMATCH:
  439. model_in_manifest = true;
  440. model_has_mismatch = true;
  441. break;
  442. case HASH_MANIFEST_OK:
  443. model_in_manifest = true;
  444. break;
  445. }
  446. printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
  447. } else {
  448. printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str());
  449. }
  450. }
  451. if (hash_params.uuid) {
  452. unsigned char result[21];
  453. SHA1Final(result, &sha1_for_uuid_ctx);
  454. unsigned char uuid[16];
  455. generate_uuidv5(result, uuid);
  456. char string_buffer[37] = {0};
  457. snprintf(string_buffer, sizeof(string_buffer), "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
  458. uuid[0], uuid[1], uuid[2], uuid[3],
  459. uuid[4], uuid[5], uuid[6], uuid[7],
  460. uuid[8], uuid[9], uuid[10], uuid[11],
  461. uuid[12], uuid[13], uuid[14], uuid[15]);
  462. if (hash_params.manifest_is_usable) {
  463. hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, string_buffer, fname);
  464. switch (verify_result) {
  465. case HASH_MANIFEST_NOT_FOUND:
  466. break;
  467. case HASH_MANIFEST_MISMATCH:
  468. model_in_manifest = true;
  469. model_has_mismatch = true;
  470. break;
  471. case HASH_MANIFEST_OK:
  472. model_in_manifest = true;
  473. break;
  474. }
  475. printf("%-8s %-s %s - %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str(), hash_manifest_result_to_str(verify_result));
  476. } else {
  477. printf("%-8s %-s %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str());
  478. }
  479. }
  480. ggml_free(ctx_data);
  481. gguf_free(ctx);
  482. if (hash_params.manifest_is_usable) {
  483. // In hash verification mode
  484. if (!model_in_manifest) {
  485. // model missing in manifest?
  486. // Check tensor layer...
  487. if (!tensor_layer_in_manifest) {
  488. // Still missing? Maybe we are reading the wrong manifest.
  489. return HASH_EXIT_MANIFEST_MISSING_ENTRY;
  490. }
  491. if (tensor_layer_has_mismatch) {
  492. // Per tensor check found error
  493. return HASH_EXIT_FAILURE;
  494. }
  495. // All per tensor layer checks passed? Sounds good enough.
  496. return HASH_EXIT_SUCCESS;
  497. }
  498. // Overall model check passed, but let's check per layer just in case
  499. // If missing, we don't care too much as the overall model checked
  500. if (tensor_layer_in_manifest && tensor_layer_has_mismatch) {
  501. return HASH_EXIT_FAILURE;
  502. }
  503. if (model_has_mismatch) {
  504. // model has failed hash somewhere in the model
  505. return HASH_EXIT_FAILURE;
  506. }
  507. // All checks appears to be fine
  508. return HASH_EXIT_SUCCESS;
  509. }
  510. // In hash generation mode
  511. return HASH_EXIT_SUCCESS;
  512. }
  513. int main(int argc, const char ** argv) {
  514. hash_params params;
  515. manifest_check_params manifest_check;
  516. hash_params_parse(argc, argv, params);
  517. if (!params.manifest_file.empty()) {
  518. if (!manifest_type(params.manifest_file, manifest_check)) {
  519. printf("ERROR cannot open manifest %s", params.manifest_file.c_str());
  520. return HASH_EXIT_MANIFEST_FILE_ERROR;
  521. }
  522. if (!manifest_check.sha256 && !manifest_check.sha1 && !manifest_check.xxh64 && !manifest_check.uuid) {
  523. printf("ERROR manifest does not have any known hash format in %s", params.manifest_file.c_str());
  524. return HASH_EXIT_MANIFEST_UNKNOWN_HASH;
  525. }
  526. printf("manifest %s", params.manifest_file.c_str());
  527. if (manifest_check.sha256) {
  528. printf(" sha256");
  529. }
  530. if (manifest_check.sha1) {
  531. printf(" sha1");
  532. }
  533. if (manifest_check.xxh64) {
  534. printf(" xxh64");
  535. }
  536. if (manifest_check.uuid) {
  537. printf(" uuid");
  538. }
  539. printf("\n");
  540. // Autoselect the highest security hash if manifest is provided but
  541. // the user has not specifically defined the hash they care about
  542. if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
  543. // User has not selected a specific value, pick most secure hash
  544. if (manifest_check.sha256) {
  545. params.sha256 = true;
  546. } else if (manifest_check.sha1) {
  547. params.sha1 = true;
  548. } else if (manifest_check.xxh64) {
  549. params.xxh64 = true;
  550. } else if (manifest_check.uuid) {
  551. params.uuid = true;
  552. }
  553. }
  554. params.manifest_is_usable = true;
  555. }
  556. // By default if no swich argument provided, assume xxh64
  557. if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
  558. params.xxh64 = true;
  559. }
  560. hash_exit_code_t exit_code = gguf_hash(params);
  561. if (params.manifest_is_usable) {
  562. printf("\nVerification results for %s - %s\n", params.manifest_file.c_str(), hash_exit_code_to_str(exit_code));
  563. }
  564. return exit_code;
  565. }