@@ -7,7 +7,6 @@ import pathlib
 import re
 
 import requests
-import sys
 import json
 import shutil
 import argparse
@@ -69,8 +68,7 @@ args = parser.parse_args()
 hf_token = args.hf_token if args.hf_token is not None else hf_token
 
 if hf_token is None:
-    logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
-    sys.exit(1)
+    logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
 
 # TODO: this string has to exercise as much pre-tokenizer functionality as possible
 # will be updated with time - contributions welcome
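Note on this hunk: the HF token becomes optional, with a warning instead of a hard exit, since public tokenizers can be fetched anonymously (which is also why `import sys` is dropped above). A minimal sketch of the fallback the message describes, assuming the standard Hugging Face CLI token location; `read_hf_token` is a hypothetical helper for illustration, not part of the script:

import os
import pathlib

def read_hf_token() -> str | None:
    # Prefer the token the Hugging Face CLI stores on login;
    # fall back to the HF_TOKEN environment variable, else None.
    token_path = pathlib.Path.home() / ".cache" / "huggingface" / "token"
    if token_path.is_file():
        return token_path.read_text().strip()
    return os.environ.get("HF_TOKEN")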
@@ -151,7 +149,7 @@ pre_computed_hashes = [
 
 
 def download_file_with_auth(url, token, save_path):
-    headers = {"Authorization": f"Bearer {token}"}
+    headers = {"Authorization": f"Bearer {token}"} if token else None
    response = sess.get(url, headers=headers)
    response.raise_for_status()
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
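With `requests`, passing `headers=None` is equivalent to sending no custom headers at all, so anonymous downloads of public files keep working, whereas the old line would have sent a bogus "Bearer None" header when no token was set. A quick self-contained check of the pattern (the URL is a placeholder):

import requests

sess = requests.Session()
token = None  # no HF token available

# Only attach an Authorization header when we actually have a token.
headers = {"Authorization": f"Bearer {token}"} if token else None
response = sess.get("https://example.com/public/file", headers=headers)
response.raise_for_status()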
@@ -250,10 +248,9 @@ for model in [*pre_computed_hashes, *all_models]:
     else:
         # otherwise, compute the hash of the tokenizer
 
-        # Skip if the tokenizer folder does not exist or there are other download issues previously
-        if not os.path.exists(f"models/tokenizers/{name}"):
-            logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
-            continue
+        # Fail if the tokenizer folder with config does not exist or there are other download issues previously
+        if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
+            raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")
 
         try:
             logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
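Checking `os.path.isfile` on `tokenizer_config.json` is deliberately stricter than `os.path.exists` on the directory: a partially failed download can leave the folder behind with no config in it, which the old check would have silently accepted. A tiny illustration with a hypothetical model name:

import os
import pathlib

d = pathlib.Path("models/tokenizers/example")  # hypothetical model name
d.mkdir(parents=True, exist_ok=True)           # directory exists, config does not

print(os.path.exists(d))                            # True  -> old check passes
print(os.path.isfile(d / "tokenizer_config.json"))  # False -> new check fails fast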
@@ -261,9 +258,8 @@ for model in [*pre_computed_hashes, *all_models]:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
             else:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
-        except OSError as e:
-            logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
-            continue  # Skip to the next model if the tokenizer can't be loaded
+        except Exception as e:
+            raise OSError(f"Error loading tokenizer for model {name}.") from e
 
         chktok = tokenizer.encode(CHK_TXT)
         chkhsh = sha256(str(chktok).encode()).hexdigest()
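The broadened `except Exception` plus `raise ... from e` turns silent per-model skips into a hard failure while keeping the root cause: exception chaining records the original error as `__cause__`, so both tracebacks are printed. A minimal sketch of the pattern with a stand-in loader:

def load_tokenizer(name: str):
    # Hypothetical stand-in for AutoTokenizer.from_pretrained that fails.
    raise ValueError(f"corrupt tokenizer data for {name}")

try:
    load_tokenizer("example")
except Exception as e:
    # The traceback shows the ValueError first, then
    # "The above exception was the direct cause of ..." and the OSError.
    raise OSError("Error loading tokenizer for model example.") from e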