@@ -145,8 +145,17 @@ for model in models:
     if tokt == TOKENIZER_TYPE.SPM:
         continue
 
+    # Skip if the tokenizer folder does not exist, e.g. because an earlier download failed
+    if not os.path.exists(f"models/tokenizers/{name}"):
+        logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
+        continue
+
     # create the tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    except OSError as e:
+        logger.error(f"Error loading tokenizer for model {name}. The model may not exist or may not be accessible with the provided token. Error: {e}")
+        continue  # Skip to the next model if the tokenizer can't be loaded
 
     chktok = tokenizer.encode(chktxt)
     chkhsh = sha256(str(chktok).encode()).hexdigest()
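
The `except OSError` branch is the one `transformers` exercises when a tokenizer folder exists but is unusable: `AutoTokenizer.from_pretrained` reports missing or unreadable tokenizer files as an `OSError`. A minimal sketch of the guard in isolation (the model name is hypothetical, and `transformers` is assumed to be installed):

```python
import logging

from transformers import AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("vocab-update-sketch")

name = "partial-download"  # hypothetical: a folder that exists but holds no tokenizer files
try:
    tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
except OSError as e:
    # transformers surfaces missing/unreadable tokenizer files as OSError,
    # so the loop above logs the failure and moves on instead of crashing
    logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
```
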
@@ -287,8 +296,17 @@ for model in models:
     name = model["name"]
     tokt = model["tokt"]
 
+    # Skip if the tokenizer folder does not exist, e.g. because an earlier download failed
+    if not os.path.exists(f"models/tokenizers/{name}"):
+        logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
+        continue
+
     # create the tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    except OSError as e:
+        logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
+        continue  # Skip this model and continue with the next one
 
     with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
         for text in tests:
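
Since both loops now skip problematic models rather than aborting, a run can finish with some vocab test files missing. A quick post-run check makes the skipped entries visible (the `models` list here is an illustrative stand-in for the script's own list):

```python
import os

# Illustrative stand-in for the script's model list
models = [{"name": "llama-bpe"}, {"name": "deepseek-coder"}]

for model in models:
    name = model["name"]
    if not os.path.exists(f"models/ggml-vocab-{name}.gguf.inp"):
        print(f"{name}: no vocab test file written (model was skipped)")
```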