@@ -240,11 +240,6 @@ for model in [*pre_computed_hashes, *all_models]:
     if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
         continue
 
-    # Skip if the tokenizer folder does not exist or there are other download issues previously
-    if not os.path.exists(f"models/tokenizers/{name}"):
-        logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
-        continue
-
     # create the tokenizer
     if chkhsh is not None:
         # if the model has a pre-computed hash, use it
@@ -254,6 +249,12 @@ for model in [*pre_computed_hashes, *all_models]:
         chkhsh = existing_models[name]
     else:
         # otherwise, compute the hash of the tokenizer
+
+        # Skip if the tokenizer folder does not exist or there are other download issues previously
+        if not os.path.exists(f"models/tokenizers/{name}"):
+            logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
+            continue
+
         try:
             logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
             if name == "t5":