|
@@ -59,6 +59,10 @@ parser.add_argument(
|
|
|
"--full", action="store_true",
|
|
"--full", action="store_true",
|
|
|
help="download full list of models - make sure you have access to all of them",
|
|
help="download full list of models - make sure you have access to all of them",
|
|
|
)
|
|
)
|
|
|
|
|
+parser.add_argument(
|
|
|
|
|
+ "--check-missing", action="store_true",
|
|
|
|
|
+ help="only check for missing pre-tokenizer hashes",
|
|
|
|
|
+)
|
|
|
parser.add_argument(
|
|
parser.add_argument(
|
|
|
"hf_token",
|
|
"hf_token",
|
|
|
help="optional HF token",
|
|
help="optional HF token",
|
|
@@ -70,6 +74,10 @@ hf_token = args.hf_token if args.hf_token is not None else hf_token
|
|
|
if hf_token is None:
|
|
if hf_token is None:
|
|
|
logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
|
|
logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
|
|
|
|
|
|
|
|
|
|
+if args.check_missing and args.full:
|
|
|
|
|
+ logger.warning("Downloading full list of models requested, ignoring --check-missing!")
|
|
|
|
|
+ args.check_missing = False
|
|
|
|
|
+
|
|
|
# TODO: this string has to exercise as much pre-tokenizer functionality as possible
|
|
# TODO: this string has to exercise as much pre-tokenizer functionality as possible
|
|
|
# will be updated with time - contributions welcome
|
|
# will be updated with time - contributions welcome
|
|
|
CHK_TXT = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
|
|
CHK_TXT = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
|
|
@@ -222,12 +230,13 @@ if not args.full:
|
|
|
all_models = models.copy()
|
|
all_models = models.copy()
|
|
|
models = [model for model in all_models if model["name"] not in existing_models]
|
|
models = [model for model in all_models if model["name"] not in existing_models]
|
|
|
|
|
|
|
|
-logging.info(f"Downloading {len(models)} models...")
|
|
|
|
|
-for model in models:
|
|
|
|
|
- try:
|
|
|
|
|
- download_model(model)
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- logger.error(f"Failed to download model {model['name']}. Error: {e}")
|
|
|
|
|
|
|
+if not args.check_missing:
|
|
|
|
|
+ logging.info(f"Downloading {len(models)} models...")
|
|
|
|
|
+ for model in models:
|
|
|
|
|
+ try:
|
|
|
|
|
+ download_model(model)
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ logger.error(f"Failed to download model {model['name']}. Error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
# generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
|
|
# generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
|