2 månader sedan · aa3b7a90b4
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -740,6 +740,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 
															             exit(0);
														
 
															         }
														
 
															     ));
														
 
															+    add_opt(common_arg(
														
 
															+        {"-cl", "--cache-list"},
														
 
															+        "show list of models in cache",
														
 
															+        [](common_params &) {
														
 
															+            printf("model cache directory: %s\n", fs_get_cache_directory().c_str());
														
 
															+            auto models = common_list_cached_models();
														
 
															+            printf("number of models in cache: %zu\n", models.size());
														
 
															+            for (size_t i = 0; i < models.size(); i++) {
														
 
															+                auto & model = models[i];
														
 
															+                printf("%4d. %s\n", (int) i + 1, model.to_string().c_str());
														
 
															+            }
														
 
															+            exit(0);
														
 
															+        }
														
 
															+    ));
														
 
															     add_opt(common_arg(
														
 
															         {"--completion-bash"},
														
 
															         "print source-able bash completion script for llama.cpp",
														
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
 
															     return cache_directory + filename;
														
 
															 }
														
 
															+std::vector<common_file_info> fs_list_files(const std::string & path) {
														
 
															+    std::vector<common_file_info> files;
														
 
															+    if (path.empty()) return files;
														
 
															+
														
 
															+    std::filesystem::path dir(path);
														
 
															+    if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
														
 
															+        return files;
														
 
															+    }
														
 
															+
														
 
															+    for (const auto & entry : std::filesystem::directory_iterator(dir)) {
														
 
															+        try {
														
 
															+            // Only include regular files (skip directories)
														
 
															+            const auto & p = entry.path();
														
 
															+            if (std::filesystem::is_regular_file(p)) {
														
 
															+                common_file_info info;
														
 
															+                info.path = p.string();
														
 
															+                info.name = p.filename().string();
														
 
															+                try {
														
 
															+                    info.size = static_cast<size_t>(std::filesystem::file_size(p));
														
 
															+                } catch (const std::filesystem::filesystem_error &) {
														
 
															+                    info.size = 0;
														
 
															+                }
														
 
															+                files.push_back(std::move(info));
														
 
															+            }
														
 
															+        } catch (const std::filesystem::filesystem_error &) {
														
 
															+            // skip entries we cannot inspect
														
 
															+            continue;
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    return files;
														
 
															+}
														
 
															+
														
 
															 //
														
 
															 // Model utils
														
--- a/common/common.h
+++ b/common/common.h
@@ -611,6 +611,13 @@ bool fs_create_directory_with_parents(const std::string & path);
 
															 std::string fs_get_cache_directory();
														
 
															 std::string fs_get_cache_file(const std::string & filename);
														
 
															+struct common_file_info {
														
 
															+    std::string path;
														
 
															+    std::string name;
														
 
															+    size_t      size = 0; // in bytes
														
 
															+};
														
 
															+std::vector<common_file_info> fs_list_files(const std::string & path);
														
 
															+
														
 
															 //
														
 
															 // Model utils
														
 
															 //
														
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -50,6 +50,22 @@ using json = nlohmann::ordered_json;
 
															 // downloader
														
 
															 //
														
 
															+// validate repo name format: owner/repo
														
 
															+static bool validate_repo_name(const std::string & repo) {
														
 
															+    static const std::regex repo_regex(R"(^[A-Za-z0-9_.\-]+\/[A-Za-z0-9_.\-]+$)");
														
 
															+    return std::regex_match(repo, repo_regex);
														
 
															+}
														
 
															+
														
 
															+static std::string get_manifest_path(const std::string & repo, const std::string & tag) {
														
 
															+    // we use "=" to avoid clashing with other component, while still being allowed on windows
														
 
															+    std::string fname = "manifest=" + repo + "=" + tag + ".json";
														
 
															+    if (!validate_repo_name(repo)) {
														
 
															+        throw std::runtime_error("error: repo name must be in the format 'owner/repo'");
														
 
															+    }
														
 
															+    string_replace_all(fname, "/", "=");
														
 
															+    return fs_get_cache_file(fname);
														
 
															+}
														
 
															+
														
 
															 static std::string read_file(const std::string & fname) {
														
 
															     std::ifstream file(fname);
														
 
															     if (!file) {
														
@@ -829,17 +845,13 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
 
															     // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
														
 
															     // User-Agent header is already set in common_remote_get_content, no need to set it here
														
 
															-    // we use "=" to avoid clashing with other component, while still being allowed on windows
														
 
															-    std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
														
 
															-    string_replace_all(cached_response_fname, "/", "_");
														
 
															-    std::string cached_response_path = fs_get_cache_file(cached_response_fname);
														
 
															-
														
 
															     // make the request
														
 
															     common_remote_params params;
														
 
															     params.headers = headers;
														
 
															     long res_code = 0;
														
 
															     std::string res_str;
														
 
															     bool use_cache = false;
														
 
															+    std::string cached_response_path = get_manifest_path(hf_repo, tag);
														
 
															     if (!offline) {
														
 
															         try {
														
 
															             auto res = common_remote_get_content(url, params);
														
@@ -895,6 +907,33 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
 
															     return { hf_repo, ggufFile, mmprojFile };
														
 
															 }
														
 
															+std::vector<common_cached_model_info> common_list_cached_models() {
														
 
															+    std::vector<common_cached_model_info> models;
														
 
															+    const std::string cache_dir = fs_get_cache_directory();
														
 
															+    const std::vector<common_file_info> files = fs_list_files(cache_dir);
														
 
															+    for (const auto & file : files) {
														
 
															+        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
														
 
															+            common_cached_model_info model_info;
														
 
															+            model_info.manifest_path = file.path;
														
 
															+            std::string fname = file.name;
														
 
															+            string_replace_all(fname, ".json", ""); // remove extension
														
 
															+            auto parts = string_split<std::string>(fname, '=');
														
 
															+            if (parts.size() == 4) {
														
 
															+                // expect format: manifest=<user>=<model>=<tag>=<other>
														
 
															+                model_info.user  = parts[1];
														
 
															+                model_info.model = parts[2];
														
 
															+                model_info.tag   = parts[3];
														
 
															+            } else {
														
 
															+                // invalid format
														
 
															+                continue;
														
 
															+            }
														
 
															+            model_info.size = 0; // TODO: get GGUF size, not manifest size
														
 
															+            models.push_back(model_info);
														
 
															+        }
														
 
															+    }
														
 
															+    return models;
														
 
															+}
														
 
															+
														
 
															 //
														
 
															 // Docker registry functions
														
 
															 //
														
@@ -959,6 +998,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
 
															         std::string token = common_docker_get_token(repo);  // Get authentication token
														
 
															         // Get manifest
														
 
															+        // TODO: cache the manifest response so that it appears in the model list
														
 
															         const std::string    url_prefix = "https://registry-1.docker.io/v2/" + repo;
														
 
															         std::string          manifest_url = url_prefix + "/manifests/" + tag;
														
 
															         common_remote_params manifest_params;
														
--- a/common/download.h
+++ b/common/download.h
@@ -8,16 +8,23 @@ struct common_params_model;
 
															 // download functionalities
														
 
															 //
														
 
															+struct common_cached_model_info {
														
 
															+    std::string manifest_path;
														
 
															+    std::string user;
														
 
															+    std::string model;
														
 
															+    std::string tag;
														
 
															+    size_t      size = 0; // GGUF size in bytes
														
 
															+    std::string to_string() const {
														
 
															+        return user + "/" + model + ":" + tag;
														
 
															+    }
														
 
															+};
														
 
															+
														
 
															 struct common_hf_file_res {
														
 
															     std::string repo; // repo name with ":tag" removed
														
 
															     std::string ggufFile;
														
 
															     std::string mmprojFile;
														
 
															 };
														
 
															-// resolve and download model from Docker registry
														
 
															-// return local path to downloaded model file
														
 
															-std::string common_docker_resolve_model(const std::string & docker);
														
 
															-
														
 
															 /**
														
 
															  * Allow getting the HF file from the HF repo with tag (like ollama), for example:
														
 
															  * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
														
@@ -39,3 +46,10 @@ bool common_download_model(
 
															     const common_params_model & model,
														
 
															     const std::string & bearer_token,
														
 
															     bool offline);
														
 
															+
														
 
															+// returns list of cached models
														
 
															+std::vector<common_cached_model_info> common_list_cached_models();
														
 
															+
														
 
															+// resolve and download model from Docker registry
														
 
															+// return local path to downloaded model file
														
 
															+std::string common_docker_resolve_model(const std::string & docker);