7 months ago · cdf94a1802
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -242,33 +242,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
 
															 }
														
 
															 // download one single file from remote URL to local path
														
 
															-static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token) {
														
 
															-    // Initialize libcurl
														
 
															-    curl_ptr       curl(curl_easy_init(), &curl_easy_cleanup);
														
 
															-    curl_slist_ptr http_headers;
														
 
															-    if (!curl) {
														
 
															-        LOG_ERR("%s: error initializing libcurl\n", __func__);
														
 
															-        return false;
														
 
															-    }
														
 
															-
														
 
															-    // Set the URL, allow to follow http redirection
														
 
															-    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
														
 
															-    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
														
 
															-
														
 
															-    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
														
 
															-    // Check if hf-token or bearer-token was specified
														
 
															-    if (!bearer_token.empty()) {
														
 
															-        std::string auth_header = "Authorization: Bearer " + bearer_token;
														
 
															-        http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
														
 
															-    }
														
 
															-    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
														
 
															-
														
 
															-#if defined(_WIN32)
														
 
															-    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
														
 
															-    //   operating system. Currently implemented under MS-Windows.
														
 
															-    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
														
 
															-#endif
														
 
															-
														
 
															+static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) {
														
 
															     // Check if the file already exists locally
														
 
															     auto file_exists = std::filesystem::exists(path);
														
@@ -279,6 +253,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
 
															     std::string last_modified;
														
 
															     if (file_exists) {
														
 
															+        if (offline) {
														
 
															+            LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
														
 
															+            return true; // skip verification/downloading
														
 
															+        }
														
 
															         // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
														
 
															         std::ifstream metadata_in(metadata_path);
														
 
															         if (metadata_in.good()) {
														
@@ -297,6 +275,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
 
															         }
														
 
															         // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
														
 
															     } else {
														
 
															+        if (offline) {
														
 
															+            LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
														
 
															+            return false;
														
 
															+        }
														
 
															         LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
														
 
															     }
														
@@ -310,50 +292,73 @@ static bool common_download_file_single(const std::string & url, const std::stri
 
															     bool head_request_ok = false;
														
 
															     bool should_download = !file_exists; // by default, we should download if the file does not exist
														
 
															-    // get ETag to see if the remote file has changed
														
 
															-    {
														
 
															-        typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
														
 
															-        auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
														
 
															-            common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
														
 
															+    // Initialize libcurl
														
 
															+    curl_ptr       curl(curl_easy_init(), &curl_easy_cleanup);
														
 
															+    curl_slist_ptr http_headers;
														
 
															+    if (!curl) {
														
 
															+        LOG_ERR("%s: error initializing libcurl\n", __func__);
														
 
															+        return false;
														
 
															+    }
														
 
															+
														
 
															+    // Set the URL, allow to follow http redirection
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
														
 
															-            static std::regex header_regex("([^:]+): (.*)\r\n");
														
 
															-            static std::regex etag_regex("ETag", std::regex_constants::icase);
														
 
															-            static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
														
 
															+    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
														
 
															+    // Check if hf-token or bearer-token was specified
														
 
															+    if (!bearer_token.empty()) {
														
 
															+        std::string auth_header = "Authorization: Bearer " + bearer_token;
														
 
															+        http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
														
 
															+    }
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
														
 
															-            std::string header(buffer, n_items);
														
 
															-            std::smatch match;
														
 
															-            if (std::regex_match(header, match, header_regex)) {
														
 
															-                const std::string & key = match[1];
														
 
															-                const std::string & value = match[2];
														
 
															-                if (std::regex_match(key, match, etag_regex)) {
														
 
															-                    headers->etag = value;
														
 
															-                } else if (std::regex_match(key, match, last_modified_regex)) {
														
 
															-                    headers->last_modified = value;
														
 
															-                }
														
 
															-            }
														
 
															-            return n_items;
														
 
															-        };
														
 
															+#if defined(_WIN32)
														
 
															+    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
														
 
															+    //   operating system. Currently implemented under MS-Windows.
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
														
 
															+#endif
														
 
															-        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
														
 
															-        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
														
 
															-        curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
														
 
															-        curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
														
 
															+    typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
														
 
															+    auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
														
 
															+        common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
														
 
															-        // we only allow retrying once for HEAD requests
														
 
															-        // this is for the use case of using running offline (no internet), retrying can be annoying
														
 
															-        bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
														
 
															-        if (!was_perform_successful) {
														
 
															-            head_request_ok = false;
														
 
															-        }
														
 
															+        static std::regex header_regex("([^:]+): (.*)\r\n");
														
 
															+        static std::regex etag_regex("ETag", std::regex_constants::icase);
														
 
															+        static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
														
 
															-        long http_code = 0;
														
 
															-        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
														
 
															-        if (http_code == 200) {
														
 
															-            head_request_ok = true;
														
 
															-        } else {
														
 
															-            LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
														
 
															-            head_request_ok = false;
														
 
															+        std::string header(buffer, n_items);
														
 
															+        std::smatch match;
														
 
															+        if (std::regex_match(header, match, header_regex)) {
														
 
															+            const std::string & key = match[1];
														
 
															+            const std::string & value = match[2];
														
 
															+            if (std::regex_match(key, match, etag_regex)) {
														
 
															+                headers->etag = value;
														
 
															+            } else if (std::regex_match(key, match, last_modified_regex)) {
														
 
															+                headers->last_modified = value;
														
 
															+            }
														
 
															         }
														
 
															+        return n_items;
														
 
															+    };
														
 
															+
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
														
 
															+    curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
														
 
															+
														
 
															+    // we only allow retrying once for HEAD requests
														
 
															+    // this is for the use case of running offline (no internet), where retrying can be annoying
														
 
															+    bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
														
 
															+    if (!was_perform_successful) {
														
 
															+        head_request_ok = false;
														
 
															+    }
														
 
															+
														
 
															+    long http_code = 0;
														
 
															+    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
														
 
															+    if (http_code == 200) {
														
 
															+        head_request_ok = true;
														
 
															+    } else {
														
 
															+        LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
														
 
															+        head_request_ok = false;
														
 
															     }
														
 
															     // if head_request_ok is false, we don't have the etag or last-modified headers
														
@@ -460,12 +465,12 @@ static bool common_download_file_single(const std::string & url, const std::stri
 
															 // download multiple files from remote URLs to local paths
														
 
															 // the input is a vector of pairs <url, path>
														
 
															-static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token) {
														
 
															+static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) {
														
 
															     // Prepare download in parallel
														
 
															     std::vector<std::future<bool>> futures_download;
														
 
															     for (auto const & item : urls) {
														
 
															-        futures_download.push_back(std::async(std::launch::async, [bearer_token](const std::pair<std::string, std::string> & it) -> bool {
														
 
															-            return common_download_file_single(it.first, it.second, bearer_token);
														
 
															+        futures_download.push_back(std::async(std::launch::async, [bearer_token, offline](const std::pair<std::string, std::string> & it) -> bool {
														
 
															+            return common_download_file_single(it.first, it.second, bearer_token, offline);
														
 
															         }, item));
														
 
															     }
														
@@ -481,14 +486,15 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
 
															 static bool common_download_model(
														
 
															         const common_params_model & model,
														
 
															-        const std::string & bearer_token) {
														
 
															+        const std::string & bearer_token,
														
 
															+        bool offline) {
														
 
															     // Basic validation of the model.url
														
 
															     if (model.url.empty()) {
														
 
															         LOG_ERR("%s: invalid model url\n", __func__);
														
 
															         return false;
														
 
															     }
														
 
															-    if (!common_download_file_single(model.url, model.path, bearer_token)) {
														
 
															+    if (!common_download_file_single(model.url, model.path, bearer_token, offline)) {
														
 
															         return false;
														
 
															     }
														
@@ -547,7 +553,7 @@ static bool common_download_model(
 
															         }
														
 
															         // Download in parallel
														
 
															-        common_download_file_multiple(urls, bearer_token);
														
 
															+        common_download_file_multiple(urls, bearer_token, offline);
														
 
															     }
														
 
															     return true;
														
@@ -608,7 +614,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
 
															  *
														
 
															  * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
														
 
															  */
														
 
															-static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
														
 
															+static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) {
														
 
															     auto parts = string_split<std::string>(hf_repo_with_tag, ':');
														
 
															     std::string tag = parts.size() > 1 ? parts.back() : "latest";
														
 
															     std::string hf_repo = parts[0];
														
@@ -638,20 +644,25 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
 
															     long res_code = 0;
														
 
															     std::string res_str;
														
 
															     bool use_cache = false;
														
 
															-    try {
														
 
															-        auto res = common_remote_get_content(url, params);
														
 
															-        res_code = res.first;
														
 
															-        res_str = std::string(res.second.data(), res.second.size());
														
 
															-    } catch (const std::exception & e) {
														
 
															-        LOG_WRN("error: failed to get manifest: %s\n", e.what());
														
 
															-        LOG_WRN("try reading from cache\n");
														
 
															-        // try to read from cache
														
 
															+    if (!offline) {
														
 
															         try {
														
 
															+            auto res = common_remote_get_content(url, params);
														
 
															+            res_code = res.first;
														
 
															+            res_str = std::string(res.second.data(), res.second.size());
														
 
															+        } catch (const std::exception & e) {
														
 
															+            LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what());
														
 
															+        }
														
 
															+    }
														
 
															+    if (res_code == 0) {
														
 
															+        if (std::filesystem::exists(cached_response_path)) {
														
 
															+            LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str());
														
 
															             res_str = read_file(cached_response_path);
														
 
															             res_code = 200;
														
 
															             use_cache = true;
														
 
															-        } catch (const std::exception & e) {
														
 
															-            throw std::runtime_error("error: failed to get manifest (check your internet connection)");
														
 
															+        } else {
														
 
															+            throw std::runtime_error(
														
 
															+                offline ? "error: failed to get manifest (offline mode)"
														
 
															+                : "error: failed to get manifest (check your internet connection)");
														
 
															         }
														
 
															     }
														
 
															     std::string ggufFile;
														
@@ -698,24 +709,25 @@ bool common_has_curl() {
 
															     return false;
														
 
															 }
														
 
															-static bool common_download_file_single(const std::string &, const std::string &, const std::string &) {
														
 
															+static bool common_download_file_single(const std::string &, const std::string &, const std::string &, bool) {
														
 
															     LOG_ERR("error: built without CURL, cannot download model from internet\n");
														
 
															     return false;
														
 
															 }
														
 
															-static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &) {
														
 
															+static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool) {
														
 
															     LOG_ERR("error: built without CURL, cannot download model from the internet\n");
														
 
															     return false;
														
 
															 }
														
 
															 static bool common_download_model(
														
 
															         const common_params_model &,
														
 
															-        const std::string &) {
														
 
															+        const std::string &,
														
 
															+        bool) {
														
 
															     LOG_ERR("error: built without CURL, cannot download model from the internet\n");
														
 
															     return false;
														
 
															 }
														
 
															-static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &) {
														
 
															+static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
														
 
															     LOG_ERR("error: built without CURL, cannot download model from the internet\n");
														
 
															     return {};
														
 
															 }
														
@@ -742,7 +754,8 @@ struct handle_model_result {
 
															 static handle_model_result common_params_handle_model(
														
 
															         struct common_params_model & model,
														
 
															         const std::string & bearer_token,
														
 
															-        const std::string & model_path_default) {
														
 
															+        const std::string & model_path_default,
														
 
															+        bool offline) {
														
 
															     handle_model_result result;
														
 
															     // handle pre-fill default model path and url based on hf_repo and hf_file
														
 
															     {
														
@@ -750,7 +763,7 @@ static handle_model_result common_params_handle_model(
 
															             // short-hand to avoid specifying --hf-file -> default it to --model
														
 
															             if (model.hf_file.empty()) {
														
 
															                 if (model.path.empty()) {
														
 
															-                    auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token);
														
 
															+                    auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline);
														
 
															                     if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
														
 
															                         exit(1); // built without CURL, error message already printed
														
 
															                     }
														
@@ -791,7 +804,7 @@ static handle_model_result common_params_handle_model(
 
															     // then, download it if needed
														
 
															     if (!model.url.empty()) {
														
 
															-        bool ok = common_download_model(model, bearer_token);
														
 
															+        bool ok = common_download_model(model, bearer_token, offline);
														
 
															         if (!ok) {
														
 
															             LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
														
 
															             exit(1);
														
@@ -934,7 +947,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
 
															     // handle model and download
														
 
															     {
														
 
															-        auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
														
 
															+        auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
														
 
															         if (params.no_mmproj) {
														
 
															             params.mmproj = {};
														
 
															         } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
														
@@ -944,12 +957,12 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
 
															         // only download mmproj if the current example is using it
														
 
															         for (auto & ex : mmproj_examples) {
														
 
															             if (ctx_arg.ex == ex) {
														
 
															-                common_params_handle_model(params.mmproj,    params.hf_token, "");
														
 
															+                common_params_handle_model(params.mmproj,    params.hf_token, "", params.offline);
														
 
															                 break;
														
 
															             }
														
 
															         }
														
 
															-        common_params_handle_model(params.speculative.model, params.hf_token, "");
														
 
															-        common_params_handle_model(params.vocoder.model,     params.hf_token, "");
														
 
															+        common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
														
 
															+        common_params_handle_model(params.vocoder.model,     params.hf_token, "", params.offline);
														
 
															     }
														
 
															     if (params.escape) {
														
@@ -2996,6 +3009,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 
															             common_log_set_verbosity_thold(INT_MAX);
														
 
															         }
														
 
															     ));
														
 
															+    add_opt(common_arg(
														
 
															+        {"--offline"},
														
 
															+        "Offline mode: forces use of cache, prevents network access",
														
 
															+        [](common_params & params) {
														
 
															+            params.offline = true;
														
 
															+        }
														
 
															+    ).set_env("LLAMA_OFFLINE"));
														
 
															     add_opt(common_arg(
														
 
															         {"-lv", "--verbosity", "--log-verbosity"}, "N",
														
 
															         "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
														
--- a/common/common.h
+++ b/common/common.h
@@ -291,6 +291,7 @@ struct common_params {
 
															     int32_t verbosity                  = 0;
														
 
															     int32_t control_vector_layer_start = -1; // layer range for control vector
														
 
															     int32_t control_vector_layer_end   = -1; // layer range for control vector
														
 
															+    bool    offline                    = false;
														
 
															     int32_t ppl_stride      = 0;     // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
														
 
															     int32_t ppl_output_type = 0;     // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line