|
@@ -57,12 +57,32 @@ static std::string read_file(const std::string & fname) {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static void write_file(const std::string & fname, const std::string & content) {
|
|
static void write_file(const std::string & fname, const std::string & content) {
|
|
|
- std::ofstream file(fname);
|
|
|
|
|
|
|
+ const std::string fname_tmp = fname + ".tmp";
|
|
|
|
|
+ std::ofstream file(fname_tmp);
|
|
|
if (!file) {
|
|
if (!file) {
|
|
|
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
|
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
|
|
}
|
|
}
|
|
|
- file << content;
|
|
|
|
|
- file.close();
|
|
|
|
|
|
|
+
|
|
|
|
|
+ try {
|
|
|
|
|
+ file << content;
|
|
|
|
|
+ file.close();
|
|
|
|
|
+
|
|
|
|
|
+ // Makes write atomic
|
|
|
|
|
+ if (rename(fname_tmp.c_str(), fname.c_str()) != 0) {
|
|
|
|
|
+ LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str());
|
|
|
|
|
+ // If rename fails, try to delete the temporary file
|
|
|
|
|
+ if (remove(fname_tmp.c_str()) != 0) {
|
|
|
|
|
+ LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (...) {
|
|
|
|
|
+ // If anything fails, try to delete the temporary file
|
|
|
|
|
+ if (remove(fname_tmp.c_str()) != 0) {
|
|
|
|
|
+ LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str()));
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
|
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
|
@@ -217,250 +237,294 @@ struct curl_slist_ptr {
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
-#define CURL_MAX_RETRY 3
|
|
|
|
|
-#define CURL_RETRY_DELAY_SECONDS 2
|
|
|
|
|
|
|
+static CURLcode common_curl_perf(CURL * curl) {
|
|
|
|
|
+ CURLcode res = curl_easy_perform(curl);
|
|
|
|
|
+ if (res != CURLE_OK) {
|
|
|
|
|
+ LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return res;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// Response header values collected by common_header_callback.
// Each field stays empty until the matching header has been seen.
struct common_load_model_from_url_headers {
    std::string etag;           // value of the "ETag" header
    std::string last_modified;  // value of the "Last-Modified" header
    std::string accept_ranges;  // value of the "Accept-Ranges" header
};
|
|
|
|
|
|
|
|
-static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds, const char * method_name) {
|
|
|
|
|
- int remaining_attempts = max_attempts;
|
|
|
|
|
|
|
// Deleter for use with std::unique_ptr<FILE, FILE_deleter>: closes the stream with fclose().
struct FILE_deleter {
    void operator()(FILE * stream) const {
        fclose(stream);
    }
};
|
|
|
|
|
|
|
|
- while (remaining_attempts > 0) {
|
|
|
|
|
- LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method_name, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
|
|
|
|
|
|
|
+static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
|
|
|
|
|
+ common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
|
|
|
|
|
+ static std::regex header_regex("([^:]+): (.*)\r\n");
|
|
|
|
|
+ static std::regex etag_regex("ETag", std::regex_constants::icase);
|
|
|
|
|
+ static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
|
|
|
|
+ static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
|
|
|
|
|
+ std::string header(buffer, n_items);
|
|
|
|
|
+ std::smatch match;
|
|
|
|
|
+ if (std::regex_match(header, match, header_regex)) {
|
|
|
|
|
+ const std::string & key = match[1];
|
|
|
|
|
+ const std::string & value = match[2];
|
|
|
|
|
+ if (std::regex_match(key, match, etag_regex)) {
|
|
|
|
|
+ headers->etag = value;
|
|
|
|
|
+ } else if (std::regex_match(key, match, last_modified_regex)) {
|
|
|
|
|
+ headers->last_modified = value;
|
|
|
|
|
+ } else if (std::regex_match(key, match, accept_ranges_regex)) {
|
|
|
|
|
+ headers->accept_ranges = value;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return n_items;
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
- CURLcode res = curl_easy_perform(curl);
|
|
|
|
|
- if (res == CURLE_OK) {
|
|
|
|
|
- return true;
|
|
|
|
|
- }
|
|
|
|
|
|
|
// Write callback: forwards the received chunk to the FILE stream passed as `fd`
// and returns whatever std::fwrite reports.
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
    FILE * const out = static_cast<FILE *>(fd);
    return std::fwrite(data, size, nmemb, out);
}
|
|
|
|
|
|
|
|
- int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000;
|
|
|
|
|
- LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
|
|
|
|
|
|
|
// Return `url` with any embedded credentials masked, for safe logging.
//
// A URL of the form scheme://user[:password]@host[...] is returned as
// scheme://********@host[...]. Any other input (no credentials, or not of the
// form scheme://...) is returned unchanged.
static std::string llama_download_hide_password_in_url(const std::string & url) {
    // Pattern: scheme://[user[:password]@]rest
    //   group 1 = scheme including "://"
    //   group 2 = "user[:password]@" (optional; may not contain '/' so an '@'
    //             that appears later in the path or query is not mistaken for it)
    //   group 3 = rest of the URL (host, path, ...)
    static const std::regex url_regex(R"(^([A-Za-z][A-Za-z0-9+.-]*://)([^/@]+@)?([\s\S]*)$)");
    std::smatch             match;

    if (std::regex_match(url, match, url_regex) && match[2].matched) {
        return match[1].str() + "********@" + match[3].str();
    }

    return url;  // No credentials found or malformed URL
}
|
|
|
|
|
|
|
|
- return false;
|
|
|
|
|
|
|
+static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
|
|
|
|
|
+ // Set the URL, allow to follow http redirection
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
|
|
|
|
+
|
|
|
|
|
+# if defined(_WIN32)
|
|
|
|
|
+ // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
|
|
|
|
|
+ // operating system. Currently implemented under MS-Windows.
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
|
|
|
+# endif
|
|
|
|
|
+
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// download one single file from remote URL to local path
|
|
|
|
|
-static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) {
|
|
|
|
|
- // Check if the file already exists locally
|
|
|
|
|
- auto file_exists = std::filesystem::exists(path);
|
|
|
|
|
|
|
+static void common_curl_easy_setopt_get(CURL * curl) {
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
|
|
|
|
|
|
|
|
- // If the file exists, check its JSON metadata companion file.
|
|
|
|
|
- std::string metadata_path = path + ".json";
|
|
|
|
|
- nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
|
|
|
|
|
- std::string etag;
|
|
|
|
|
- std::string last_modified;
|
|
|
|
|
|
|
+ // display download progress
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
- if (file_exists) {
|
|
|
|
|
- if (offline) {
|
|
|
|
|
- LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
|
|
|
- return true; // skip verification/downloading
|
|
|
|
|
- }
|
|
|
|
|
- // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
|
|
|
|
|
- std::ifstream metadata_in(metadata_path);
|
|
|
|
|
- if (metadata_in.good()) {
|
|
|
|
|
- try {
|
|
|
|
|
- metadata_in >> metadata;
|
|
|
|
|
- LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
|
|
|
|
- if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
|
|
|
- etag = metadata.at("etag");
|
|
|
|
|
- }
|
|
|
|
|
- if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
|
|
|
- last_modified = metadata.at("lastModified");
|
|
|
|
|
- }
|
|
|
|
|
- } catch (const nlohmann::json::exception & e) {
|
|
|
|
|
- LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
|
|
|
- } else {
|
|
|
|
|
- if (offline) {
|
|
|
|
|
- LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
|
|
|
- return false;
|
|
|
|
|
- }
|
|
|
|
|
- LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
|
|
|
|
|
+static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
|
|
|
|
|
+ if (std::filesystem::exists(path_temporary)) {
|
|
|
|
|
+ const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary));
|
|
|
|
|
+ LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
|
|
|
|
|
+ const std::string range_str = partial_size + "-";
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Send a HEAD request to retrieve the etag and last-modified headers
|
|
|
|
|
- struct common_load_model_from_url_headers {
|
|
|
|
|
- std::string etag;
|
|
|
|
|
- std::string last_modified;
|
|
|
|
|
- };
|
|
|
|
|
|
|
+ // Always open file in append mode could be resuming
|
|
|
|
|
+ std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "ab"));
|
|
|
|
|
+ if (!outfile) {
|
|
|
|
|
+ LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- common_load_model_from_url_headers headers;
|
|
|
|
|
- bool head_request_ok = false;
|
|
|
|
|
- bool should_download = !file_exists; // by default, we should download if the file does not exist
|
|
|
|
|
|
|
+ common_curl_easy_setopt_get(curl);
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
|
|
|
|
|
|
|
|
- // Initialize libcurl
|
|
|
|
|
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
|
|
|
- curl_slist_ptr http_headers;
|
|
|
|
|
|
|
+ return common_curl_perf(curl) == CURLE_OK;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static bool common_download_head(CURL * curl,
|
|
|
|
|
+ curl_slist_ptr & http_headers,
|
|
|
|
|
+ const std::string & url,
|
|
|
|
|
+ const std::string & bearer_token) {
|
|
|
if (!curl) {
|
|
if (!curl) {
|
|
|
LOG_ERR("%s: error initializing libcurl\n", __func__);
|
|
LOG_ERR("%s: error initializing libcurl\n", __func__);
|
|
|
return false;
|
|
return false;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Set the URL, allow to follow http redirection
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
|
|
|
-
|
|
|
|
|
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
|
// Check if hf-token or bearer-token was specified
|
|
// Check if hf-token or bearer-token was specified
|
|
|
if (!bearer_token.empty()) {
|
|
if (!bearer_token.empty()) {
|
|
|
std::string auth_header = "Authorization: Bearer " + bearer_token;
|
|
std::string auth_header = "Authorization: Bearer " + bearer_token;
|
|
|
- http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
|
|
|
|
|
|
+ http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
|
|
}
|
|
}
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
|
|
|
-
|
|
|
|
|
-#if defined(_WIN32)
|
|
|
|
|
- // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
|
|
|
|
|
- // operating system. Currently implemented under MS-Windows.
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
|
|
|
-#endif
|
|
|
|
|
|
|
|
|
|
- typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
|
|
|
|
|
- auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
|
|
|
|
|
- common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
|
|
|
|
|
-
|
|
|
|
|
- static std::regex header_regex("([^:]+): (.*)\r\n");
|
|
|
|
|
- static std::regex etag_regex("ETag", std::regex_constants::icase);
|
|
|
|
|
- static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
|
|
|
|
|
|
+ curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
|
|
|
+ common_curl_easy_setopt_head(curl, url);
|
|
|
|
|
+ return common_curl_perf(curl) == CURLE_OK;
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
- std::string header(buffer, n_items);
|
|
|
|
|
- std::smatch match;
|
|
|
|
|
- if (std::regex_match(header, match, header_regex)) {
|
|
|
|
|
- const std::string & key = match[1];
|
|
|
|
|
- const std::string & value = match[2];
|
|
|
|
|
- if (std::regex_match(key, match, etag_regex)) {
|
|
|
|
|
- headers->etag = value;
|
|
|
|
|
- } else if (std::regex_match(key, match, last_modified_regex)) {
|
|
|
|
|
- headers->last_modified = value;
|
|
|
|
|
|
|
+// download one single file from remote URL to local path
|
|
|
|
|
+static bool common_download_file_single(const std::string & url,
|
|
|
|
|
+ const std::string & path,
|
|
|
|
|
+ const std::string & bearer_token,
|
|
|
|
|
+ bool offline) {
|
|
|
|
|
+ // If the file exists, check its JSON metadata companion file.
|
|
|
|
|
+ std::string metadata_path = path + ".json";
|
|
|
|
|
+ static const int max_attempts = 3;
|
|
|
|
|
+ static const int retry_delay_seconds = 2;
|
|
|
|
|
+ for (int i = 0; i < max_attempts; ++i) {
|
|
|
|
|
+ nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
|
|
|
|
|
+ std::string etag;
|
|
|
|
|
+ std::string last_modified;
|
|
|
|
|
+
|
|
|
|
|
+ // Check if the file already exists locally
|
|
|
|
|
+ const auto file_exists = std::filesystem::exists(path);
|
|
|
|
|
+ if (file_exists) {
|
|
|
|
|
+ if (offline) {
|
|
|
|
|
+ LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
|
|
|
+ return true; // skip verification/downloading
|
|
|
|
|
+ }
|
|
|
|
|
+ // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
|
|
|
|
|
+ std::ifstream metadata_in(metadata_path);
|
|
|
|
|
+ if (metadata_in.good()) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ metadata_in >> metadata;
|
|
|
|
|
+ LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
|
|
|
|
|
+ metadata.dump().c_str());
|
|
|
|
|
+ if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
|
|
|
+ etag = metadata.at("etag");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
|
|
|
+ last_modified = metadata.at("lastModified");
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (const nlohmann::json::exception & e) {
|
|
|
|
|
+ LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
+ // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ if (offline) {
|
|
|
|
|
+ LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
|
}
|
|
}
|
|
|
- return n_items;
|
|
|
|
|
- };
|
|
|
|
|
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
|
|
|
|
|
|
+ bool head_request_ok = false;
|
|
|
|
|
+ bool should_download = !file_exists; // by default, we should download if the file does not exist
|
|
|
|
|
|
|
|
- // we only allow retrying once for HEAD requests
|
|
|
|
|
- // this is for the use case of using running offline (no internet), retrying can be annoying
|
|
|
|
|
- bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
|
|
|
|
|
- if (!was_perform_successful) {
|
|
|
|
|
- head_request_ok = false;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- long http_code = 0;
|
|
|
|
|
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
|
|
|
- if (http_code == 200) {
|
|
|
|
|
- head_request_ok = true;
|
|
|
|
|
- } else {
|
|
|
|
|
- LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
|
|
|
|
- head_request_ok = false;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- // if head_request_ok is false, we don't have the etag or last-modified headers
|
|
|
|
|
- // we leave should_download as-is, which is true if the file does not exist
|
|
|
|
|
- if (head_request_ok) {
|
|
|
|
|
- // check if ETag or Last-Modified headers are different
|
|
|
|
|
- // if it is, we need to download the file again
|
|
|
|
|
- if (!etag.empty() && etag != headers.etag) {
|
|
|
|
|
- LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
|
|
|
|
|
- should_download = true;
|
|
|
|
|
- } else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
|
|
|
|
- LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
|
|
|
|
|
- should_download = true;
|
|
|
|
|
|
|
+ // Initialize libcurl
|
|
|
|
|
+ curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
|
|
|
+ common_load_model_from_url_headers headers;
|
|
|
|
|
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
|
|
|
|
+ curl_slist_ptr http_headers;
|
|
|
|
|
+ const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
|
|
|
|
|
+ if (!was_perform_successful) {
|
|
|
|
|
+ head_request_ok = false;
|
|
|
}
|
|
}
|
|
|
- }
|
|
|
|
|
|
|
|
|
|
- if (should_download) {
|
|
|
|
|
- std::string path_temporary = path + ".downloadInProgress";
|
|
|
|
|
- if (file_exists) {
|
|
|
|
|
- LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
|
|
|
- if (remove(path.c_str()) != 0) {
|
|
|
|
|
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
|
|
|
- return false;
|
|
|
|
|
|
|
+ long http_code = 0;
|
|
|
|
|
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
|
|
|
+ if (http_code == 200) {
|
|
|
|
|
+ head_request_ok = true;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
|
|
|
|
+ head_request_ok = false;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // if head_request_ok is false, we don't have the etag or last-modified headers
|
|
|
|
|
+ // we leave should_download as-is, which is true if the file does not exist
|
|
|
|
|
+ bool should_download_from_scratch = false;
|
|
|
|
|
+ if (head_request_ok) {
|
|
|
|
|
+ // check if ETag or Last-Modified headers are different
|
|
|
|
|
+ // if it is, we need to download the file again
|
|
|
|
|
+ if (!etag.empty() && etag != headers.etag) {
|
|
|
|
|
+ LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
|
|
|
|
|
+ headers.etag.c_str());
|
|
|
|
|
+ should_download = true;
|
|
|
|
|
+ should_download_from_scratch = true;
|
|
|
|
|
+ } else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
|
|
|
|
+ LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
|
|
|
|
|
+ last_modified.c_str(), headers.last_modified.c_str());
|
|
|
|
|
+ should_download = true;
|
|
|
|
|
+ should_download_from_scratch = true;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
|
|
|
|
|
+ if (should_download) {
|
|
|
|
|
+ if (file_exists &&
|
|
|
|
|
+ !accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
|
|
|
|
|
+ LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
|
|
|
+ if (remove(path.c_str()) != 0) {
|
|
|
|
|
+ LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
- }
|
|
|
|
|
|
|
|
|
|
- // Set the output file
|
|
|
|
|
|
|
+ const std::string path_temporary = path + ".downloadInProgress";
|
|
|
|
|
+ if (should_download_from_scratch) {
|
|
|
|
|
+ if (std::filesystem::exists(path_temporary)) {
|
|
|
|
|
+ if (remove(path_temporary.c_str()) != 0) {
|
|
|
|
|
+ LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- struct FILE_deleter {
|
|
|
|
|
- void operator()(FILE * f) const {
|
|
|
|
|
- fclose(f);
|
|
|
|
|
|
|
+ if (std::filesystem::exists(path)) {
|
|
|
|
|
+ if (remove(path.c_str()) != 0) {
|
|
|
|
|
+ LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
- };
|
|
|
|
|
-
|
|
|
|
|
- std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
|
|
|
|
|
- if (!outfile) {
|
|
|
|
|
- LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
|
|
|
|
|
- return false;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
|
|
|
|
|
- auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
|
|
|
|
|
- return fwrite(data, size, nmemb, (FILE *)fd);
|
|
|
|
|
- };
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
|
|
|
|
|
|
|
|
|
|
- // display download progress
|
|
|
|
|
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
|
|
|
|
|
|
|
+ // Write the updated JSON metadata file.
|
|
|
|
|
+ metadata.update({
|
|
|
|
|
+ { "url", url },
|
|
|
|
|
+ { "etag", headers.etag },
|
|
|
|
|
+ { "lastModified", headers.last_modified }
|
|
|
|
|
+ });
|
|
|
|
|
+ write_file(metadata_path, metadata.dump(4));
|
|
|
|
|
+ LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
|
|
|
+
|
|
|
|
|
+ // start the download
|
|
|
|
|
+ LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
|
|
|
|
|
+ __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
|
|
|
|
|
+ headers.etag.c_str(), headers.last_modified.c_str());
|
|
|
|
|
+ const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
|
|
|
|
|
+ if (!was_pull_successful) {
|
|
|
|
|
+ if (i + 1 < max_attempts) {
|
|
|
|
|
+ const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
|
|
|
|
|
+ LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
|
|
|
|
|
+ std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- // helper function to hide password in URL
|
|
|
|
|
- auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
|
|
|
|
|
- std::size_t protocol_pos = url.find("://");
|
|
|
|
|
- if (protocol_pos == std::string::npos) {
|
|
|
|
|
- return url; // Malformed URL
|
|
|
|
|
|
|
+ continue;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- std::size_t at_pos = url.find('@', protocol_pos + 3);
|
|
|
|
|
- if (at_pos == std::string::npos) {
|
|
|
|
|
- return url; // No password in URL
|
|
|
|
|
|
|
+ long http_code = 0;
|
|
|
|
|
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
|
|
|
+ if (http_code < 200 || http_code >= 400) {
|
|
|
|
|
+ LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
|
|
|
+ return false;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
|
|
|
|
|
- };
|
|
|
|
|
-
|
|
|
|
|
- // start the download
|
|
|
|
|
- LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
|
|
|
|
|
- llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
|
|
|
|
|
- bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET");
|
|
|
|
|
- if (!was_perform_successful) {
|
|
|
|
|
- return false;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- long http_code = 0;
|
|
|
|
|
- curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
|
|
|
- if (http_code < 200 || http_code >= 400) {
|
|
|
|
|
- LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
|
|
|
- return false;
|
|
|
|
|
|
|
+ if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
|
|
|
+ LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Causes file to be closed explicitly here before we rename it.
|
|
|
|
|
- outfile.reset();
|
|
|
|
|
-
|
|
|
|
|
- // Write the updated JSON metadata file.
|
|
|
|
|
- metadata.update({
|
|
|
|
|
- {"url", url},
|
|
|
|
|
- {"etag", headers.etag},
|
|
|
|
|
- {"lastModified", headers.last_modified}
|
|
|
|
|
- });
|
|
|
|
|
- write_file(metadata_path, metadata.dump(4));
|
|
|
|
|
- LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
|
|
|
-
|
|
|
|
|
- if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
|
|
|
- LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
|
|
|
- return false;
|
|
|
|
|
- }
|
|
|
|
|
- } else {
|
|
|
|
|
- LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
|
|
|
|
|
+ break;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
return true;
|
|
@@ -770,7 +834,7 @@ static std::string common_docker_get_token(const std::string & repo) {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static std::string common_docker_resolve_model(const std::string & docker) {
|
|
static std::string common_docker_resolve_model(const std::string & docker) {
|
|
|
- // Parse ai/smollm2:135M-Q4_K_M
|
|
|
|
|
|
|
+ // Parse ai/smollm2:135M-Q4_0
|
|
|
size_t colon_pos = docker.find(':');
|
|
size_t colon_pos = docker.find(':');
|
|
|
std::string repo, tag;
|
|
std::string repo, tag;
|
|
|
if (colon_pos != std::string::npos) {
|
|
if (colon_pos != std::string::npos) {
|