download.h 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. #pragma once
  #include <cstddef>
  #include <string>
  #include <vector>
  3. struct common_params_model;
  4. //
  5. // download functionalities
  6. //
  // Metadata describing one cached model; common_list_cached_models() returns a list of these.
  struct common_cached_model_info {
      std::string manifest_path;
      std::string user;
      std::string model;
      std::string tag;
      size_t      size = 0; // GGUF size in bytes

      // Formats the model identifier as "<user>/<model>:<tag>".
      std::string to_string() const {
          std::string id = user;
          id += "/";
          id += model;
          id += ":";
          id += tag;
          return id;
      }
  };
  // Result of resolving a Hugging Face repo reference; returned by common_get_hf_file().
  struct common_hf_file_res {
      // repo name with ":tag" removed
      std::string repo;
      // name of the resolved GGUF file
      std::string ggufFile;
      // NOTE(review): presumably the multimodal projector file, if any - confirm in the implementation
      std::string mmprojFile;
  };
  22. /**
  23. * Allow getting the HF file from the HF repo with tag (like ollama), for example:
  24. * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
  25. * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
  26. * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
  27. * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
  28. *
  29. * Return pair of <repo, file> (with "repo" already having tag removed)
  30. *
  31. * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
  32. */
  33. common_hf_file_res common_get_hf_file(
  34. const std::string & hf_repo_with_tag,
  35. const std::string & bearer_token,
  36. bool offline);
  37. // returns true if download succeeded
  38. bool common_download_model(
  39. const common_params_model & model,
  40. const std::string & bearer_token,
  41. bool offline);
  42. // returns list of cached models
  43. std::vector<common_cached_model_info> common_list_cached_models();
  // Resolves a model reference against a Docker registry and downloads it.
  // Returns the local path to the downloaded model file.
  std::string common_docker_resolve_model(const std::string & docker);