fit-params.cpp

#include "llama.h"
#include "arg.h"
#include "common.h"
#include "log.h"

#include <iostream>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
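
// fit-params: fits the model/context parameters with llama_params_fit() and
// prints the fitted values to stdout as CLI flags (-c, -ngl, -ts, -ot) that
// can be passed on to other llama.cpp tools.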
int main(int argc, char ** argv) {
    common_params params;

    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
        return 1;
    }
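
    // initialize common state, the ggml backends, and NUMA handling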
    common_init();
    llama_backend_init();
    llama_numa_init(params.numa);
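
    // convert the high-level common_params into the llama model/context
    // parameter structs and let llama_params_fit() adjust them in place
    // through the pointers passed to it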
    auto mparams = common_model_params_to_llama(params);
    auto cparams = common_context_params_to_llama(params);

    llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
        params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx,
        params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
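
    // emit the fitted parameters in CLI argument form, starting with the
    // context size and the number of GPU layers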
    LOG_INF("Printing fitted CLI arguments to stdout...\n");

    std::cout << "-c " << cparams.n_ctx;
    std::cout << " -ngl " << mparams.n_gpu_layers;
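
    // drop trailing zero entries from the tensor split and only print -ts
    // when more than one device still has a share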
    size_t nd = llama_max_devices();
    while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
        nd--;
    }
    if (nd > 1) {
        for (size_t id = 0; id < nd; id++) {
            if (id == 0) {
                std::cout << " -ts ";
            }
            if (id > 0) {
                std::cout << ",";
            }
            std::cout << mparams.tensor_split[id];
        }
    }
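
    // print the tensor buffer type overrides as a comma-separated list of
    // <pattern>=<buffer type name> pairs; the array is terminated by the
    // first null pattern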
    const size_t ntbo = llama_max_tensor_buft_overrides();
    for (size_t itbo = 0; itbo < ntbo && mparams.tensor_buft_overrides[itbo].pattern != nullptr; itbo++) {
        if (itbo == 0) {
            std::cout << " -ot ";
        }
        if (itbo > 0) {
            std::cout << ",";
        }
        std::cout << mparams.tensor_buft_overrides[itbo].pattern << "=" << ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft);
    }
    std::cout << "\n";

    return 0;
}