ggml-backend-reg.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  #include "ggml-backend-impl.h"
  #include "ggml-backend.h"
  #include "ggml-impl.h"

  #include <algorithm>
  #include <cctype>
  #include <cstring>
  #include <filesystem>
  #include <memory>
  #include <string>
  #include <system_error>
  #include <type_traits>
  #include <vector>
  12. #ifdef _WIN32
  13. # define WIN32_LEAN_AND_MEAN
  14. # ifndef NOMINMAX
  15. # define NOMINMAX
  16. # endif
  17. # include <windows.h>
  18. #elif defined(__APPLE__)
  19. # include <mach-o/dyld.h>
  20. # include <dlfcn.h>
  21. #else
  22. # include <dlfcn.h>
  23. # include <unistd.h>
  24. #endif
  25. // Backend registry
  26. #ifdef GGML_USE_CPU
  27. #include "ggml-cpu.h"
  28. #endif
  29. #ifdef GGML_USE_CUDA
  30. #include "ggml-cuda.h"
  31. #endif
  32. #ifdef GGML_USE_METAL
  33. #include "ggml-metal.h"
  34. #endif
  35. #ifdef GGML_USE_SYCL
  36. #include "ggml-sycl.h"
  37. #endif
  38. #ifdef GGML_USE_VULKAN
  39. #include "ggml-vulkan.h"
  40. #endif
  41. #ifdef GGML_USE_WEBGPU
  42. #include "ggml-webgpu.h"
  43. #endif
  44. #ifdef GGML_USE_ZDNN
  45. #include "ggml-zdnn.h"
  46. #endif
  47. #ifdef GGML_USE_OPENCL
  48. #include "ggml-opencl.h"
  49. #endif
  50. #ifdef GGML_USE_HEXAGON
  51. #include "ggml-hexagon.h"
  52. #endif
  53. #ifdef GGML_USE_BLAS
  54. #include "ggml-blas.h"
  55. #endif
  56. #ifdef GGML_USE_RPC
  57. #include "ggml-rpc.h"
  58. #endif
  59. #ifdef GGML_USE_CANN
  60. #include "ggml-cann.h"
  61. #endif
  62. // disable C++17 deprecation warning for std::codecvt_utf8
  63. #if defined(__clang__)
  64. # pragma clang diagnostic push
  65. # pragma clang diagnostic ignored "-Wdeprecated-declarations"
  66. #elif defined(__GNUC__)
  67. # pragma GCC diagnostic push
  68. # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  69. #endif
  namespace fs = std::filesystem;

  // Converts a filesystem path into a UTF-8 encoded std::string (used for log messages).
  // If the conversion throws, the exception is swallowed and an empty string is returned.
  static std::string path_str(const fs::path & path) {
      try {
  #if defined(__cpp_lib_char8_t)
          // C++20 and later: u8string() returns std::u8string, so reinterpret the bytes as plain char
          const std::u8string utf8 = path.u8string();
          return std::string(reinterpret_cast<const char *>(utf8.c_str()));
  #else
          // C++17: u8string() returns std::string
          return path.u8string();
  #endif
      } catch (...) {
          // best effort: fall through and report an empty string
      }
      return std::string();
  }
  86. #if defined(__clang__)
  87. # pragma clang diagnostic pop
  88. #elif defined(__GNUC__)
  89. # pragma GCC diagnostic pop
  90. #endif
  91. #ifdef _WIN32
  92. using dl_handle = std::remove_pointer_t<HMODULE>;
  93. struct dl_handle_deleter {
  94. void operator()(HMODULE handle) {
  95. FreeLibrary(handle);
  96. }
  97. };
  98. static dl_handle * dl_load_library(const fs::path & path) {
  99. // suppress error dialogs for missing DLLs
  100. DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  101. SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
  102. HMODULE handle = LoadLibraryW(path.wstring().c_str());
  103. SetErrorMode(old_mode);
  104. return handle;
  105. }
  106. static void * dl_get_sym(dl_handle * handle, const char * name) {
  107. DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  108. SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
  109. void * p = (void *) GetProcAddress(handle, name);
  110. SetErrorMode(old_mode);
  111. return p;
  112. }
  // Returns a description of the last dynamic-loading error.
  // This Windows variant does not query the system and always returns an empty string.
  static const char * dl_error() {
      const char * const no_message = "";
      return no_message;
  }
  116. #else
  // Library handles are opaque `void *` values on this platform.
  using dl_handle = void;

  // Deleter for smart pointers that own a loaded library: closes it with dlclose.
  struct dl_handle_deleter {
      void operator()(void * library) {
          dlclose(library);
      }
  };
  // Loads the dynamic library at `path` with dlopen(RTLD_NOW | RTLD_LOCAL).
  // Returns the dlopen result unchanged (null on failure).
  static void * dl_load_library(const fs::path & path) {
      const std::string native_path = path.string();
      return dlopen(native_path.c_str(), RTLD_NOW | RTLD_LOCAL);
  }
  127. static void * dl_get_sym(dl_handle * handle, const char * name) {
  128. return dlsym(handle, name);
  129. }
  // Returns the message reported by dlerror(), or an empty string when dlerror() returns null.
  static const char * dl_error() {
      if (const char * message = dlerror()) {
          return message;
      }
      return "";
  }
  134. #endif
  135. using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
  136. struct ggml_backend_reg_entry {
  137. ggml_backend_reg_t reg;
  138. dl_handle_ptr handle;
  139. };
  140. struct ggml_backend_registry {
  141. std::vector<ggml_backend_reg_entry> backends;
  142. std::vector<ggml_backend_dev_t> devices;
  143. ggml_backend_registry() {
  144. #ifdef GGML_USE_CUDA
  145. register_backend(ggml_backend_cuda_reg());
  146. #endif
  147. #ifdef GGML_USE_METAL
  148. register_backend(ggml_backend_metal_reg());
  149. #endif
  150. #ifdef GGML_USE_SYCL
  151. register_backend(ggml_backend_sycl_reg());
  152. #endif
  153. #ifdef GGML_USE_VULKAN
  154. register_backend(ggml_backend_vk_reg());
  155. #endif
  156. #ifdef GGML_USE_WEBGPU
  157. register_backend(ggml_backend_webgpu_reg());
  158. #endif
  159. #ifdef GGML_USE_ZDNN
  160. register_backend(ggml_backend_zdnn_reg());
  161. #endif
  162. #ifdef GGML_USE_OPENCL
  163. register_backend(ggml_backend_opencl_reg());
  164. #endif
  165. #ifdef GGML_USE_HEXAGON
  166. register_backend(ggml_backend_hexagon_reg());
  167. #endif
  168. #ifdef GGML_USE_CANN
  169. register_backend(ggml_backend_cann_reg());
  170. #endif
  171. #ifdef GGML_USE_BLAS
  172. register_backend(ggml_backend_blas_reg());
  173. #endif
  174. #ifdef GGML_USE_RPC
  175. register_backend(ggml_backend_rpc_reg());
  176. #endif
  177. #ifdef GGML_USE_CPU
  178. register_backend(ggml_backend_cpu_reg());
  179. #endif
  180. }
  181. ~ggml_backend_registry() {
  182. // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
  183. // since backend threads may still be running and accessing resources from the dynamic library
  184. for (auto & entry : backends) {
  185. if (entry.handle) {
  186. entry.handle.release(); // NOLINT
  187. }
  188. }
  189. }
  190. void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
  191. if (!reg) {
  192. return;
  193. }
  194. #ifndef NDEBUG
  195. GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
  196. __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
  197. #endif
  198. backends.push_back({ reg, std::move(handle) });
  199. for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
  200. register_device(ggml_backend_reg_dev_get(reg, i));
  201. }
  202. }
  203. void register_device(ggml_backend_dev_t device) {
  204. #ifndef NDEBUG
  205. GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
  206. #endif
  207. devices.push_back(device);
  208. }
  209. ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
  210. dl_handle_ptr handle { dl_load_library(path) };
  211. if (!handle) {
  212. if (!silent) {
  213. GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(), dl_error());
  214. }
  215. return nullptr;
  216. }
  217. auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  218. if (score_fn && score_fn() == 0) {
  219. if (!silent) {
  220. GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
  221. }
  222. return nullptr;
  223. }
  224. auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
  225. if (!backend_init_fn) {
  226. if (!silent) {
  227. GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
  228. }
  229. return nullptr;
  230. }
  231. ggml_backend_reg_t reg = backend_init_fn();
  232. if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
  233. if (!silent) {
  234. if (!reg) {
  235. GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
  236. __func__, path_str(path).c_str());
  237. } else {
  238. GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
  239. __func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
  240. }
  241. }
  242. return nullptr;
  243. }
  244. GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());
  245. register_backend(reg, std::move(handle));
  246. return reg;
  247. }
  248. void unload_backend(ggml_backend_reg_t reg, bool silent) {
  249. auto it = std::find_if(backends.begin(), backends.end(),
  250. [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
  251. if (it == backends.end()) {
  252. if (!silent) {
  253. GGML_LOG_ERROR("%s: backend not found\n", __func__);
  254. }
  255. return;
  256. }
  257. if (!silent) {
  258. GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
  259. }
  260. // remove devices
  261. devices.erase(
  262. std::remove_if(devices.begin(), devices.end(),
  263. [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
  264. devices.end());
  265. // remove backend
  266. backends.erase(it);
  267. }
  268. };
  269. static ggml_backend_registry & get_reg() {
  270. static ggml_backend_registry reg;
  271. return reg;
  272. }
  273. // Internal API
  274. void ggml_backend_register(ggml_backend_reg_t reg) {
  275. get_reg().register_backend(reg);
  276. }
  277. void ggml_backend_device_register(ggml_backend_dev_t device) {
  278. get_reg().register_device(device);
  279. }
  280. // Backend (reg) enumeration
  // Case-insensitive comparison of two NUL-terminated strings.
  // Returns true when both strings have the same length and every pair of
  // characters is equal after std::tolower. Both pointers must be non-null.
  static bool striequals(const char * a, const char * b) {
      for (; *a && *b; a++, b++) {
          // std::tolower requires a value representable as unsigned char (or EOF);
          // passing a negative plain char is undefined behaviour, so convert first
          const int lower_a = std::tolower(static_cast<unsigned char>(*a));
          const int lower_b = std::tolower(static_cast<unsigned char>(*b));
          if (lower_a != lower_b) {
              return false;
          }
      }
      // equal only if both strings ended at the same position
      return *a == *b;
  }
  289. size_t ggml_backend_reg_count() {
  290. return get_reg().backends.size();
  291. }
  292. ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
  293. GGML_ASSERT(index < ggml_backend_reg_count());
  294. return get_reg().backends[index].reg;
  295. }
  296. ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
  297. for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
  298. ggml_backend_reg_t reg = ggml_backend_reg_get(i);
  299. if (striequals(ggml_backend_reg_name(reg), name)) {
  300. return reg;
  301. }
  302. }
  303. return nullptr;
  304. }
  305. // Device enumeration
  306. size_t ggml_backend_dev_count() {
  307. return get_reg().devices.size();
  308. }
  309. ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
  310. GGML_ASSERT(index < ggml_backend_dev_count());
  311. return get_reg().devices[index];
  312. }
  313. ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
  314. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  315. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  316. if (striequals(ggml_backend_dev_name(dev), name)) {
  317. return dev;
  318. }
  319. }
  320. return nullptr;
  321. }
  322. ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
  323. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  324. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  325. if (ggml_backend_dev_type(dev) == type) {
  326. return dev;
  327. }
  328. }
  329. return nullptr;
  330. }
  331. // Convenience functions
  332. ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
  333. ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
  334. if (!dev) {
  335. return nullptr;
  336. }
  337. return ggml_backend_dev_init(dev, params);
  338. }
  339. ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
  340. ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
  341. if (!dev) {
  342. return nullptr;
  343. }
  344. return ggml_backend_dev_init(dev, params);
  345. }
  346. ggml_backend_t ggml_backend_init_best(void) {
  347. ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
  348. dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU);
  349. dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
  350. if (!dev) {
  351. return nullptr;
  352. }
  353. return ggml_backend_dev_init(dev, nullptr);
  354. }
  355. // Dynamic loading
  356. ggml_backend_reg_t ggml_backend_load(const char * path) {
  357. return get_reg().load_backend(path, false);
  358. }
  359. void ggml_backend_unload(ggml_backend_reg_t reg) {
  360. get_reg().unload_backend(reg, true);
  361. }
  // Returns the directory that contains the running executable, with a trailing
  // path separator.
  //   - macOS:         queried with _NSGetExecutablePath
  //   - Linux/FreeBSD: read from the /proc symlink; "./" is returned if readlink fails
  //   - Windows:       queried with GetModuleFileNameW; empty path on failure
  //   - other systems: empty path
  static fs::path get_executable_path() {
  #if defined(__APPLE__)
      // get executable path
      std::vector<char> path;
      uint32_t size;
      while (true) {
          size = path.size();
          // a non-zero result means the buffer was too small; `size` then holds the required size
          if (_NSGetExecutablePath(path.data(), &size) == 0) {
              break;
          }
          path.resize(size);
      }
      std::string base_path(path.data(), size);
      // remove executable name
      auto last_slash = base_path.find_last_of('/');
      if (last_slash != std::string::npos) {
          base_path = base_path.substr(0, last_slash);
      }
      return base_path + "/";
  #elif defined(__linux__) || defined(__FreeBSD__)
      std::string base_path = ".";
      std::vector<char> path(1024);
      while (true) {
          // get executable path
  #    if defined(__linux__)
          ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
  #    elif defined(__FreeBSD__)
          ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
  #    endif
          if (len == -1) {
              break;
          }
          // readlink does not report truncation: a result that fills the whole
          // buffer may be cut off, so only accept it when it is strictly shorter
          if (len < (ssize_t) path.size()) {
              base_path = std::string(path.data(), len);
              // remove executable name
              auto last_slash = base_path.find_last_of('/');
              if (last_slash != std::string::npos) {
                  base_path = base_path.substr(0, last_slash);
              }
              break;
          }
          path.resize(path.size() * 2);
      }

      return base_path + "/";
  #elif defined(_WIN32)
      std::vector<wchar_t> path(MAX_PATH);
      DWORD len = 0;
      while (true) {
          len = GetModuleFileNameW(NULL, path.data(), static_cast<DWORD>(path.size()));
          if (len == 0) {
              return {};
          }
          // a return value equal to the buffer size means the path was truncated
          // (executable paths can be longer than MAX_PATH): grow the buffer and retry
          if (len < path.size()) {
              break;
          }
          path.resize(path.size() * 2);
      }
      std::wstring base_path(path.data(), len);
      // remove executable name
      auto last_slash = base_path.find_last_of(L'\\');
      if (last_slash != std::wstring::npos) {
          base_path = base_path.substr(0, last_slash);
      }
      return base_path + L"\\";
  #else
      return {};
  #endif
  }
  // Returns the file-name prefix of backend libraries: "ggml-" on Windows, "libggml-" elsewhere.
  static fs::path backend_filename_prefix() {
  #ifdef _WIN32
      const char * const prefix = "ggml-";
  #else
      const char * const prefix = "libggml-";
  #endif
      return fs::u8path(prefix);
  }
  // Returns the file-name extension of backend libraries: ".dll" on Windows, ".so" elsewhere.
  static fs::path backend_filename_extension() {
  #ifdef _WIN32
      const char * const extension = ".dll";
  #else
      const char * const extension = ".so";
  #endif
      return fs::u8path(extension);
  }
  437. static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
  438. // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
  439. const fs::path name_path = fs::u8path(name);
  440. const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
  441. const fs::path file_extension = backend_filename_extension();
  442. std::vector<fs::path> search_paths;
  443. if (user_search_path == nullptr) {
  444. #ifdef GGML_BACKEND_DIR
  445. search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
  446. #endif
  447. // default search paths: executable directory, current directory
  448. search_paths.push_back(get_executable_path());
  449. search_paths.push_back(fs::current_path());
  450. } else {
  451. search_paths.push_back(fs::u8path(user_search_path));
  452. }
  453. int best_score = 0;
  454. fs::path best_path;
  455. for (const auto & search_path : search_paths) {
  456. if (!fs::exists(search_path)) {
  457. GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
  458. continue;
  459. }
  460. fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
  461. for (const auto & entry : dir_it) {
  462. if (entry.is_regular_file()) {
  463. auto filename = entry.path().filename();
  464. auto ext = entry.path().extension();
  465. if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
  466. dl_handle_ptr handle { dl_load_library(entry) };
  467. if (!handle && !silent) {
  468. GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error());
  469. }
  470. if (handle) {
  471. auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  472. if (score_fn) {
  473. int s = score_fn();
  474. #ifndef NDEBUG
  475. GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
  476. #endif
  477. if (s > best_score) {
  478. best_score = s;
  479. best_path = entry.path();
  480. }
  481. } else {
  482. if (!silent) {
  483. GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
  484. }
  485. }
  486. }
  487. }
  488. }
  489. }
  490. }
  491. if (best_score == 0) {
  492. // try to load the base backend
  493. for (const auto & search_path : search_paths) {
  494. fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
  495. fs::path path = search_path / filename;
  496. if (fs::exists(path)) {
  497. return get_reg().load_backend(path, silent);
  498. }
  499. }
  500. return nullptr;
  501. }
  502. return get_reg().load_backend(best_path, silent);
  503. }
  504. void ggml_backend_load_all() {
  505. ggml_backend_load_all_from_path(nullptr);
  506. }
  507. void ggml_backend_load_all_from_path(const char * dir_path) {
  508. #ifdef NDEBUG
  509. bool silent = true;
  510. #else
  511. bool silent = false;
  512. #endif
  513. ggml_backend_load_best("blas", silent, dir_path);
  514. ggml_backend_load_best("cann", silent, dir_path);
  515. ggml_backend_load_best("cuda", silent, dir_path);
  516. ggml_backend_load_best("hip", silent, dir_path);
  517. ggml_backend_load_best("metal", silent, dir_path);
  518. ggml_backend_load_best("rpc", silent, dir_path);
  519. ggml_backend_load_best("sycl", silent, dir_path);
  520. ggml_backend_load_best("vulkan", silent, dir_path);
  521. ggml_backend_load_best("opencl", silent, dir_path);
  522. ggml_backend_load_best("hexagon", silent, dir_path);
  523. ggml_backend_load_best("musa", silent, dir_path);
  524. ggml_backend_load_best("cpu", silent, dir_path);
  525. // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
  526. const char * backend_path = std::getenv("GGML_BACKEND_PATH");
  527. if (backend_path) {
  528. ggml_backend_load(backend_path);
  529. }
  530. }