ggml-backend-reg.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <algorithm>
#include <cctype>
#include <codecvt>
#include <cstring>
#include <filesystem>
#include <locale>
#include <memory>
#include <string>
#include <system_error>
#include <type_traits>
#include <vector>
  13. #ifdef _WIN32
  14. # define WIN32_LEAN_AND_MEAN
  15. # ifndef NOMINMAX
  16. # define NOMINMAX
  17. # endif
  18. # include <windows.h>
  19. #elif defined(__APPLE__)
  20. # include <mach-o/dyld.h>
  21. # include <dlfcn.h>
  22. #else
  23. # include <dlfcn.h>
  24. # include <unistd.h>
  25. #endif
  26. // Backend registry
  27. #ifdef GGML_USE_CPU
  28. #include "ggml-cpu.h"
  29. #endif
  30. #ifdef GGML_USE_CUDA
  31. #include "ggml-cuda.h"
  32. #endif
  33. #ifdef GGML_USE_METAL
  34. #include "ggml-metal.h"
  35. #endif
  36. #ifdef GGML_USE_SYCL
  37. #include "ggml-sycl.h"
  38. #endif
  39. #ifdef GGML_USE_VULKAN
  40. #include "ggml-vulkan.h"
  41. #endif
  42. #ifdef GGML_USE_OPENCL
  43. #include "ggml-opencl.h"
  44. #endif
  45. #ifdef GGML_USE_BLAS
  46. #include "ggml-blas.h"
  47. #endif
  48. #ifdef GGML_USE_RPC
  49. #include "ggml-rpc.h"
  50. #endif
  51. #ifdef GGML_USE_CANN
  52. #include "ggml-cann.h"
  53. #endif
  54. #ifdef GGML_USE_KOMPUTE
  55. #include "ggml-kompute.h"
  56. #endif
  57. // disable C++17 deprecation warning for std::codecvt_utf8
  58. #if defined(__clang__)
  59. # pragma clang diagnostic push
  60. # pragma clang diagnostic ignored "-Wdeprecated-declarations"
  61. #endif
  62. static std::wstring utf8_to_utf16(const std::string & str) {
  63. std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
  64. return converter.from_bytes(str);
  65. }
  66. static std::string utf16_to_utf8(const std::wstring & str) {
  67. std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
  68. return converter.to_bytes(str);
  69. }
  70. #if defined(__clang__)
  71. # pragma clang diagnostic pop
  72. #endif
  73. #ifdef _WIN32
  74. using dl_handle = std::remove_pointer_t<HMODULE>;
  75. struct dl_handle_deleter {
  76. void operator()(HMODULE handle) {
  77. FreeLibrary(handle);
  78. }
  79. };
  80. static dl_handle * dl_load_library(const std::wstring & path) {
  81. // suppress error dialogs for missing DLLs
  82. DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  83. SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
  84. HMODULE handle = LoadLibraryW(path.c_str());
  85. SetErrorMode(old_mode);
  86. return handle;
  87. }
  88. static void * dl_get_sym(dl_handle * handle, const char * name) {
  89. DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  90. SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
  91. void * p = (void *) GetProcAddress(handle, name);
  92. SetErrorMode(old_mode);
  93. return p;
  94. }
  95. #else
  96. using dl_handle = void;
  97. struct dl_handle_deleter {
  98. void operator()(void * handle) {
  99. dlclose(handle);
  100. }
  101. };
  102. static void * dl_load_library(const std::wstring & path) {
  103. dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
  104. return handle;
  105. }
  106. static void * dl_get_sym(dl_handle * handle, const char * name) {
  107. return dlsym(handle, name);
  108. }
  109. #endif
  110. using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
  111. struct ggml_backend_reg_entry {
  112. ggml_backend_reg_t reg;
  113. dl_handle_ptr handle;
  114. };
  115. struct ggml_backend_registry {
  116. std::vector<ggml_backend_reg_entry> backends;
  117. std::vector<ggml_backend_dev_t> devices;
  118. ggml_backend_registry() {
  119. #ifdef GGML_USE_CUDA
  120. register_backend(ggml_backend_cuda_reg());
  121. #endif
  122. #ifdef GGML_USE_METAL
  123. register_backend(ggml_backend_metal_reg());
  124. #endif
  125. #ifdef GGML_USE_SYCL
  126. register_backend(ggml_backend_sycl_reg());
  127. #endif
  128. #ifdef GGML_USE_VULKAN
  129. register_backend(ggml_backend_vk_reg());
  130. #endif
  131. #ifdef GGML_USE_OPENCL
  132. register_backend(ggml_backend_opencl_reg());
  133. #endif
  134. #ifdef GGML_USE_CANN
  135. register_backend(ggml_backend_cann_reg());
  136. #endif
  137. #ifdef GGML_USE_BLAS
  138. register_backend(ggml_backend_blas_reg());
  139. #endif
  140. #ifdef GGML_USE_RPC
  141. register_backend(ggml_backend_rpc_reg());
  142. #endif
  143. #ifdef GGML_USE_KOMPUTE
  144. register_backend(ggml_backend_kompute_reg());
  145. #endif
  146. #ifdef GGML_USE_CPU
  147. register_backend(ggml_backend_cpu_reg());
  148. #endif
  149. }
  150. ~ggml_backend_registry() {
  151. // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
  152. // since backend threads may still be running and accessing resources from the dynamic library
  153. for (auto & entry : backends) {
  154. if (entry.handle) {
  155. entry.handle.release(); // NOLINT
  156. }
  157. }
  158. }
  159. void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
  160. if (!reg) {
  161. return;
  162. }
  163. #ifndef NDEBUG
  164. GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
  165. __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
  166. #endif
  167. backends.push_back({ reg, std::move(handle) });
  168. for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
  169. register_device(ggml_backend_reg_dev_get(reg, i));
  170. }
  171. }
  172. void register_device(ggml_backend_dev_t device) {
  173. #ifndef NDEBUG
  174. GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
  175. #endif
  176. devices.push_back(device);
  177. }
  178. ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
  179. dl_handle_ptr handle { dl_load_library(path) };
  180. if (!handle) {
  181. if (!silent) {
  182. GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
  183. }
  184. return nullptr;
  185. }
  186. auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  187. if (score_fn && score_fn() == 0) {
  188. if (!silent) {
  189. GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
  190. }
  191. return nullptr;
  192. }
  193. auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
  194. if (!backend_init_fn) {
  195. if (!silent) {
  196. GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
  197. }
  198. return nullptr;
  199. }
  200. ggml_backend_reg_t reg = backend_init_fn();
  201. if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
  202. if (!silent) {
  203. if (!reg) {
  204. GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
  205. } else {
  206. GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
  207. __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
  208. }
  209. }
  210. return nullptr;
  211. }
  212. GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
  213. register_backend(reg, std::move(handle));
  214. return reg;
  215. }
  216. void unload_backend(ggml_backend_reg_t reg, bool silent) {
  217. auto it = std::find_if(backends.begin(), backends.end(),
  218. [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
  219. if (it == backends.end()) {
  220. if (!silent) {
  221. GGML_LOG_ERROR("%s: backend not found\n", __func__);
  222. }
  223. return;
  224. }
  225. if (!silent) {
  226. GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
  227. }
  228. // remove devices
  229. devices.erase(
  230. std::remove_if(devices.begin(), devices.end(),
  231. [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
  232. devices.end());
  233. // remove backend
  234. backends.erase(it);
  235. }
  236. };
  237. static ggml_backend_registry & get_reg() {
  238. static ggml_backend_registry reg;
  239. return reg;
  240. }
  241. // Internal API
  242. void ggml_backend_register(ggml_backend_reg_t reg) {
  243. get_reg().register_backend(reg);
  244. }
  245. void ggml_backend_device_register(ggml_backend_dev_t device) {
  246. get_reg().register_device(device);
  247. }
  248. // Backend (reg) enumeration
  249. static bool striequals(const char * a, const char * b) {
  250. for (; *a && *b; a++, b++) {
  251. if (std::tolower(*a) != std::tolower(*b)) {
  252. return false;
  253. }
  254. }
  255. return *a == *b;
  256. }
  257. size_t ggml_backend_reg_count() {
  258. return get_reg().backends.size();
  259. }
  260. ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
  261. GGML_ASSERT(index < ggml_backend_reg_count());
  262. return get_reg().backends[index].reg;
  263. }
  264. ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
  265. for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
  266. ggml_backend_reg_t reg = ggml_backend_reg_get(i);
  267. if (striequals(ggml_backend_reg_name(reg), name)) {
  268. return reg;
  269. }
  270. }
  271. return nullptr;
  272. }
  273. // Device enumeration
  274. size_t ggml_backend_dev_count() {
  275. return get_reg().devices.size();
  276. }
  277. ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
  278. GGML_ASSERT(index < ggml_backend_dev_count());
  279. return get_reg().devices[index];
  280. }
  281. ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
  282. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  283. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  284. if (striequals(ggml_backend_dev_name(dev), name)) {
  285. return dev;
  286. }
  287. }
  288. return nullptr;
  289. }
  290. ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
  291. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  292. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  293. if (ggml_backend_dev_type(dev) == type) {
  294. return dev;
  295. }
  296. }
  297. return nullptr;
  298. }
  299. // Convenience functions
  300. ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
  301. ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
  302. if (!dev) {
  303. return nullptr;
  304. }
  305. return ggml_backend_dev_init(dev, params);
  306. }
  307. ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
  308. ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
  309. if (!dev) {
  310. return nullptr;
  311. }
  312. return ggml_backend_dev_init(dev, params);
  313. }
  314. ggml_backend_t ggml_backend_init_best(void) {
  315. ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
  316. if (!dev) {
  317. dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
  318. }
  319. if (!dev) {
  320. return nullptr;
  321. }
  322. return ggml_backend_dev_init(dev, nullptr);
  323. }
  324. // Dynamic loading
  325. ggml_backend_reg_t ggml_backend_load(const char * path) {
  326. return get_reg().load_backend(utf8_to_utf16(path), false);
  327. }
  328. void ggml_backend_unload(ggml_backend_reg_t reg) {
  329. get_reg().unload_backend(reg, true);
  330. }
  331. static std::wstring get_executable_path() {
  332. #if defined(__APPLE__)
  333. // get executable path
  334. std::vector<char> path;
  335. uint32_t size;
  336. while (true) {
  337. size = path.size();
  338. if (_NSGetExecutablePath(path.data(), &size) == 0) {
  339. break;
  340. }
  341. path.resize(size);
  342. }
  343. std::string base_path(path.data(), size);
  344. // remove executable name
  345. auto last_slash = base_path.find_last_of('/');
  346. if (last_slash != std::string::npos) {
  347. base_path = base_path.substr(0, last_slash);
  348. }
  349. return utf8_to_utf16(base_path + "/");
  350. #elif defined(__linux__) || defined(__FreeBSD__)
  351. std::string base_path = ".";
  352. std::vector<char> path(1024);
  353. while (true) {
  354. // get executable path
  355. # if defined(__linux__)
  356. ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
  357. # elif defined(__FreeBSD__)
  358. ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
  359. # endif
  360. if (len == -1) {
  361. break;
  362. }
  363. if (len < (ssize_t) path.size()) {
  364. base_path = std::string(path.data(), len);
  365. // remove executable name
  366. auto last_slash = base_path.find_last_of('/');
  367. if (last_slash != std::string::npos) {
  368. base_path = base_path.substr(0, last_slash);
  369. }
  370. break;
  371. }
  372. path.resize(path.size() * 2);
  373. }
  374. return utf8_to_utf16(base_path + "/");
  375. #elif defined(_WIN32)
  376. std::vector<wchar_t> path(MAX_PATH);
  377. DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
  378. if (len == 0) {
  379. return {};
  380. }
  381. std::wstring base_path(path.data(), len);
  382. // remove executable name
  383. auto last_slash = base_path.find_last_of('\\');
  384. if (last_slash != std::string::npos) {
  385. base_path = base_path.substr(0, last_slash);
  386. }
  387. return base_path + L"\\";
  388. #else
  389. return {};
  390. #endif
  391. }
  392. static std::wstring backend_filename_prefix() {
  393. #ifdef _WIN32
  394. return L"ggml-";
  395. #else
  396. return L"libggml-";
  397. #endif
  398. }
  399. static std::wstring backend_filename_suffix() {
  400. #ifdef _WIN32
  401. return L".dll";
  402. #else
  403. return L".so";
  404. #endif
  405. }
  406. static std::wstring path_separator() {
  407. #ifdef _WIN32
  408. return L"\\";
  409. #else
  410. return L"/";
  411. #endif
  412. }
  413. static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
  414. // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
  415. // TODO: search system paths
  416. std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
  417. std::vector<std::wstring> search_paths;
  418. if (user_search_path == nullptr) {
  419. search_paths.push_back(L"." + path_separator());
  420. search_paths.push_back(get_executable_path());
  421. } else {
  422. search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
  423. }
  424. int best_score = 0;
  425. std::wstring best_path;
  426. namespace fs = std::filesystem;
  427. for (const auto & search_path : search_paths) {
  428. if (!fs::exists(search_path)) {
  429. continue;
  430. }
  431. fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
  432. for (const auto & entry : dir_it) {
  433. if (entry.is_regular_file()) {
  434. std::wstring filename = entry.path().filename().wstring();
  435. std::wstring ext = entry.path().extension().wstring();
  436. if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
  437. dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
  438. if (!handle && !silent) {
  439. GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
  440. }
  441. if (handle) {
  442. auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  443. if (score_fn) {
  444. int s = score_fn();
  445. #ifndef NDEBUG
  446. GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
  447. #endif
  448. if (s > best_score) {
  449. best_score = s;
  450. best_path = entry.path().wstring();
  451. }
  452. } else {
  453. if (!silent) {
  454. GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
  455. }
  456. }
  457. }
  458. }
  459. }
  460. }
  461. }
  462. if (best_score == 0) {
  463. // try to load the base backend
  464. for (const auto & search_path : search_paths) {
  465. std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
  466. if (fs::exists(path)) {
  467. return get_reg().load_backend(path, silent);
  468. }
  469. }
  470. return nullptr;
  471. }
  472. return get_reg().load_backend(best_path, silent);
  473. }
  474. void ggml_backend_load_all() {
  475. ggml_backend_load_all_from_path(nullptr);
  476. }
  477. void ggml_backend_load_all_from_path(const char * dir_path) {
  478. #ifdef NDEBUG
  479. bool silent = true;
  480. #else
  481. bool silent = false;
  482. #endif
  483. ggml_backend_load_best("blas", silent, dir_path);
  484. ggml_backend_load_best("cann", silent, dir_path);
  485. ggml_backend_load_best("cuda", silent, dir_path);
  486. ggml_backend_load_best("hip", silent, dir_path);
  487. ggml_backend_load_best("kompute", silent, dir_path);
  488. ggml_backend_load_best("metal", silent, dir_path);
  489. ggml_backend_load_best("rpc", silent, dir_path);
  490. ggml_backend_load_best("sycl", silent, dir_path);
  491. ggml_backend_load_best("vulkan", silent, dir_path);
  492. ggml_backend_load_best("opencl", silent, dir_path);
  493. ggml_backend_load_best("musa", silent, dir_path);
  494. ggml_backend_load_best("cpu", silent, dir_path);
  495. // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
  496. const char * backend_path = std::getenv("GGML_BACKEND_PATH");
  497. if (backend_path) {
  498. ggml_backend_load(backend_path);
  499. }
  500. }