ggml-backend-reg.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  #include "ggml-backend-impl.h"
  #include "ggml-backend.h"
  #include "ggml-impl.h"
  #include <algorithm>
  #include <cctype>
  #include <codecvt>
  #include <cstring>
  #include <filesystem>
  #include <locale>
  #include <memory>
  #include <string>
  #include <system_error>
  #include <type_traits>
  #include <vector>
  13. #ifdef _WIN32
  14. # define WIN32_LEAN_AND_MEAN
  15. # ifndef NOMINMAX
  16. # define NOMINMAX
  17. # endif
  18. # include <windows.h>
  19. #elif defined(__APPLE__)
  20. # include <mach-o/dyld.h>
  21. # include <dlfcn.h>
  22. #else
  23. # include <dlfcn.h>
  24. # include <unistd.h>
  25. #endif
  26. // Backend registry
  27. #ifdef GGML_USE_CPU
  28. #include "ggml-cpu.h"
  29. #endif
  30. #ifdef GGML_USE_CUDA
  31. #include "ggml-cuda.h"
  32. #endif
  33. #ifdef GGML_USE_METAL
  34. #include "ggml-metal.h"
  35. #endif
  36. #ifdef GGML_USE_SYCL
  37. #include "ggml-sycl.h"
  38. #endif
  39. #ifdef GGML_USE_VULKAN
  40. #include "ggml-vulkan.h"
  41. #endif
  42. #ifdef GGML_USE_BLAS
  43. #include "ggml-blas.h"
  44. #endif
  45. #ifdef GGML_USE_RPC
  46. #include "ggml-rpc.h"
  47. #endif
  48. #ifdef GGML_USE_CANN
  49. #include "ggml-cann.h"
  50. #endif
  51. #ifdef GGML_USE_KOMPUTE
  52. #include "ggml-kompute.h"
  53. #endif
  54. #ifdef _WIN32
  55. using dl_handle = std::remove_pointer_t<HMODULE>;
  56. struct dl_handle_deleter {
  57. void operator()(HMODULE handle) {
  58. FreeLibrary(handle);
  59. }
  60. };
  61. static dl_handle * dl_load_library(const std::wstring & path) {
  62. // suppress error dialogs for missing DLLs
  63. DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  64. SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
  65. HMODULE handle = LoadLibraryW(path.c_str());
  66. SetErrorMode(old_mode);
  67. return handle;
  68. }
  69. static dl_handle * dl_load_library(const std::string & path) {
  70. std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
  71. return dl_load_library(converter.from_bytes(path));
  72. }
  73. static void * dl_get_sym(dl_handle * handle, const char * name) {
  74. DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  75. SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
  76. void * p = (void *) GetProcAddress(handle, name);
  77. SetErrorMode(old_mode);
  78. return p;
  79. }
  80. #else
  // dlopen hands back a plain `void *`, so the handle pointee type is void.
  using dl_handle = void;

  // Deleter for dl_handle_ptr: closes a library handle with dlclose.
  struct dl_handle_deleter {
      void operator()(void * library) {
          dlclose(library);
      }
  };
  87. static void * dl_load_library(const std::string & path) {
  88. dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
  89. return handle;
  90. }
  91. static void * dl_get_sym(dl_handle * handle, const char * name) {
  92. return dlsym(handle, name);
  93. }
  94. #endif
  95. using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
  96. struct ggml_backend_reg_entry {
  97. ggml_backend_reg_t reg;
  98. dl_handle_ptr handle;
  99. };
  100. struct ggml_backend_registry {
  101. std::vector<ggml_backend_reg_entry> backends;
  102. std::vector<ggml_backend_dev_t> devices;
  103. ggml_backend_registry() {
  104. #ifdef GGML_USE_CUDA
  105. register_backend(ggml_backend_cuda_reg());
  106. #endif
  107. #ifdef GGML_USE_METAL
  108. register_backend(ggml_backend_metal_reg());
  109. #endif
  110. #ifdef GGML_USE_SYCL
  111. register_backend(ggml_backend_sycl_reg());
  112. #endif
  113. #ifdef GGML_USE_VULKAN
  114. register_backend(ggml_backend_vk_reg());
  115. #endif
  116. #ifdef GGML_USE_CANN
  117. register_backend(ggml_backend_cann_reg());
  118. #endif
  119. #ifdef GGML_USE_BLAS
  120. register_backend(ggml_backend_blas_reg());
  121. #endif
  122. #ifdef GGML_USE_RPC
  123. register_backend(ggml_backend_rpc_reg());
  124. #endif
  125. #ifdef GGML_USE_KOMPUTE
  126. register_backend(ggml_backend_kompute_reg());
  127. #endif
  128. #ifdef GGML_USE_CPU
  129. register_backend(ggml_backend_cpu_reg());
  130. #endif
  131. }
  132. ~ggml_backend_registry() {
  133. // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
  134. // since backend threads may still be running and accessing resources from the dynamic library
  135. for (auto & entry : backends) {
  136. if (entry.handle) {
  137. entry.handle.release(); // NOLINT
  138. }
  139. }
  140. }
  141. void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
  142. if (!reg) {
  143. return;
  144. }
  145. #ifndef NDEBUG
  146. GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
  147. __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
  148. #endif
  149. backends.push_back({ reg, std::move(handle) });
  150. for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
  151. register_device(ggml_backend_reg_dev_get(reg, i));
  152. }
  153. }
  154. void register_device(ggml_backend_dev_t device) {
  155. #ifndef NDEBUG
  156. GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
  157. #endif
  158. devices.push_back(device);
  159. }
  160. ggml_backend_reg_t load_backend(const char * path, bool silent) {
  161. dl_handle_ptr handle { dl_load_library(path) };
  162. if (!handle) {
  163. if (!silent) {
  164. GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
  165. }
  166. return nullptr;
  167. }
  168. auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  169. if (score_fn && score_fn() == 0) {
  170. if (!silent) {
  171. GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
  172. }
  173. return nullptr;
  174. }
  175. auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
  176. if (!backend_init_fn) {
  177. if (!silent) {
  178. GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
  179. }
  180. return nullptr;
  181. }
  182. ggml_backend_reg_t reg = backend_init_fn();
  183. if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
  184. if (!silent) {
  185. if (!reg) {
  186. GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
  187. } else {
  188. GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
  189. __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
  190. }
  191. }
  192. return nullptr;
  193. }
  194. GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
  195. register_backend(reg, std::move(handle));
  196. return reg;
  197. }
  198. void unload_backend(ggml_backend_reg_t reg, bool silent) {
  199. auto it = std::find_if(backends.begin(), backends.end(),
  200. [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
  201. if (it == backends.end()) {
  202. if (!silent) {
  203. GGML_LOG_ERROR("%s: backend not found\n", __func__);
  204. }
  205. return;
  206. }
  207. if (!silent) {
  208. GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
  209. }
  210. // remove devices
  211. devices.erase(
  212. std::remove_if(devices.begin(), devices.end(),
  213. [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
  214. devices.end());
  215. // remove backend
  216. backends.erase(it);
  217. }
  218. };
  219. static ggml_backend_registry & get_reg() {
  220. static ggml_backend_registry reg;
  221. return reg;
  222. }
  223. // Internal API
  224. void ggml_backend_register(ggml_backend_reg_t reg) {
  225. get_reg().register_backend(reg);
  226. }
  227. void ggml_backend_device_register(ggml_backend_dev_t device) {
  228. get_reg().register_device(device);
  229. }
  230. // Backend (reg) enumeration
  // Case-insensitive comparison of two NUL-terminated strings.
  //
  // Returns true when both strings have the same length and every pair of
  // characters is equal after std::tolower. A null pointer never compares equal
  // to anything, so callers passing an unset name get "no match".
  static bool striequals(const char * a, const char * b) {
      if (a == nullptr || b == nullptr) {
          return false;
      }
      for (; *a && *b; a++, b++) {
          // std::tolower requires a value representable as unsigned char (or EOF);
          // passing a negative plain char (any byte >= 0x80) is undefined behaviour
          const int lower_a = std::tolower(static_cast<unsigned char>(*a));
          const int lower_b = std::tolower(static_cast<unsigned char>(*b));
          if (lower_a != lower_b) {
              return false;
          }
      }
      // equal only if both strings ended at the same position
      return *a == *b;
  }
  239. size_t ggml_backend_reg_count() {
  240. return get_reg().backends.size();
  241. }
  242. ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
  243. GGML_ASSERT(index < ggml_backend_reg_count());
  244. return get_reg().backends[index].reg;
  245. }
  246. ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
  247. for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
  248. ggml_backend_reg_t reg = ggml_backend_reg_get(i);
  249. if (striequals(ggml_backend_reg_name(reg), name)) {
  250. return reg;
  251. }
  252. }
  253. return nullptr;
  254. }
  255. // Device enumeration
  256. size_t ggml_backend_dev_count() {
  257. return get_reg().devices.size();
  258. }
  259. ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
  260. GGML_ASSERT(index < ggml_backend_dev_count());
  261. return get_reg().devices[index];
  262. }
  263. ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
  264. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  265. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  266. if (striequals(ggml_backend_dev_name(dev), name)) {
  267. return dev;
  268. }
  269. }
  270. return nullptr;
  271. }
  272. ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
  273. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  274. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  275. if (ggml_backend_dev_type(dev) == type) {
  276. return dev;
  277. }
  278. }
  279. return nullptr;
  280. }
  281. // Convenience functions
  282. ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
  283. ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
  284. if (!dev) {
  285. return nullptr;
  286. }
  287. return ggml_backend_dev_init(dev, params);
  288. }
  289. ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
  290. ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
  291. if (!dev) {
  292. return nullptr;
  293. }
  294. return ggml_backend_dev_init(dev, params);
  295. }
  296. ggml_backend_t ggml_backend_init_best(void) {
  297. ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
  298. if (!dev) {
  299. dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
  300. }
  301. if (!dev) {
  302. return nullptr;
  303. }
  304. return ggml_backend_dev_init(dev, nullptr);
  305. }
  306. // Dynamic loading
  307. ggml_backend_reg_t ggml_backend_load(const char * path) {
  308. return get_reg().load_backend(path, false);
  309. }
  310. void ggml_backend_unload(ggml_backend_reg_t reg) {
  311. get_reg().unload_backend(reg, true);
  312. }
  // Returns the directory that contains the running executable, with a trailing
  // path separator ('/' on Apple and Linux, '\\' on Windows).
  //
  // Fallbacks: on Linux "./" is returned when /proc/self/exe cannot be read; on
  // Windows an empty string is returned when GetModuleFileNameA fails; on any
  // other platform an empty string is returned because no lookup is implemented.
  static std::string get_executable_path() {
  #if defined(__APPLE__)
      // get executable path: start with an empty buffer and resize to the size
      // the call stores in `size` each time it reports failure
      std::vector<char> path;
      uint32_t size;
      while (true) {
          size = path.size();
          if (_NSGetExecutablePath(path.data(), &size) == 0) {
              break;
          }
          path.resize(size);
      }
      // build from the C string rather than from `size`, which is the buffer size
      // and would copy the NUL terminator into the result
      std::string base_path = path.empty() ? std::string() : std::string(path.data());
      // remove executable name
      auto last_slash = base_path.find_last_of('/');
      if (last_slash != std::string::npos) {
          base_path = base_path.substr(0, last_slash);
      }
      return base_path + "/";
  #elif defined(__linux__)
      std::string base_path = ".";
      std::vector<char> path(1024);
      while (true) {
          // get executable path
          ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
          if (len == -1) {
              break;
          }
          // readlink does not NUL-terminate; a result that fills the buffer may be truncated
          if (len < (ssize_t) path.size()) {
              base_path = std::string(path.data(), len);
              // remove executable name
              auto last_slash = base_path.find_last_of('/');
              if (last_slash != std::string::npos) {
                  base_path = base_path.substr(0, last_slash);
              }
              break;
          }
          path.resize(path.size() * 2);
      }
      return base_path + "/";
  #elif defined(_WIN32)
      std::vector<char> path(MAX_PATH);
      DWORD len = 0;
      while (true) {
          len = GetModuleFileNameA(NULL, path.data(), (DWORD) path.size());
          if (len == 0) {
              return "";
          }
          // a return value equal to the buffer size means the path was truncated
          if (len < path.size()) {
              break;
          }
          path.resize(path.size() * 2);
      }
      std::string base_path(path.data(), len);
      // remove executable name
      auto last_slash = base_path.find_last_of('\\');
      if (last_slash != std::string::npos) {
          base_path = base_path.substr(0, last_slash);
      }
      return base_path + "\\";
  #else
      // no implementation for this platform; without this return the function
      // would fall off the end of a non-void function
      return "";
  #endif
  }
  // File-name prefix of backend libraries: "ggml-" on Windows, "libggml-" elsewhere.
  static std::string backend_filename_prefix() {
  #ifdef _WIN32
      static const char * const prefix = "ggml-";
  #else
      static const char * const prefix = "libggml-";
  #endif
      return std::string(prefix);
  }
  // File-name extension of backend libraries: ".dll" on Windows, ".so" elsewhere.
  static std::string backend_filename_suffix() {
  #ifdef _WIN32
      static const char * const suffix = ".dll";
  #else
      static const char * const suffix = ".so";
  #endif
      return std::string(suffix);
  }
  382. static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
  383. // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
  384. // TODO: search system paths
  385. std::string file_prefix = backend_filename_prefix() + name + "-";
  386. std::vector<std::string> search_paths;
  387. if (user_search_path == nullptr) {
  388. search_paths.push_back("./");
  389. search_paths.push_back(get_executable_path());
  390. } else {
  391. #if defined(_WIN32)
  392. search_paths.push_back(std::string(user_search_path) + "\\");
  393. #else
  394. search_paths.push_back(std::string(user_search_path) + "/");
  395. #endif
  396. }
  397. int best_score = 0;
  398. std::string best_path;
  399. namespace fs = std::filesystem;
  400. for (const auto & search_path : search_paths) {
  401. if (!fs::exists(search_path)) {
  402. continue;
  403. }
  404. fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
  405. for (const auto & entry : dir_it) {
  406. if (entry.is_regular_file()) {
  407. std::string filename = entry.path().filename().string();
  408. std::string ext = entry.path().extension().string();
  409. if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
  410. dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
  411. if (!handle && !silent) {
  412. GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
  413. }
  414. if (handle) {
  415. auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
  416. if (score_fn) {
  417. int s = score_fn();
  418. #ifndef NDEBUG
  419. GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
  420. #endif
  421. if (s > best_score) {
  422. best_score = s;
  423. best_path = entry.path().string();
  424. }
  425. } else {
  426. if (!silent) {
  427. GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
  428. }
  429. }
  430. }
  431. }
  432. }
  433. }
  434. }
  435. if (best_score == 0) {
  436. // try to load the base backend
  437. for (const auto & search_path : search_paths) {
  438. std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
  439. if (fs::exists(path)) {
  440. return get_reg().load_backend(path.c_str(), silent);
  441. }
  442. }
  443. return nullptr;
  444. }
  445. return get_reg().load_backend(best_path.c_str(), silent);
  446. }
  447. void ggml_backend_load_all() {
  448. ggml_backend_load_all_from_path(nullptr);
  449. }
  450. void ggml_backend_load_all_from_path(const char * dir_path) {
  451. #ifdef NDEBUG
  452. bool silent = true;
  453. #else
  454. bool silent = false;
  455. #endif
  456. ggml_backend_load_best("blas", silent, dir_path);
  457. ggml_backend_load_best("cann", silent, dir_path);
  458. ggml_backend_load_best("cuda", silent, dir_path);
  459. ggml_backend_load_best("hip", silent, dir_path);
  460. ggml_backend_load_best("kompute", silent, dir_path);
  461. ggml_backend_load_best("metal", silent, dir_path);
  462. ggml_backend_load_best("rpc", silent, dir_path);
  463. ggml_backend_load_best("sycl", silent, dir_path);
  464. ggml_backend_load_best("vulkan", silent, dir_path);
  465. ggml_backend_load_best("musa", silent, dir_path);
  466. ggml_backend_load_best("cpu", silent, dir_path);
  467. }