ggml-backend-reg.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <string>
#include <vector>
  8. #ifdef _WIN32
  9. # define WIN32_LEAN_AND_MEAN
  10. # ifndef NOMINMAX
  11. # define NOMINMAX
  12. # endif
  13. # include <windows.h>
  14. #elif defined(__APPLE__)
  15. # include <mach-o/dyld.h>
  16. # include <dlfcn.h>
  17. #else
  18. # include <dlfcn.h>
  19. # include <unistd.h>
  20. #endif
  21. // Backend registry
  22. #ifdef GGML_USE_CPU
  23. #include "ggml-cpu.h"
  24. #endif
  25. #ifdef GGML_USE_CUDA
  26. #include "ggml-cuda.h"
  27. #endif
  28. #ifdef GGML_USE_METAL
  29. #include "ggml-metal.h"
  30. #endif
  31. #ifdef GGML_USE_SYCL
  32. #include "ggml-sycl.h"
  33. #endif
  34. #ifdef GGML_USE_VULKAN
  35. #include "ggml-vulkan.h"
  36. #endif
  37. #ifdef GGML_USE_BLAS
  38. #include "ggml-blas.h"
  39. #endif
  40. #ifdef GGML_USE_RPC
  41. #include "ggml-rpc.h"
  42. #endif
  43. #ifdef GGML_USE_AMX
  44. # include "ggml-amx.h"
  45. #endif
  46. #ifdef GGML_USE_CANN
  47. #include "ggml-cann.h"
  48. #endif
  49. #ifdef GGML_USE_KOMPUTE
  50. #include "ggml-kompute.h"
  51. #endif
  52. struct ggml_backend_reg_entry {
  53. ggml_backend_reg_t reg;
  54. void * handle;
  55. };
  56. struct ggml_backend_registry {
  57. std::vector<ggml_backend_reg_entry> backends;
  58. std::vector<ggml_backend_dev_t> devices;
  59. ggml_backend_registry() {
  60. #ifdef GGML_USE_CUDA
  61. register_backend(ggml_backend_cuda_reg());
  62. #endif
  63. #ifdef GGML_USE_METAL
  64. register_backend(ggml_backend_metal_reg());
  65. #endif
  66. #ifdef GGML_USE_SYCL
  67. register_backend(ggml_backend_sycl_reg());
  68. #endif
  69. #ifdef GGML_USE_VULKAN
  70. register_backend(ggml_backend_vk_reg());
  71. #endif
  72. #ifdef GGML_USE_CANN
  73. register_backend(ggml_backend_cann_reg());
  74. #endif
  75. #ifdef GGML_USE_BLAS
  76. register_backend(ggml_backend_blas_reg());
  77. #endif
  78. #ifdef GGML_USE_RPC
  79. register_backend(ggml_backend_rpc_reg());
  80. #endif
  81. #ifdef GGML_USE_AMX
  82. register_backend(ggml_backend_amx_reg());
  83. #endif
  84. #ifdef GGML_USE_KOMPUTE
  85. register_backend(ggml_backend_kompute_reg());
  86. #endif
  87. #ifdef GGML_USE_CPU
  88. register_backend(ggml_backend_cpu_reg());
  89. #endif
  90. }
  91. ~ggml_backend_registry() {
  92. while (!backends.empty()) {
  93. // use silent since the log system may have been destroyed at this point
  94. unload_backend(backends.back().reg, true);
  95. }
  96. }
  97. void register_backend(ggml_backend_reg_t reg, void * handle = nullptr) {
  98. if (!reg) {
  99. return;
  100. }
  101. #ifndef NDEBUG
  102. GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
  103. __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
  104. #endif
  105. backends.push_back({ reg, handle });
  106. for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
  107. register_device(ggml_backend_reg_dev_get(reg, i));
  108. }
  109. }
  110. void register_device(ggml_backend_dev_t device) {
  111. #ifndef NDEBUG
  112. GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
  113. #endif
  114. devices.push_back(device);
  115. }
  116. ggml_backend_reg_t load_backend(const char * path, bool silent) {
  117. #ifdef _WIN32
  118. // suppress error dialogs for missing DLLs
  119. DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
  120. SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
  121. HMODULE handle = LoadLibraryA(path);
  122. if (!handle) {
  123. if (!silent) {
  124. GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
  125. }
  126. SetErrorMode(old_mode);
  127. return nullptr;
  128. }
  129. ggml_backend_init_t backend_init = (ggml_backend_init_t) GetProcAddress(handle, "ggml_backend_init");
  130. SetErrorMode(old_mode);
  131. if (!backend_init) {
  132. if (!silent) {
  133. GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
  134. }
  135. FreeLibrary(handle);
  136. return nullptr;
  137. }
  138. #else
  139. void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
  140. if (!handle) {
  141. if (!silent) {
  142. GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
  143. }
  144. return nullptr;
  145. }
  146. auto * backend_init = (ggml_backend_init_t) dlsym(handle, "ggml_backend_init");
  147. if (!backend_init) {
  148. if (!silent) {
  149. GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %s\n", __func__, path, dlerror());
  150. }
  151. dlclose(handle);
  152. return nullptr;
  153. }
  154. #endif
  155. ggml_backend_reg_t reg = backend_init();
  156. if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
  157. if (!silent) {
  158. if (!reg) {
  159. GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
  160. } else {
  161. GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
  162. __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
  163. }
  164. }
  165. #ifdef _WIN32
  166. FreeLibrary(handle);
  167. #else
  168. dlclose(handle);
  169. #endif
  170. return nullptr;
  171. }
  172. GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
  173. register_backend(reg, handle);
  174. return reg;
  175. }
  176. void unload_backend(ggml_backend_reg_t reg, bool silent) {
  177. auto it = std::find_if(backends.begin(), backends.end(),
  178. [reg](ggml_backend_reg_entry entry) { return entry.reg == reg; });
  179. if (it == backends.end()) {
  180. if (!silent) {
  181. GGML_LOG_ERROR("%s: backend not found\n", __func__);
  182. }
  183. return;
  184. }
  185. if (!silent) {
  186. GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
  187. }
  188. // remove devices
  189. devices.erase(
  190. std::remove_if(devices.begin(), devices.end(),
  191. [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
  192. devices.end());
  193. // unload library
  194. if (it->handle) {
  195. #ifdef _WIN32
  196. FreeLibrary((HMODULE) it->handle);
  197. #else
  198. dlclose(it->handle);
  199. #endif
  200. }
  201. // remove backend
  202. backends.erase(it);
  203. }
  204. };
  205. static ggml_backend_registry & get_reg() {
  206. static ggml_backend_registry reg;
  207. return reg;
  208. }
  209. // Internal API
  210. void ggml_backend_register(ggml_backend_reg_t reg) {
  211. get_reg().register_backend(reg);
  212. }
  213. void ggml_backend_device_register(ggml_backend_dev_t device) {
  214. get_reg().register_device(device);
  215. }
  216. // Backend (reg) enumeration
  217. static bool striequals(const char * a, const char * b) {
  218. for (; *a && *b; a++, b++) {
  219. if (std::tolower(*a) != std::tolower(*b)) {
  220. return false;
  221. }
  222. }
  223. return *a == *b;
  224. }
  225. size_t ggml_backend_reg_count() {
  226. return get_reg().backends.size();
  227. }
  228. ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
  229. GGML_ASSERT(index < ggml_backend_reg_count());
  230. return get_reg().backends[index].reg;
  231. }
  232. ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
  233. for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
  234. ggml_backend_reg_t reg = ggml_backend_reg_get(i);
  235. if (striequals(ggml_backend_reg_name(reg), name)) {
  236. return reg;
  237. }
  238. }
  239. return nullptr;
  240. }
  241. // Device enumeration
  242. size_t ggml_backend_dev_count() {
  243. return get_reg().devices.size();
  244. }
  245. ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
  246. GGML_ASSERT(index < ggml_backend_dev_count());
  247. return get_reg().devices[index];
  248. }
  249. ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
  250. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  251. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  252. if (striequals(ggml_backend_dev_name(dev), name)) {
  253. return dev;
  254. }
  255. }
  256. return nullptr;
  257. }
  258. ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
  259. for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
  260. ggml_backend_dev_t dev = ggml_backend_dev_get(i);
  261. if (ggml_backend_dev_type(dev) == type) {
  262. return dev;
  263. }
  264. }
  265. return nullptr;
  266. }
  267. // Convenience functions
  268. ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
  269. ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
  270. if (!dev) {
  271. return nullptr;
  272. }
  273. return ggml_backend_dev_init(dev, params);
  274. }
  275. ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
  276. ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
  277. if (!dev) {
  278. return nullptr;
  279. }
  280. return ggml_backend_dev_init(dev, params);
  281. }
  282. ggml_backend_t ggml_backend_init_best(void) {
  283. ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
  284. if (!dev) {
  285. dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
  286. }
  287. if (!dev) {
  288. return nullptr;
  289. }
  290. return ggml_backend_dev_init(dev, nullptr);
  291. }
  292. // Dynamic loading
  293. ggml_backend_reg_t ggml_backend_load(const char * path) {
  294. return get_reg().load_backend(path, false);
  295. }
  296. void ggml_backend_unload(ggml_backend_reg_t reg) {
  297. get_reg().unload_backend(reg, true);
  298. }
  299. void ggml_backend_load_all() {
  300. std::vector<std::string> search_prefix;
  301. // add the executable directory to the search path
  302. // FIXME: this is convenient for development, but it should probably be disabled in production
  303. #if defined(__APPLE__)
  304. // get executable path
  305. std::vector<char> path;
  306. uint32_t size;
  307. while (true) {
  308. size = path.size();
  309. if (_NSGetExecutablePath(path.data(), &size) == 0) {
  310. break;
  311. }
  312. path.resize(size);
  313. }
  314. std::string base_path(path.data(), size);
  315. // remove executable name
  316. auto last_slash = base_path.find_last_of('/');
  317. if (last_slash != std::string::npos) {
  318. base_path = base_path.substr(0, last_slash);
  319. }
  320. search_prefix.push_back(base_path + "/");
  321. #elif defined(__linux__)
  322. std::string base_path = ".";
  323. std::vector<char> path(1024);
  324. while (true) {
  325. // get executable path
  326. ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
  327. if (len == -1) {
  328. break;
  329. }
  330. if (len < (ssize_t) path.size()) {
  331. base_path = std::string(path.data(), len);
  332. // remove executable name
  333. auto last_slash = base_path.find_last_of('/');
  334. if (last_slash != std::string::npos) {
  335. base_path = base_path.substr(0, last_slash);
  336. }
  337. break;
  338. }
  339. path.resize(path.size() * 2);
  340. }
  341. search_prefix.push_back(base_path + "/");
  342. #endif
  343. auto & reg = get_reg();
  344. auto try_load = [&](const std::string & name) {
  345. std::string os_name;
  346. #ifdef _WIN32
  347. os_name = "ggml-" + name + ".dll";
  348. #else
  349. os_name = "libggml-" + name + ".so";
  350. #endif
  351. if (reg.load_backend(os_name.c_str(), true)) {
  352. return;
  353. }
  354. for (const auto & prefix : search_prefix) {
  355. if (reg.load_backend((prefix + os_name).c_str(), true)) {
  356. return;
  357. }
  358. }
  359. };
  360. try_load("amx");
  361. try_load("blas");
  362. try_load("cann");
  363. try_load("cuda");
  364. try_load("hip");
  365. try_load("kompute");
  366. try_load("metal");
  367. try_load("rpc");
  368. try_load("sycl");
  369. try_load("vulkan");
  370. try_load("musa");
  371. try_load("cpu");
  372. }