|
@@ -16,6 +16,7 @@
|
|
|
#include <cinttypes>
|
|
#include <cinttypes>
|
|
|
#include <cstddef>
|
|
#include <cstddef>
|
|
|
#include <cstdint>
|
|
#include <cstdint>
|
|
|
|
|
+#include <cstdlib>
|
|
|
#include <float.h>
|
|
#include <float.h>
|
|
|
#include <limits>
|
|
#include <limits>
|
|
|
#include <stdint.h>
|
|
#include <stdint.h>
|
|
@@ -24,10 +25,9 @@
|
|
|
#include <cmath>
|
|
#include <cmath>
|
|
|
#include <iostream>
|
|
#include <iostream>
|
|
|
#include <fstream>
|
|
#include <fstream>
|
|
|
-
|
|
|
|
|
#include <stdio.h>
|
|
#include <stdio.h>
|
|
|
#include <stdlib.h>
|
|
#include <stdlib.h>
|
|
|
-
|
|
|
|
|
|
|
+#include <regex>
|
|
|
|
|
|
|
|
#include <sycl/sycl.hpp>
|
|
#include <sycl/sycl.hpp>
|
|
|
#include <sycl/half_type.hpp>
|
|
#include <sycl/half_type.hpp>
|
|
@@ -82,6 +82,30 @@ Following definition copied from DPCT head files, which are used by ggml-sycl.cp
|
|
|
#define __dpct_noinline__ __attribute__((noinline))
|
|
#define __dpct_noinline__ __attribute__((noinline))
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
|
+std::string get_device_type_name(const sycl::device &Device) {
|
|
|
|
|
+ auto DeviceType = Device.get_info<sycl::info::device::device_type>();
|
|
|
|
|
+ switch (DeviceType) {
|
|
|
|
|
+ case sycl::info::device_type::cpu:
|
|
|
|
|
+ return "cpu";
|
|
|
|
|
+ case sycl::info::device_type::gpu:
|
|
|
|
|
+ return "gpu";
|
|
|
|
|
+ case sycl::info::device_type::host:
|
|
|
|
|
+ return "host";
|
|
|
|
|
+ case sycl::info::device_type::accelerator:
|
|
|
|
|
+ return "acc";
|
|
|
|
|
+ default:
|
|
|
|
|
+ return "unknown";
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+std::string get_device_backend_and_type(const sycl::device &device) {
|
|
|
|
|
+ std::stringstream device_type;
|
|
|
|
|
+ sycl::backend backend = device.get_backend();
|
|
|
|
|
+ device_type << backend << ":" << get_device_type_name(device);
|
|
|
|
|
+ return device_type.str();
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
namespace dpct
|
|
namespace dpct
|
|
|
{
|
|
{
|
|
|
typedef sycl::queue *queue_ptr;
|
|
typedef sycl::queue *queue_ptr;
|
|
@@ -942,17 +966,65 @@ namespace dpct
|
|
|
|
|
|
|
|
private:
|
|
private:
|
|
|
mutable std::recursive_mutex m_mutex;
|
|
mutable std::recursive_mutex m_mutex;
|
|
|
|
|
+ static bool compare_dev(sycl::device &device1, sycl::device &device2)
|
|
|
|
|
+ {
|
|
|
|
|
+ dpct::device_info prop1;
|
|
|
|
|
+ dpct::get_device_info(prop1, device1);
|
|
|
|
|
+ dpct::device_info prop2;
|
|
|
|
|
+ dpct::get_device_info(prop2, device2);
|
|
|
|
|
+ return prop1.get_max_compute_units() > prop2.get_max_compute_units();
|
|
|
|
|
+ }
|
|
|
|
|
+ static int convert_backend_index(std::string & backend) {
|
|
|
|
|
+ if (backend == "ext_oneapi_level_zero:gpu") return 0;
|
|
|
|
|
+ if (backend == "opencl:gpu") return 1;
|
|
|
|
|
+ if (backend == "opencl:cpu") return 2;
|
|
|
|
|
+ if (backend == "opencl:acc") return 3;
|
|
|
|
|
+ printf("convert_backend_index: can't handle backend=%s\n", backend.c_str());
|
|
|
|
|
+ GGML_ASSERT(false);
|
|
|
|
|
+ }
|
|
|
|
|
+ static bool compare_backend(std::string &backend1, std::string &backend2) {
|
|
|
|
|
+ return convert_backend_index(backend1) < convert_backend_index(backend2);
|
|
|
|
|
+ }
|
|
|
dev_mgr()
|
|
dev_mgr()
|
|
|
{
|
|
{
|
|
|
sycl::device default_device =
|
|
sycl::device default_device =
|
|
|
sycl::device(sycl::default_selector_v);
|
|
sycl::device(sycl::default_selector_v);
|
|
|
_devs.push_back(std::make_shared<device_ext>(default_device));
|
|
_devs.push_back(std::make_shared<device_ext>(default_device));
|
|
|
|
|
|
|
|
- std::vector<sycl::device> sycl_all_devs =
|
|
|
|
|
- sycl::device::get_devices(sycl::info::device_type::all);
|
|
|
|
|
|
|
+ std::vector<sycl::device> sycl_all_devs;
|
|
|
// Collect other devices except for the default device.
|
|
// Collect other devices except for the default device.
|
|
|
if (default_device.is_cpu())
|
|
if (default_device.is_cpu())
|
|
|
_cpu_device = 0;
|
|
_cpu_device = 0;
|
|
|
|
|
+
|
|
|
|
|
+ auto Platforms = sycl::platform::get_platforms();
|
|
|
|
|
+ // Keep track of the number of devices per backend
|
|
|
|
|
+ std::map<sycl::backend, size_t> DeviceNums;
|
|
|
|
|
+ std::map<std::string, std::vector<sycl::device>> backend_devices;
|
|
|
|
|
+
|
|
|
|
|
+ while (!Platforms.empty()) {
|
|
|
|
|
+ auto Platform = Platforms.back();
|
|
|
|
|
+ Platforms.pop_back();
|
|
|
|
|
+ auto devices = Platform.get_devices();
|
|
|
|
|
+ std::string backend_type = get_device_backend_and_type(devices[0]);
|
|
|
|
|
+ for (const auto &device : devices) {
|
|
|
|
|
+ backend_devices[backend_type].push_back(device);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ std::vector<std::string> keys;
|
|
|
|
|
+ for(auto it = backend_devices.begin(); it != backend_devices.end(); ++it) {
|
|
|
|
|
+ keys.push_back(it->first);
|
|
|
|
|
+ }
|
|
|
|
|
+ std::sort(keys.begin(), keys.end(), compare_backend);
|
|
|
|
|
+
|
|
|
|
|
+ for (auto &key : keys) {
|
|
|
|
|
+ std::vector<sycl::device> devs = backend_devices[key];
|
|
|
|
|
+ std::sort(devs.begin(), devs.end(), compare_dev);
|
|
|
|
|
+ for (const auto &dev : devs) {
|
|
|
|
|
+ sycl_all_devs.push_back(dev);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
for (auto &dev : sycl_all_devs)
|
|
for (auto &dev : sycl_all_devs)
|
|
|
{
|
|
{
|
|
|
if (dev == default_device)
|
|
if (dev == default_device)
|
|
@@ -3202,6 +3274,11 @@ static int g_work_group_size = 0;
|
|
|
#define GGML_SYCL_MMV_Y 1
|
|
#define GGML_SYCL_MMV_Y 1
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
|
|
+enum ggml_sycl_backend_gpu_mode {
|
|
|
|
|
+ SYCL_UNSET_GPU_MODE = -1,
|
|
|
|
|
+ SYCL_SINGLE_GPU_MODE = 0,
|
|
|
|
|
+ SYCL_MUL_GPU_MODE
|
|
|
|
|
+};
|
|
|
|
|
|
|
|
static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");
|
|
static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");
|
|
|
|
|
|
|
@@ -3401,12 +3478,31 @@ class sycl_gpu_mgr {
|
|
|
int work_group_size = 0;
|
|
int work_group_size = 0;
|
|
|
std::string gpus_list = "";
|
|
std::string gpus_list = "";
|
|
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
|
+ Use all GPUs with same top max compute units
|
|
|
|
|
+ */
|
|
|
sycl_gpu_mgr() {
|
|
sycl_gpu_mgr() {
|
|
|
detect_sycl_gpu_list_with_max_cu();
|
|
detect_sycl_gpu_list_with_max_cu();
|
|
|
get_allow_gpus();
|
|
get_allow_gpus();
|
|
|
create_context_with_gpus();
|
|
create_context_with_gpus();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
|
+ Only use the assigned GPU
|
|
|
|
|
+ */
|
|
|
|
|
+ sycl_gpu_mgr(int main_gpu_id) {
|
|
|
|
|
+ sycl::device device = dpct::dev_mgr::instance().get_device(main_gpu_id);
|
|
|
|
|
+ dpct::device_info prop;
|
|
|
|
|
+ dpct::get_device_info(prop, device);
|
|
|
|
|
+ gpus.push_back(main_gpu_id);
|
|
|
|
|
+ devices.push_back(device);
|
|
|
|
|
+ work_group_size = prop.get_max_work_group_size();
|
|
|
|
|
+ max_compute_units = prop.get_max_compute_units();
|
|
|
|
|
+
|
|
|
|
|
+ get_allow_gpus();
|
|
|
|
|
+ create_context_with_gpus();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
void create_context_with_gpus() {
|
|
void create_context_with_gpus() {
|
|
|
sycl::context ctx = sycl::context(devices);
|
|
sycl::context ctx = sycl::context(devices);
|
|
|
assert(gpus.size() > 0);
|
|
assert(gpus.size() > 0);
|
|
@@ -3422,7 +3518,7 @@ class sycl_gpu_mgr {
|
|
|
gpus_list += std::to_string(gpus[i]);
|
|
gpus_list += std::to_string(gpus[i]);
|
|
|
gpus_list += ",";
|
|
gpus_list += ",";
|
|
|
}
|
|
}
|
|
|
- if (gpus_list.length() > 2) {
|
|
|
|
|
|
|
+ if (gpus_list.length() > 1) {
|
|
|
gpus_list.pop_back();
|
|
gpus_list.pop_back();
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -3471,8 +3567,8 @@ class sycl_gpu_mgr {
|
|
|
if (gpus[i] == id)
|
|
if (gpus[i] == id)
|
|
|
return i;
|
|
return i;
|
|
|
}
|
|
}
|
|
|
- assert(false);
|
|
|
|
|
- return -1;
|
|
|
|
|
|
|
+ printf("miss to get device index by id=%d\n", id);
|
|
|
|
|
+ GGML_ASSERT(false);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
int get_next_index(int id) {
|
|
int get_next_index(int id) {
|
|
@@ -3481,8 +3577,7 @@ class sycl_gpu_mgr {
|
|
|
if (gpus[i] == id)
|
|
if (gpus[i] == id)
|
|
|
return i;
|
|
return i;
|
|
|
}
|
|
}
|
|
|
- assert(false);
|
|
|
|
|
- return -1;
|
|
|
|
|
|
|
+ GGML_ASSERT(false);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
bool is_ext_oneapi_device(const sycl::device &dev) {
|
|
bool is_ext_oneapi_device(const sycl::device &dev) {
|
|
@@ -3500,11 +3595,14 @@ static int g_device_count = -1;
|
|
|
static int g_all_sycl_device_count = -1;
|
|
static int g_all_sycl_device_count = -1;
|
|
|
static int g_main_device = -1;
|
|
static int g_main_device = -1;
|
|
|
static int g_main_device_id = -1;
|
|
static int g_main_device_id = -1;
|
|
|
|
|
+static bool g_ggml_backend_sycl_buffer_type_initialized = false;
|
|
|
|
|
|
|
|
static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};
|
|
static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};
|
|
|
|
|
|
|
|
static float g_tensor_split[GGML_SYCL_MAX_DEVICES] = {0};
|
|
static float g_tensor_split[GGML_SYCL_MAX_DEVICES] = {0};
|
|
|
|
|
|
|
|
|
|
+static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode = SYCL_UNSET_GPU_MODE;
|
|
|
|
|
+
|
|
|
struct sycl_device_capabilities {
|
|
struct sycl_device_capabilities {
|
|
|
int cc; // compute capability
|
|
int cc; // compute capability
|
|
|
bool vmm; // virtual memory support
|
|
bool vmm; // virtual memory support
|
|
@@ -13008,17 +13106,20 @@ bool ggml_sycl_loaded(void) {
|
|
|
return g_sycl_loaded;
|
|
return g_sycl_loaded;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-void print_device_detail(int id) {
|
|
|
|
|
|
|
+void print_device_detail(int id, sycl::device &device, std::string device_type) {
|
|
|
|
|
+
|
|
|
dpct::device_info prop;
|
|
dpct::device_info prop;
|
|
|
SYCL_CHECK(CHECK_TRY_ERROR(
|
|
SYCL_CHECK(CHECK_TRY_ERROR(
|
|
|
- dpct::get_device_info(prop, dpct::dev_mgr::instance().get_device(id))));
|
|
|
|
|
- sycl::device cur_device = dpct::dev_mgr::instance().get_device(id);
|
|
|
|
|
|
|
+ dpct::get_device_info(prop, device)));
|
|
|
|
|
+
|
|
|
std::string version;
|
|
std::string version;
|
|
|
version += std::to_string(prop.get_major_version());
|
|
version += std::to_string(prop.get_major_version());
|
|
|
version += ".";
|
|
version += ".";
|
|
|
version += std::to_string(prop.get_minor_version());
|
|
version += std::to_string(prop.get_minor_version());
|
|
|
|
|
|
|
|
- fprintf(stderr, "|%2d|%45s|%18s|%17d|%14d|%13d|%15lu|\n", id,
|
|
|
|
|
|
|
+ device_type = std::regex_replace(device_type, std::regex("ext_oneapi_"), "");
|
|
|
|
|
+
|
|
|
|
|
+ fprintf(stderr, "|%2d|%18s|%45s|%10s|%11d|%8d|%7d|%15lu|\n", id, device_type.c_str(),
|
|
|
prop.get_name(), version.c_str(), prop.get_max_compute_units(),
|
|
prop.get_name(), version.c_str(), prop.get_max_compute_units(),
|
|
|
prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
|
|
prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
|
|
|
prop.get_global_mem_size());
|
|
prop.get_global_mem_size());
|
|
@@ -13026,19 +13127,35 @@ void print_device_detail(int id) {
|
|
|
|
|
|
|
|
void ggml_backend_sycl_print_sycl_devices() {
|
|
void ggml_backend_sycl_print_sycl_devices() {
|
|
|
int device_count = dpct::dev_mgr::instance().device_count();
|
|
int device_count = dpct::dev_mgr::instance().device_count();
|
|
|
|
|
+ std::map<std::string, size_t> DeviceNums;
|
|
|
fprintf(stderr, "found %d SYCL devices:\n", device_count);
|
|
fprintf(stderr, "found %d SYCL devices:\n", device_count);
|
|
|
- fprintf(stderr, "|ID| Name |compute capability|Max compute units|Max work group|Max sub group|Global mem size|\n");
|
|
|
|
|
- fprintf(stderr, "|--|---------------------------------------------|------------------|-----------------|--------------|-------------|---------------|\n");
|
|
|
|
|
|
|
+ fprintf(stderr, "| | | |Compute |Max compute|Max work|Max sub| |\n");
|
|
|
|
|
+ fprintf(stderr, "|ID| Device Type| Name|capability|units |group |group |Global mem size|\n");
|
|
|
|
|
+ fprintf(stderr, "|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|\n");
|
|
|
for (int id = 0; id < device_count; ++id) {
|
|
for (int id = 0; id < device_count; ++id) {
|
|
|
- print_device_detail(id);
|
|
|
|
|
|
|
+ sycl::device device = dpct::dev_mgr::instance().get_device(id);
|
|
|
|
|
+ sycl::backend backend = device.get_backend();
|
|
|
|
|
+ std::string backend_type = get_device_backend_and_type(device);
|
|
|
|
|
+ int type_id=DeviceNums[backend_type]++;
|
|
|
|
|
+ std::stringstream device_type;
|
|
|
|
|
+ device_type << "[" << backend_type << ":" << std::to_string(type_id) << "]";
|
|
|
|
|
+ print_device_detail(id, device, device_type.str());
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void print_gpu_device_list() {
|
|
void print_gpu_device_list() {
|
|
|
- fprintf(stderr, "detect %d SYCL GPUs: [%s] with Max compute units:%d\n",
|
|
|
|
|
- g_sycl_gpu_mgr->get_gpu_count(),
|
|
|
|
|
- g_sycl_gpu_mgr->gpus_list.c_str(),
|
|
|
|
|
- g_sycl_gpu_mgr->max_compute_units);
|
|
|
|
|
|
|
+ GGML_ASSERT(g_sycl_gpu_mgr);
|
|
|
|
|
+
|
|
|
|
|
+ char* hint=NULL;
|
|
|
|
|
+ if (g_ggml_sycl_backend_gpu_mode == SYCL_SINGLE_GPU_MODE) {
|
|
|
|
|
+ hint = "use %d SYCL GPUs: [%s] with Max compute units:%d\n";
|
|
|
|
|
+ } else {
|
|
|
|
|
+ hint = "detect %d SYCL GPUs: [%s] with top Max compute units:%d\n";
|
|
|
|
|
+ }
|
|
|
|
|
+ fprintf(stderr, hint,
|
|
|
|
|
+ g_sycl_gpu_mgr->get_gpu_count(),
|
|
|
|
|
+ g_sycl_gpu_mgr->gpus_list.c_str(),
|
|
|
|
|
+ g_sycl_gpu_mgr->max_compute_units);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
int get_sycl_env(const char *env_name, int default_val) {
|
|
int get_sycl_env(const char *env_name, int default_val) {
|
|
@@ -13074,6 +13191,15 @@ void ggml_init_sycl() try {
|
|
|
#else
|
|
#else
|
|
|
fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
|
|
fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
|
|
|
#endif
|
|
#endif
|
|
|
|
|
+
|
|
|
|
|
+/* NOT REMOVE, keep it for next optimize for XMX.
|
|
|
|
|
+#if defined(SYCL_USE_XMX)
|
|
|
|
|
+ fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
|
|
|
|
|
+#else
|
|
|
|
|
+ fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
|
|
|
|
|
+#endif
|
|
|
|
|
+*/
|
|
|
|
|
+
|
|
|
if (CHECK_TRY_ERROR(g_all_sycl_device_count =
|
|
if (CHECK_TRY_ERROR(g_all_sycl_device_count =
|
|
|
dpct::dev_mgr::instance().device_count()) != 0) {
|
|
dpct::dev_mgr::instance().device_count()) != 0) {
|
|
|
initialized = true;
|
|
initialized = true;
|
|
@@ -13082,68 +13208,65 @@ void ggml_init_sycl() try {
|
|
|
}
|
|
}
|
|
|
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
|
|
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
|
|
|
ggml_backend_sycl_print_sycl_devices();
|
|
ggml_backend_sycl_print_sycl_devices();
|
|
|
|
|
+ initialized = true;
|
|
|
|
|
+ g_sycl_loaded = true;
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+catch (sycl::exception const &exc) {
|
|
|
|
|
+ std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
|
|
|
|
+ << ", line:" << __LINE__ << std::endl;
|
|
|
|
|
+ std::exit(1);
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
- if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
|
|
|
|
|
-
|
|
|
|
|
- g_device_count = g_sycl_gpu_mgr->get_gpu_count();
|
|
|
|
|
- g_work_group_size = g_sycl_gpu_mgr->work_group_size;
|
|
|
|
|
-
|
|
|
|
|
- print_gpu_device_list();
|
|
|
|
|
|
|
+void ggml_init_by_gpus(int device_count) try {
|
|
|
|
|
+ g_device_count = device_count;
|
|
|
|
|
+ g_work_group_size = g_sycl_gpu_mgr->work_group_size;
|
|
|
|
|
|
|
|
- int64_t total_vram = 0;
|
|
|
|
|
|
|
+ int64_t total_vram = 0;
|
|
|
|
|
|
|
|
-/* NOT REMOVE, keep it for next optimize for XMX.
|
|
|
|
|
-#if defined(SYCL_USE_XMX)
|
|
|
|
|
- fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
|
|
|
|
|
-#else
|
|
|
|
|
- fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
|
|
|
|
|
-#endif
|
|
|
|
|
-*/
|
|
|
|
|
- for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
|
|
|
|
|
- g_device_caps[id].vmm = 0;
|
|
|
|
|
- g_device_caps[id].device_id = -1;
|
|
|
|
|
- g_device_caps[id].cc = 0;
|
|
|
|
|
- g_tensor_split[id] = 0;
|
|
|
|
|
- g_default_tensor_split[id] = 0;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ print_gpu_device_list();
|
|
|
|
|
|
|
|
- for (int i = 0; i < g_device_count; ++i) {
|
|
|
|
|
- int device_id = g_sycl_gpu_mgr->gpus[i];
|
|
|
|
|
- g_device_caps[i].vmm = 0;
|
|
|
|
|
|
|
+ for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
|
|
|
|
|
+ g_device_caps[id].vmm = 0;
|
|
|
|
|
+ g_device_caps[id].device_id = -1;
|
|
|
|
|
+ g_device_caps[id].cc = 0;
|
|
|
|
|
+ g_tensor_split[id] = 0;
|
|
|
|
|
+ g_default_tensor_split[id] = 0;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- dpct::device_info prop;
|
|
|
|
|
- SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
|
|
|
|
- prop, dpct::dev_mgr::instance().get_device(device_id))));
|
|
|
|
|
|
|
+ for (int i = 0; i < g_device_count; ++i) {
|
|
|
|
|
+ int device_id = g_sycl_gpu_mgr->gpus[i];
|
|
|
|
|
+ g_device_caps[i].vmm = 0;
|
|
|
|
|
|
|
|
- g_default_tensor_split[i] = total_vram;
|
|
|
|
|
- total_vram += prop.get_global_mem_size();
|
|
|
|
|
|
|
+ dpct::device_info prop;
|
|
|
|
|
+ SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
|
|
|
|
+ prop, dpct::dev_mgr::instance().get_device(device_id))));
|
|
|
|
|
|
|
|
- g_device_caps[i].cc =
|
|
|
|
|
- 100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ g_default_tensor_split[i] = total_vram;
|
|
|
|
|
+ total_vram += prop.get_global_mem_size();
|
|
|
|
|
|
|
|
- for (int i = 0; i < g_device_count; ++i) {
|
|
|
|
|
- g_default_tensor_split[i] /= total_vram;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ g_device_caps[i].cc =
|
|
|
|
|
+ 100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- for (int i = 0; i < g_device_count; ++i) {
|
|
|
|
|
- SYCL_CHECK(ggml_sycl_set_device(i));
|
|
|
|
|
|
|
+ for (int i = 0; i < g_device_count; ++i) {
|
|
|
|
|
+ g_default_tensor_split[i] /= total_vram;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- // create sycl streams
|
|
|
|
|
- for (int is = 0; is < MAX_STREAMS; ++is) {
|
|
|
|
|
- SYCL_CHECK(CHECK_TRY_ERROR(
|
|
|
|
|
- g_syclStreams[i][is] =
|
|
|
|
|
- dpct::get_current_device().create_queue(
|
|
|
|
|
- g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ for (int i = 0; i < g_device_count; ++i) {
|
|
|
|
|
+ SYCL_CHECK(ggml_sycl_set_device(i));
|
|
|
|
|
|
|
|
- const dpct::queue_ptr stream = g_syclStreams[i][0];
|
|
|
|
|
- // create sycl handle
|
|
|
|
|
- SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
|
|
|
|
|
|
|
+ // create sycl streams
|
|
|
|
|
+ for (int is = 0; is < MAX_STREAMS; ++is) {
|
|
|
|
|
+ SYCL_CHECK(CHECK_TRY_ERROR(
|
|
|
|
|
+ g_syclStreams[i][is] =
|
|
|
|
|
+ dpct::get_current_device().create_queue(
|
|
|
|
|
+ g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- initialized = true;
|
|
|
|
|
- g_sycl_loaded = true;
|
|
|
|
|
|
|
+ const dpct::queue_ptr stream = g_syclStreams[i][0];
|
|
|
|
|
+ // create sycl handle
|
|
|
|
|
+ SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
catch (sycl::exception const &exc) {
|
|
catch (sycl::exception const &exc) {
|
|
@@ -16551,22 +16674,24 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
|
|
/* .is_host = */ nullptr,
|
|
/* .is_host = */ nullptr,
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
-ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
|
|
|
|
|
|
|
+ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
|
|
|
|
|
+ if (device_index>=g_device_count or device_index<0) {
|
|
|
|
|
+ printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
|
|
|
|
+ device_index, g_device_count-1);
|
|
|
|
|
+ GGML_ASSERT(device_index<g_device_count);
|
|
|
|
|
+ }
|
|
|
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
|
|
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
|
|
|
|
|
|
|
|
- static bool ggml_backend_sycl_buffer_type_initialized = false;
|
|
|
|
|
-
|
|
|
|
|
- if (!ggml_backend_sycl_buffer_type_initialized) {
|
|
|
|
|
|
|
+ if (!g_ggml_backend_sycl_buffer_type_initialized) {
|
|
|
for (int i = 0; i < g_device_count; i++) {
|
|
for (int i = 0; i < g_device_count; i++) {
|
|
|
ggml_backend_sycl_buffer_types[i] = {
|
|
ggml_backend_sycl_buffer_types[i] = {
|
|
|
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
|
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
|
|
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])},
|
|
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])},
|
|
|
};
|
|
};
|
|
|
}
|
|
}
|
|
|
- ggml_backend_sycl_buffer_type_initialized = true;
|
|
|
|
|
|
|
+ g_ggml_backend_sycl_buffer_type_initialized = true;
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- return &ggml_backend_sycl_buffer_types[device];
|
|
|
|
|
|
|
+ return &ggml_backend_sycl_buffer_types[device_index];
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// sycl split buffer type
|
|
// sycl split buffer type
|
|
@@ -17319,11 +17444,42 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
|
|
|
return g_sycl_gpu_mgr->get_index(device_id);
|
|
return g_sycl_gpu_mgr->get_index(device_id);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
|
|
|
|
|
+ return g_sycl_gpu_mgr->gpus[device_index];
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
|
|
|
|
|
+ GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
|
|
|
|
|
+ fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
|
|
|
|
|
+ if (g_sycl_gpu_mgr) {
|
|
|
|
|
+ delete g_sycl_gpu_mgr;
|
|
|
|
|
+ }
|
|
|
|
|
+ g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);
|
|
|
|
|
+ g_ggml_sycl_backend_gpu_mode = SYCL_SINGLE_GPU_MODE;
|
|
|
|
|
+ ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
|
|
|
|
|
+ g_ggml_backend_sycl_buffer_type_initialized = false;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
|
|
|
|
|
+ if (g_ggml_sycl_backend_gpu_mode == SYCL_MUL_GPU_MODE) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ fprintf(stderr, "ggml_backend_sycl_set_mul_device_mode: true\n");
|
|
|
|
|
+
|
|
|
|
|
+ if (g_sycl_gpu_mgr) {
|
|
|
|
|
+ delete g_sycl_gpu_mgr;
|
|
|
|
|
+ }
|
|
|
|
|
+ g_sycl_gpu_mgr = new sycl_gpu_mgr();
|
|
|
|
|
+ g_ggml_sycl_backend_gpu_mode = SYCL_MUL_GPU_MODE;
|
|
|
|
|
+ ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
|
|
|
|
|
+ g_ggml_backend_sycl_buffer_type_initialized = false;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
extern "C" int ggml_backend_sycl_reg_devices();
|
|
extern "C" int ggml_backend_sycl_reg_devices();
|
|
|
|
|
|
|
|
int ggml_backend_sycl_reg_devices() {
|
|
int ggml_backend_sycl_reg_devices() {
|
|
|
- if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
|
|
|
|
|
- g_device_count = g_sycl_gpu_mgr->get_gpu_count();
|
|
|
|
|
|
|
+ ggml_backend_sycl_set_mul_device_mode();
|
|
|
assert(g_device_count>0);
|
|
assert(g_device_count>0);
|
|
|
for (int i = 0; i < g_device_count; i++) {
|
|
for (int i = 0; i < g_device_count; i++) {
|
|
|
int id = g_sycl_gpu_mgr->gpus[i];
|
|
int id = g_sycl_gpu_mgr->gpus[i];
|