@@ -683,7 +683,7 @@ struct cmd_params_instance {
    bool cpu_strict;
    int poll;
    int n_gpu_layers;
-   std::string rpc_servers;
+   std::string rpc_servers_str;
    llama_split_mode split_mode;
    int main_gpu;
    bool no_kv_offload;
@@ -696,8 +696,37 @@ struct cmd_params_instance {
        llama_model_params mparams = llama_model_default_params();

        mparams.n_gpu_layers = n_gpu_layers;
-       if (!rpc_servers.empty()) {
-           mparams.rpc_servers = rpc_servers.c_str();
+       if (!rpc_servers_str.empty()) {
+           auto rpc_servers = string_split<std::string>(rpc_servers_str, ',');
+
+           // add RPC devices
+           if (!rpc_servers.empty()) {
+               ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
+               if (!rpc_reg) {
+                   fprintf(stderr, "%s: failed to find RPC backend\n", __func__);
+                   exit(1);
+               }
+
+               typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
+               ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
+               if (!ggml_backend_rpc_add_device_fn) {
+                   fprintf(stderr, "%s: failed to find RPC device add function\n", __func__);
+                   exit(1);
+               }
+               static std::vector<ggml_backend_dev_t> devices;
+               devices.clear();
+               for (const std::string & server : rpc_servers) {
+                   ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
+                   if (dev) {
+                       devices.push_back(dev);
+                   } else {
+                       fprintf(stderr, "%s: failed to add RPC device for server '%s'\n", __func__, server.c_str());
+                       exit(1);
+                   }
+               }
+               devices.push_back(nullptr);
+               mparams.devices = devices.data();
+           }
        }
        mparams.split_mode = split_mode;
        mparams.main_gpu = main_gpu;
@@ -708,7 +737,7 @@ struct cmd_params_instance {
    }

    bool equal_mparams(const cmd_params_instance & other) const {
-       return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers == other.rpc_servers &&
+       return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str &&
               split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
               tensor_split == other.tensor_split;
    }