| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif
#include "ggml-rpc.h"
#include <stdexcept>
#include <string>
#include <stdio.h>
- static ggml_backend_t create_backend() {
- ggml_backend_t backend = NULL;
- #ifdef GGML_USE_CUDA
- fprintf(stderr, "%s: using CUDA backend\n", __func__);
- backend = ggml_backend_cuda_init(0); // init device 0
- if (!backend) {
- fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
- }
- #elif GGML_USE_METAL
- fprintf(stderr, "%s: using Metal backend\n", __func__);
- backend = ggml_backend_metal_init();
- if (!backend) {
- fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
- }
- #endif
- // if there aren't GPU Backends fallback to CPU backend
- if (!backend) {
- fprintf(stderr, "%s: using CPU backend\n", __func__);
- backend = ggml_backend_cpu_init();
- }
- return backend;
- }
// Reports the free and total memory of the backend through the two
// out-parameters.
//
// With GGML_USE_CUDA the values are queried from CUDA device 0. For every
// other build both outputs are set to the placeholder value 1.
static void get_backend_memory(size_t * free_mem, size_t * total_mem) {
#if !defined(GGML_USE_CUDA)
    // TODO: implement for other backends
    const size_t placeholder = 1;
    *total_mem = placeholder;
    *free_mem  = placeholder;
#else
    ggml_backend_cuda_get_device_memory(0, free_mem, total_mem);
#endif
}
- int main(int argc, char * argv[]) {
- if (argc < 3) {
- fprintf(stderr, "Usage: %s <host> <port>\n", argv[0]);
- return 1;
- }
- const char * host = argv[1];
- int port = std::stoi(argv[2]);
- if (port <= 0 || port > 65535) {
- fprintf(stderr, "Invalid port number: %d\n", port);
- return 1;
- }
- ggml_backend_t backend = create_backend();
- if (!backend) {
- fprintf(stderr, "Failed to create backend\n");
- return 1;
- }
- printf("Starting RPC server on %s:%d\n", host, port);
- size_t free_mem, total_mem;
- get_backend_memory(&free_mem, &total_mem);
- std::string endpoint = std::string(host) + ":" + std::to_string(port);
- start_rpc_server(backend, endpoint.c_str(), free_mem, total_mem);
- ggml_backend_free(backend);
- return 0;
- }
|