1 год назад · e75c6279d1
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -136,6 +136,7 @@ node index.js
 
				   - `{"status": "loading model"}` if the model is still being loaded.
			
 
				   - `{"status": "error"}` if the model failed to load.
			
 
				   - `{"status": "ok"}` if the model is successfully loaded and the server is ready for further requests mentioned below.
			
 
				+  - `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if no slots are currently available.
			
 
				 
			
 
				 - **POST** `/completion`: Given a `prompt`, it returns the predicted completion.
			
 
				 
			
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2578,8 +2578,35 @@ int main(int argc, char **argv)
 
				         server_state current_state = state.load();
			
 
				         switch(current_state) {
			
 
				             case SERVER_STATE_READY:
			
 
				-                res.set_content(R"({"status": "ok"})", "application/json");
			
 
				-                res.status = 200; // HTTP OK
			
 
				+                if (llama.all_slots_are_idle) {
			
 
				+                    res.set_content(R"({"status": "ok"})", "application/json");
			
 
				+                    res.status = 200; // HTTP OK
			
 
				+                } else {
			
 
				+                    int available_slots = 0;
			
 
				+                    int processing_slots = 0;
			
 
				+                    for (llama_client_slot & slot : llama.slots) {
			
 
				+                        if (slot.available()) {
			
 
				+                            available_slots++;
			
 
				+                        } else {
			
 
				+                            processing_slots++;
			
 
				+                        }
			
 
				+                    }
			
 
				+                    if (available_slots > 0) {
			
 
				+                        json health = {
			
 
				+                                {"status",           "ok"},
			
 
				+                                {"slots_idle",       available_slots},
			
 
				+                                {"slots_processing", processing_slots}};
			
 
				+                        res.set_content(health.dump(), "application/json");
			
 
				+                        res.status = 200; // HTTP OK
			
 
				+                    } else {
			
 
				+                        json health = {
			
 
				+                                {"status",           "no slot available"},
			
 
				+                                {"slots_idle",       available_slots},
			
 
				+                                {"slots_processing", processing_slots}};
			
 
				+                        res.set_content(health.dump(), "application/json");
			
 
				+                        res.status = 503; // HTTP Service Unavailable
			
 
				+                    }
			
 
				+                }
			
 
				                 break;
			
 
				             case SERVER_STATE_LOADING_MODEL:
			
 
				                 res.set_content(R"({"status": "loading model"})", "application/json");