Просмотр исходного кода

server : add loading html page while model is loading (#9468)

* Adding loading page for '/' server requests

* set content when model is loading

* removed loading html file

* updated cmakelist

* updated makefile

* cleaned up whitespace

* cleanup for PR removed error

* updated server test to handle 503 HTML

* updated server test to handle 503 HTML

* ca†ch 503 before parsing json

* revert test

* account for both api and web browser requests

* precommit corrections

* eol fix

* revert changes to pre-commit

* removed print statement

* made loading message more descriptive

* also support .html files

---------

Co-authored-by: VJHack <flymyplane21@gmail.com>
Co-authored-by: Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com>
Xuan Son Nguyen 1 год назад
Родитель
Сommit
feff4aa846
4 измененных файлов с 23 добавлено и 2 удалено
  1. 1 0
      Makefile
  2. 1 0
      examples/server/CMakeLists.txt
  3. 12 0
      examples/server/public/loading.html
  4. 9 2
      examples/server/server.cpp

+ 1 - 0
Makefile

@@ -1440,6 +1440,7 @@ llama-server: \
 	examples/server/system-prompts.js.hpp \
 	examples/server/prompt-formats.js.hpp \
 	examples/server/json-schema-to-grammar.mjs.hpp \
+	examples/server/loading.html.hpp \
 	common/json.hpp \
 	common/stb_image.h \
 	$(OBJ_ALL)

+ 1 - 0
examples/server/CMakeLists.txt

@@ -30,6 +30,7 @@ set(PUBLIC_ASSETS
     system-prompts.js
     prompt-formats.js
     json-schema-to-grammar.mjs
+    loading.html
 )
 
 foreach(asset ${PUBLIC_ASSETS})

+ 12 - 0
examples/server/public/loading.html

@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv="refresh" content="5">
+    </head>
+    <body>
+        <div id="loading">
+            The model is loading. Please wait.<br/>
+            The user interface will appear soon.
+        </div>
+    </body>
+</html>

+ 9 - 2
examples/server/server.cpp

@@ -28,6 +28,7 @@
 #include "system-prompts.js.hpp"
 #include "prompt-formats.js.hpp"
 #include "json-schema-to-grammar.mjs.hpp"
+#include "loading.html.hpp"
 
 #include <atomic>
 #include <chrono>
@@ -2592,10 +2593,16 @@ int main(int argc, char ** argv) {
         return false;
     };
 
-    auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) {
+    auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+            auto tmp = string_split(req.path, '.');
+            if (req.path == "/" || tmp.back() == "html") {
+                res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
+                res.status = 503;
+            } else {
+                res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+            }
             return false;
         }
         return true;