// server-context.h (3.0 KB)
  #include "server-http.h"
  #include "server-task.h"
  #include "server-queue.h"

  #include <nlohmann/json_fwd.hpp>

  #include <cstddef>
  #include <functional>
  #include <memory>
  #include <utility>
  7. struct server_context_impl; // private implementation
  8. struct server_context {
  9. std::unique_ptr<server_context_impl> impl;
  10. server_context();
  11. ~server_context();
  12. // initialize slots and server-related data
  13. void init();
  14. // load the model and initialize llama_context
  15. // returns true on success
  16. bool load_model(const common_params & params);
  17. // this function will block main thread until termination
  18. void start_loop();
  19. // terminate main loop (will unblock start_loop)
  20. void terminate();
  21. // get the underlaying llama_context
  22. llama_context * get_llama_context() const;
  23. // get a new response reader, used by CLI application
  24. server_response_reader get_response_reader();
  25. };
  26. // forward declarations
  27. struct server_res_generator;
  28. struct server_routes {
  29. server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; })
  30. : params(params), ctx_server(*ctx_server.impl), is_ready(is_ready) {
  31. init_routes();
  32. }
  33. void init_routes();
  34. // handlers using lambda function, so that they can capture `this` without `std::bind`
  35. server_http_context::handler_t get_health;
  36. server_http_context::handler_t get_metrics;
  37. server_http_context::handler_t get_slots;
  38. server_http_context::handler_t post_slots;
  39. server_http_context::handler_t get_props;
  40. server_http_context::handler_t post_props;
  41. server_http_context::handler_t get_api_show;
  42. server_http_context::handler_t post_infill;
  43. server_http_context::handler_t post_completions;
  44. server_http_context::handler_t post_completions_oai;
  45. server_http_context::handler_t post_chat_completions;
  46. server_http_context::handler_t post_anthropic_messages;
  47. server_http_context::handler_t post_anthropic_count_tokens;
  48. server_http_context::handler_t post_apply_template;
  49. server_http_context::handler_t get_models;
  50. server_http_context::handler_t post_tokenize;
  51. server_http_context::handler_t post_detokenize;
  52. server_http_context::handler_t post_embeddings;
  53. server_http_context::handler_t post_embeddings_oai;
  54. server_http_context::handler_t post_rerank;
  55. server_http_context::handler_t get_lora_adapters;
  56. server_http_context::handler_t post_lora_adapters;
  57. private:
  58. // TODO: move these outside of server_routes?
  59. std::unique_ptr<server_res_generator> handle_slots_save(const server_http_req & req, int id_slot);
  60. std::unique_ptr<server_res_generator> handle_slots_restore(const server_http_req & req, int id_slot);
  61. std::unique_ptr<server_res_generator> handle_slots_erase(const server_http_req &, int id_slot);
  62. std::unique_ptr<server_res_generator> handle_embeddings_impl(const server_http_req & req, task_response_type res_type);
  63. const common_params & params;
  64. server_context_impl & ctx_server;
  65. std::function<bool()> is_ready;
  66. };