server-context.h 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  #include "server-http.h"
  #include "server-task.h"
  #include "server-queue.h"

  #include <nlohmann/json_fwd.hpp>

  #include <cstddef>
  #include <functional>
  #include <memory>
  #include <string>
  #include <utility>
  // private implementation type; only declared here, so it is incomplete in this header
  struct server_context_impl;
  // Plain data bundle describing the server; this is the type returned by
  // server_context::get_info().
  struct server_context_info {
      std::string build_info;
      std::string model_name;

      // presumably: whether image / audio input is available - confirm against get_info()
      // Both flags default to false so that a default-constructed object never
      // carries indeterminate values.
      bool has_inp_image = false;
      bool has_inp_audio = false;
  };
  14. struct server_context {
  15. std::unique_ptr<server_context_impl> impl;
  16. server_context();
  17. ~server_context();
  18. // initialize slots and server-related data
  19. void init();
  20. // load the model and initialize llama_context
  21. // returns true on success
  22. bool load_model(const common_params & params);
  23. // this function will block main thread until termination
  24. void start_loop();
  25. // terminate main loop (will unblock start_loop)
  26. void terminate();
  27. // get the underlaying llama_context
  28. llama_context * get_llama_context() const;
  29. // get a new response reader, used by CLI application
  30. server_response_reader get_response_reader();
  31. // get server info
  32. // used by CLI application
  33. server_context_info get_info() const;
  34. };
  // forward declaration: server_res_generator is only used through std::unique_ptr below
  struct server_res_generator;
  37. struct server_routes {
  38. server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; })
  39. : params(params), ctx_server(*ctx_server.impl), is_ready(is_ready) {
  40. init_routes();
  41. }
  42. void init_routes();
  43. // handlers using lambda function, so that they can capture `this` without `std::bind`
  44. server_http_context::handler_t get_health;
  45. server_http_context::handler_t get_metrics;
  46. server_http_context::handler_t get_slots;
  47. server_http_context::handler_t post_slots;
  48. server_http_context::handler_t get_props;
  49. server_http_context::handler_t post_props;
  50. server_http_context::handler_t get_api_show;
  51. server_http_context::handler_t post_infill;
  52. server_http_context::handler_t post_completions;
  53. server_http_context::handler_t post_completions_oai;
  54. server_http_context::handler_t post_chat_completions;
  55. server_http_context::handler_t post_anthropic_messages;
  56. server_http_context::handler_t post_anthropic_count_tokens;
  57. server_http_context::handler_t post_apply_template;
  58. server_http_context::handler_t get_models;
  59. server_http_context::handler_t post_tokenize;
  60. server_http_context::handler_t post_detokenize;
  61. server_http_context::handler_t post_embeddings;
  62. server_http_context::handler_t post_embeddings_oai;
  63. server_http_context::handler_t post_rerank;
  64. server_http_context::handler_t get_lora_adapters;
  65. server_http_context::handler_t post_lora_adapters;
  66. private:
  67. // TODO: move these outside of server_routes?
  68. std::unique_ptr<server_res_generator> handle_slots_save(const server_http_req & req, int id_slot);
  69. std::unique_ptr<server_res_generator> handle_slots_restore(const server_http_req & req, int id_slot);
  70. std::unique_ptr<server_res_generator> handle_slots_erase(const server_http_req &, int id_slot);
  71. std::unique_ptr<server_res_generator> handle_embeddings_impl(const server_http_req & req, task_response_type res_type);
  72. const common_params & params;
  73. server_context_impl & ctx_server;
  74. std::function<bool()> is_ready;
  75. };