server-context.h 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  #include "server-http.h"
  #include "server-task.h"
  #include "server-queue.h"

  #include <nlohmann/json_fwd.hpp>

  #include <cstddef>
  #include <functional>
  #include <memory>
  #include <utility>
  7. struct server_context_impl; // private implementation
  8. struct server_context {
  9. std::unique_ptr<server_context_impl> impl;
  10. server_context();
  11. ~server_context();
  12. // initialize slots and server-related data
  13. void init();
  14. // load the model and initialize llama_context
  15. // returns true on success
  16. bool load_model(const common_params & params);
  17. // this function will block main thread until termination
  18. void start_loop();
  19. // terminate main loop (will unblock start_loop)
  20. void terminate();
  21. // get the underlaying llama_context
  22. llama_context * get_llama_context() const;
  23. // get the underlaying queue_tasks and queue_results
  24. // used by CLI application
  25. std::pair<server_queue &, server_response &> get_queues();
  26. };
  27. // forward declarations
  28. struct server_res_generator;
  29. struct server_routes {
  30. server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; })
  31. : params(params), ctx_server(*ctx_server.impl), is_ready(is_ready) {
  32. init_routes();
  33. }
  34. void init_routes();
  35. // handlers using lambda function, so that they can capture `this` without `std::bind`
  36. server_http_context::handler_t get_health;
  37. server_http_context::handler_t get_metrics;
  38. server_http_context::handler_t get_slots;
  39. server_http_context::handler_t post_slots;
  40. server_http_context::handler_t get_props;
  41. server_http_context::handler_t post_props;
  42. server_http_context::handler_t get_api_show;
  43. server_http_context::handler_t post_infill;
  44. server_http_context::handler_t post_completions;
  45. server_http_context::handler_t post_completions_oai;
  46. server_http_context::handler_t post_chat_completions;
  47. server_http_context::handler_t post_anthropic_messages;
  48. server_http_context::handler_t post_anthropic_count_tokens;
  49. server_http_context::handler_t post_apply_template;
  50. server_http_context::handler_t get_models;
  51. server_http_context::handler_t post_tokenize;
  52. server_http_context::handler_t post_detokenize;
  53. server_http_context::handler_t post_embeddings;
  54. server_http_context::handler_t post_embeddings_oai;
  55. server_http_context::handler_t post_rerank;
  56. server_http_context::handler_t get_lora_adapters;
  57. server_http_context::handler_t post_lora_adapters;
  58. private:
  59. // TODO: move these outside of server_routes?
  60. std::unique_ptr<server_res_generator> handle_slots_save(const server_http_req & req, int id_slot);
  61. std::unique_ptr<server_res_generator> handle_slots_restore(const server_http_req & req, int id_slot);
  62. std::unique_ptr<server_res_generator> handle_slots_erase(const server_http_req &, int id_slot);
  63. std::unique_ptr<server_res_generator> handle_embeddings_impl(const server_http_req & req, task_response_type res_type);
  64. const common_params & params;
  65. server_context_impl & ctx_server;
  66. std::function<bool()> is_ready;
  67. };