| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- #include "server-http.h"
- #include "server-task.h"
- #include "server-queue.h"
- #include <nlohmann/json_fwd.hpp>
- #include <cstddef>
- #include <memory>
- struct server_context_impl; // private implementation
- struct server_context_info {
- std::string build_info;
- std::string model_name;
- bool has_inp_image;
- bool has_inp_audio;
- };
- struct server_context {
- std::unique_ptr<server_context_impl> impl;
- server_context();
- ~server_context();
- // initialize slots and server-related data
- void init();
- // load the model and initialize llama_context
- // returns true on success
- bool load_model(const common_params & params);
- // this function will block main thread until termination
- void start_loop();
- // terminate main loop (will unblock start_loop)
- void terminate();
- // get the underlaying llama_context
- llama_context * get_llama_context() const;
- // get a new response reader, used by CLI application
- server_response_reader get_response_reader();
- // get server info
- // used by CLI application
- server_context_info get_info() const;
- };
- // forward declarations
- struct server_res_generator;
- struct server_routes {
- server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; })
- : params(params), ctx_server(*ctx_server.impl), is_ready(is_ready) {
- init_routes();
- }
- void init_routes();
- // handlers using lambda function, so that they can capture `this` without `std::bind`
- server_http_context::handler_t get_health;
- server_http_context::handler_t get_metrics;
- server_http_context::handler_t get_slots;
- server_http_context::handler_t post_slots;
- server_http_context::handler_t get_props;
- server_http_context::handler_t post_props;
- server_http_context::handler_t get_api_show;
- server_http_context::handler_t post_infill;
- server_http_context::handler_t post_completions;
- server_http_context::handler_t post_completions_oai;
- server_http_context::handler_t post_chat_completions;
- server_http_context::handler_t post_anthropic_messages;
- server_http_context::handler_t post_anthropic_count_tokens;
- server_http_context::handler_t post_apply_template;
- server_http_context::handler_t get_models;
- server_http_context::handler_t post_tokenize;
- server_http_context::handler_t post_detokenize;
- server_http_context::handler_t post_embeddings;
- server_http_context::handler_t post_embeddings_oai;
- server_http_context::handler_t post_rerank;
- server_http_context::handler_t get_lora_adapters;
- server_http_context::handler_t post_lora_adapters;
- private:
- // TODO: move these outside of server_routes?
- std::unique_ptr<server_res_generator> handle_slots_save(const server_http_req & req, int id_slot);
- std::unique_ptr<server_res_generator> handle_slots_restore(const server_http_req & req, int id_slot);
- std::unique_ptr<server_res_generator> handle_slots_erase(const server_http_req &, int id_slot);
- std::unique_ptr<server_res_generator> handle_embeddings_impl(const server_http_req & req, task_response_type res_type);
- const common_params & params;
- server_context_impl & ctx_server;
- std::function<bool()> is_ready;
- };
|