|
|
@@ -1705,6 +1705,8 @@ private:
|
|
|
};
|
|
|
|
|
|
struct server_response {
|
|
|
+ bool running = true;
|
|
|
+
|
|
|
// for keeping track of all tasks waiting for the result
|
|
|
std::unordered_set<int> waiting_task_ids;
|
|
|
|
|
|
@@ -1759,6 +1761,10 @@ struct server_response {
|
|
|
while (true) {
|
|
|
std::unique_lock<std::mutex> lock(mutex_results);
|
|
|
condition_results.wait(lock, [&]{
|
|
|
+ if (!running) {
|
|
|
+ SRV_DBG("%s : queue result stop\n", __func__);
|
|
|
+ std::terminate(); // we cannot return here since the caller is HTTP code
|
|
|
+ }
|
|
|
return !queue_results.empty();
|
|
|
});
|
|
|
|
|
|
@@ -1789,6 +1795,10 @@ struct server_response {
|
|
|
}
|
|
|
|
|
|
std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
|
|
|
+ if (!running) {
|
|
|
+ SRV_DBG("%s : queue result stop\n", __func__);
|
|
|
+ std::terminate(); // we cannot return here since the caller is HTTP code
|
|
|
+ }
|
|
|
if (cr_res == std::cv_status::timeout) {
|
|
|
return nullptr;
|
|
|
}
|
|
|
@@ -1818,6 +1828,12 @@ struct server_response {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ // Terminate the waiting loop: clear `running` and wake every thread blocked on condition_results.
|
|
|
+ // The flag is changed while holding mutex_results -- the same mutex the waiters hold when they
|
|
|
+ // test `running` -- so a waiter can never evaluate the flag, miss this notification, and then
|
|
|
+ // go to sleep with nobody left to wake it. It also removes the unsynchronized write to a plain bool.
|
|
|
+ void terminate() {
|
|
|
+ std::lock_guard<std::mutex> lock(mutex_results);
|
|
|
+ running = false;
|
|
|
+ // notifying while the lock is still held is valid; woken waiters proceed once it is released
|
|
|
+ condition_results.notify_all();
|
|
|
+ }
|
|
|
};
|
|
|
|
|
|
struct server_context {
|
|
|
@@ -4491,9 +4507,10 @@ int main(int argc, char ** argv) {
|
|
|
svr->new_task_queue = [&params] { return new httplib::ThreadPool(params.n_threads_http); };
|
|
|
|
|
|
// clean up function, to be called before exit
|
|
|
- auto clean_up = [&svr]() {
|
|
|
+ auto clean_up = [&svr, &ctx_server]() {
|
|
|
SRV_INF("%s: cleaning up before exit...\n", __func__);
|
|
|
svr->stop();
|
|
|
+ ctx_server.queue_results.terminate();
|
|
|
llama_backend_free();
|
|
|
};
|
|
|
|
|
|
@@ -4534,7 +4551,7 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
if (!ctx_server.load_model(params)) {
|
|
|
clean_up();
|
|
|
- // t.join(); // FIXME: see below
|
|
|
+ t.join();
|
|
|
LOG_ERR("%s: exiting due to model loading error\n", __func__);
|
|
|
return 1;
|
|
|
}
|
|
|
@@ -4582,7 +4599,7 @@ int main(int argc, char ** argv) {
|
|
|
ctx_server.queue_tasks.start_loop();
|
|
|
|
|
|
clean_up();
|
|
|
- // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
|
|
|
+ t.join();
|
|
|
|
|
|
return 0;
|
|
|
}
|