* server: free sampling contexts on exit. This cleans up the last leak found by the address sanitizer. * fix whitespace * fix whitespace
@@ -671,6 +671,13 @@ struct server_context {
model = nullptr;
}
+ // Clear any sampling context
+ for (server_slot & slot : slots) {
+ if (slot.ctx_sampling != nullptr) {
+ llama_sampling_free(slot.ctx_sampling);
+ }
+
llama_batch_free(batch);