|
|
@@ -395,6 +395,8 @@ struct llama_buffer {
|
|
|
uint8_t * addr = NULL;
|
|
|
size_t size = 0;
|
|
|
|
|
|
+ llama_buffer() = default;
|
|
|
+
|
|
|
void resize(size_t size) {
|
|
|
delete[] addr;
|
|
|
addr = new uint8_t[size];
|
|
|
@@ -404,27 +406,59 @@ struct llama_buffer {
|
|
|
~llama_buffer() {
|
|
|
delete[] addr;
|
|
|
}
|
|
|
+
|
|
|
+ // disable copy and move
|
|
|
+ llama_buffer(const llama_buffer&) = delete;
|
|
|
+ llama_buffer(llama_buffer&&) = delete;
|
|
|
+ llama_buffer& operator=(const llama_buffer&) = delete;
|
|
|
+ llama_buffer& operator=(llama_buffer&&) = delete;
|
|
|
};
|
|
|
|
|
|
#ifdef GGML_USE_CUBLAS
|
|
|
#include "ggml-cuda.h"
|
|
|
+// Byte buffer used when GGML_USE_CUBLAS is defined. resize() first asks
+// ggml_cuda_host_malloc() for memory and falls back to new[] when that
+// returns NULL. is_cuda records which allocator produced addr so that
+// free() releases it with the matching deallocator.
struct llama_ctx_buffer {
    uint8_t * addr = NULL;
+   // default-initialized like addr/size so a freshly constructed buffer
+   // never carries an indeterminate flag
+   bool is_cuda = false;
    size_t size = 0;

+   llama_ctx_buffer() = default;
+
+   // Drops the current allocation and allocates `size` fresh bytes.
+   // The previous contents are not preserved.
    void resize(size_t size) {
+       free();
+
+       addr = (uint8_t *) ggml_cuda_host_malloc(size);
        if (addr) {
-           ggml_cuda_host_free(addr);
+           is_cuda = true;
+       }
+       else {
+           // fall back to pageable memory
+           addr = new uint8_t[size];
+           is_cuda = false;
        }
-       addr = (uint8_t *) ggml_cuda_host_malloc(size);
        this->size = size;
    }

-   ~llama_ctx_buffer() {
+   // Releases addr with the deallocator matching is_cuda (nothing to do when
+   // addr is NULL) and resets addr to NULL. size is left untouched.
+   void free() {
        if (addr) {
-           ggml_cuda_host_free(addr);
+           if (is_cuda) {
+               ggml_cuda_host_free(addr);
+           }
+           else {
+               delete[] addr;
+           }
        }
+       addr = NULL;
    }
+
+   ~llama_ctx_buffer() {
+       free();
+   }
+
+   // disable copy and move
+   llama_ctx_buffer(const llama_ctx_buffer&) = delete;
+   llama_ctx_buffer(llama_ctx_buffer&&) = delete;
+   llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
+   llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
};
|
|
|
#else
|
|
|
typedef llama_buffer llama_ctx_buffer;
|