3 månader sedan · 638d330246
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -392,12 +392,8 @@ static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) {
 
				     free(alloc);
			
 
				 }
			
 
				 
			
 
				-static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
			
 
				-    size_t max_size = 0;
			
 
				-    for (int i = 0; i < alloc->n_chunks; i++) {
			
 
				-        max_size += alloc->chunks[i]->max_size;
			
 
				-    }
			
 
				-    return max_size;
			
 
				+static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc, int chunk) { // now reports max_size of one chunk (index `chunk`) instead of the sum over all chunks
			
 
				+    return chunk < alloc->n_chunks ? alloc->chunks[chunk]->max_size : 0; // an index at or beyond n_chunks yields 0
			
 
				 }
			
 
				 
			
 
				 
			
@@ -417,10 +413,8 @@ static void ggml_vbuffer_free(struct vbuffer * buf) {
 
				     free(buf);
			
 
				 }
			
 
				 
			
 
				-static int ggml_vbuffer_n_chunks(struct vbuffer * buf) {
			
 
				-    int n = 0;
			
 
				-    while (n < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[n]) n++;
			
 
				-    return n;
			
 
				+static size_t ggml_vbuffer_chunk_size(struct vbuffer * buf, int chunk) { // size of the backend buffer stored at chunk index `chunk`
			
 
				+    return buf->chunks[chunk] ? ggml_backend_buffer_get_size(buf->chunks[chunk]) : 0; // a NULL chunk slot reports size 0
			
 
				 }
			
 
				 
			
 
				 static size_t ggml_vbuffer_size(struct vbuffer * buf) {
			
@@ -885,12 +879,20 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
 
				             }
			
 
				         }
			
 
				 
			
 
				-        size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
			
 
				-        size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
			
 
				-
			
 
				         // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
			
 
				-        if (new_size > cur_size || galloc->buffers[i] == NULL) {
			
 
				+        bool realloc = galloc->buffers[i] == NULL;
			
 
				+        size_t new_size = 0;
			
 
				+        for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
			
 
				+            size_t cur_chunk_size = galloc->buffers[i] ? ggml_vbuffer_chunk_size(galloc->buffers[i], c) : 0;
			
 
				+            size_t new_chunk_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i], c);
			
 
				+            new_size += new_chunk_size;
			
 
				+            if (new_chunk_size > cur_chunk_size) {
			
 
				+                realloc = true;
			
 
				+            }
			
 
				+        }
			
 
				+        if (realloc) {
			
 
				 #ifndef NDEBUG
			
 
				+            size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
			
 
				             GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
			
 
				 #endif
			
 
				 
			
--- a/tests/test-alloc.cpp
+++ b/tests/test-alloc.cpp
@@ -548,6 +548,41 @@ static void test_buffer_size_zero() {
 
				     GGML_ASSERT(backend_b.context->allocated_total() == 0);
			
 
				 }
			
 
				 
			
 
				+// Test re-using gallocr for a different graph. The new graph has the same
			
 
				+// total size, but one of the chunks is larger, so reallocation is required.
			
 
				+static void test_reallocation() {
			
 
				+    dummy_backend    backend = dummy_backend_init(32, /*align*/ 4); // NOTE(review): 32 is presumably the dummy backend's max buffer size - confirm in dummy_backend_init
			
 
				+    ggml_gallocr_ptr galloc; // declared outside both scopes so the second graph can reuse it
			
 
				+    { // first graph: creates the gallocr
			
 
				+        auto [ctx, graph, ctx_ptr] = make_context();
			
 
				+        ggml_tensor * x[4];
			
 
				+        x[0] = make_input_with_size(ctx, 24);
			
 
				+        x[1] = make_input_with_size(ctx, 16); // inputs requested with sizes 24 and 16
			
 
				+        x[2] = ggml_view_1d(ctx, x[0], 4, 0); // view of x[0]; args 4, 0 are presumably element count and offset - confirm against ggml.h
			
 
				+        x[3] = ggml_add(ctx, x[2], x[1]);
			
 
				+        assign_names(ctx);
			
 
				+
			
 
				+        galloc = allocate_graph(graph, x[3], &backend.buffer_type); // keep the returned gallocr for the next scope
			
 
				+        check_all_allocated(graph);
			
 
				+        GGML_ASSERT(backend.context->allocated_total() == 40); // 40 = 24 + 16, the two input sizes
			
 
				+    }
			
 
				+    { // second graph: different input sizes, same gallocr
			
 
				+        auto [ctx, graph, ctx_ptr] = make_context();
			
 
				+        ggml_tensor * x[3];
			
 
				+        x[0] = make_input_with_size(ctx, 20);
			
 
				+        x[1] = make_input_with_size(ctx, 20); // 20 + 20 = 40, the same total as the first graph
			
 
				+        x[2] = ggml_add(ctx, x[0], x[1]);
			
 
				+        assign_names(ctx);
			
 
				+        ggml_set_output(x[2]);
			
 
				+        ggml_build_forward_expand(graph, x[2]); // graph is built by hand here because allocate_graph is not used this time
			
 
				+
			
 
				+        bool result = ggml_gallocr_alloc_graph(galloc.get(), graph); // allocate the new graph with the existing gallocr
			
 
				+        GGML_ASSERT(result);
			
 
				+        check_all_allocated(graph);
			
 
				+        GGML_ASSERT(backend.context->allocated_total() == 40); // total is expected to be 40 again
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 static void run(const char * name, void (*f)()) {
			
 
				     printf("%s ", name);
			
 
				     fflush(stdout);
			
@@ -568,5 +603,6 @@ int main() {
 
				     run("test_prefer_already_allocated_memory", test_prefer_already_allocated_memory);
			
 
				     run("test_multiple_buffer_types", test_multiple_buffer_types);
			
 
				     run("test_buffer_size_zero", test_buffer_size_zero);
			
 
				+    run("test_reallocation", test_reallocation);
			
 
				     return 0;
			
 
				 }