|
|
@@ -723,6 +723,12 @@ struct ggml_backend_sched {
|
|
|
bool op_offload;
|
|
|
|
|
|
int debug;
|
|
|
+
|
|
|
+ // used for debugging graph reallocations [GGML_SCHED_DEBUG_REALLOC]
|
|
|
+ // ref: https://github.com/ggml-org/llama.cpp/pull/17617
|
|
|
+ int debug_realloc;
|
|
|
+ int debug_graph_size;
|
|
|
+ int debug_prev_graph_size;
|
|
|
};
|
|
|
|
|
|
#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
|
|
|
@@ -1289,6 +1295,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
|
|
|
}
|
|
|
|
|
|
int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
|
|
|
+
|
|
|
+ // remember the actual graph_size for performing reallocation checks later [GGML_SCHED_DEBUG_REALLOC]
|
|
|
+ sched->debug_prev_graph_size = sched->debug_graph_size;
|
|
|
+ sched->debug_graph_size = graph_size;
|
|
|
+
|
|
|
if (sched->graph.size < graph_size) {
|
|
|
sched->graph.size = graph_size;
|
|
|
sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
|
|
|
@@ -1395,14 +1406,21 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
|
|
|
|
|
// allocate graph
|
|
|
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
|
|
|
-#ifdef GGML_SCHED_NO_REALLOC
|
|
|
- GGML_ABORT("%s: failed to allocate graph, but graph re-allocation is disabled by GGML_SCHED_NO_REALLOC\n", __func__);
|
|
|
-#endif
|
|
|
-
|
|
|
#ifndef NDEBUG
|
|
|
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
|
|
|
#endif
|
|
|
|
|
|
+ if (sched->debug_realloc > 0) {
|
|
|
+ // we are interested only in situations where the graph was reallocated even though its size remained the same; with debug_realloc > 1, abort on any reallocation [GGML_SCHED_DEBUG_REALLOC]
|
|
|
+ // example: https://github.com/ggml-org/llama.cpp/pull/17143
|
|
|
+ const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
|
|
|
+
|
|
|
+ if (unexpected || sched->debug_realloc > 1) {
|
|
|
+ GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
|
|
|
+ sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
// the re-allocation may cause the split inputs to be moved to a different address
|
|
|
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
|
|
|
for (int i = 0; i < sched->n_backends; i++) {
|
|
|
@@ -1620,6 +1638,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
|
|
|
|
|
const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
|
|
|
sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;
|
|
|
+
|
|
|
+ sched->debug_realloc = 0;
|
|
|
+#ifdef GGML_SCHED_NO_REALLOC
|
|
|
+ sched->debug_realloc = 1;
|
|
|
+#endif
|
|
|
+ const char * GGML_SCHED_DEBUG_REALLOC = getenv("GGML_SCHED_DEBUG_REALLOC");
|
|
|
+ sched->debug_realloc = GGML_SCHED_DEBUG_REALLOC ? atoi(GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
|
|
|
+
|
|
|
sched->n_backends = n_backends;
|
|
|
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
|
|
|
|
|
@@ -1636,6 +1662,9 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
|
|
sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
|
|
|
sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
|
|
|
|
|
|
+ sched->debug_graph_size = 0;
|
|
|
+ sched->debug_prev_graph_size = 0;
|
|
|
+
|
|
|
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
|
|
|
sched->context_buffer = (char *) malloc(sched->context_buffer_size);
|
|
|
|