há 5 meses atrás · ca0ef2dddb
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -298,7 +298,7 @@ llama_context::llama_context(
 
				 
			
 
				         cross.v_embd.clear();
			
 
				 
			
 
				-        // reserve pp graph first so that buffers are only allocated once
			
 
				+        // reserve pp (prompt processing) graph first so that buffers are only allocated once
			
 
				         {
			
 
				             auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get());
			
 
				             if (!gf) {
			
@@ -309,7 +309,7 @@ llama_context::llama_context(
 
				             n_nodes_pp  = ggml_graph_n_nodes(gf);
			
 
				         }
			
 
				 
			
 
				-        // reserve with tg graph to get the number of splits and nodes
			
 
				+        // reserve with tg (token generation) graph to get the number of splits and nodes
			
 
				         {
			
 
				             auto * gf = graph_reserve(n_seqs, n_seqs, n_seqs, mctx.get());
			
 
				             if (!gf) {