|
|
@@ -298,7 +298,7 @@ llama_context::llama_context(
|
|
|
|
|
|
cross.v_embd.clear();
|
|
|
|
|
|
- // reserve pp graph first so that buffers are only allocated once
|
|
|
+ // reserve pp (prompt processing) graph first so that buffers are only allocated once
|
|
|
{
|
|
|
auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get());
|
|
|
if (!gf) {
|
|
|
@@ -309,7 +309,7 @@ llama_context::llama_context(
|
|
|
n_nodes_pp = ggml_graph_n_nodes(gf);
|
|
|
}
|
|
|
|
|
|
- // reserve with tg graph to get the number of splits and nodes
|
|
|
+ // reserve with tg (token generation) graph to get the number of splits and nodes
|
|
|
{
|
|
|
auto * gf = graph_reserve(n_seqs, n_seqs, n_seqs, mctx.get());
|
|
|
if (!gf) {
|