2 years ago · 9d2382b3e4
--- a/llama.cpp
+++ b/llama.cpp
@@ -1812,6 +1812,12 @@ static bool llama_eval_internal(
 
				     // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
			
 
				     n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
			
 
				 
			
 
				+    struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
			
 
				+    struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
			
 
				+
			
 
				+    LLAMA_ASSERT(strcmp(res->name, "result_output") == 0);
			
 
				+    LLAMA_ASSERT(strcmp(embeddings->name, "result_norm") == 0);
			
 
				+
			
 
				 #if GGML_USE_MPI
			
 
				     const int64_t n_layer = hparams.n_layer;
			
 
				     ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
			
@@ -1825,7 +1831,10 @@ static bool llama_eval_internal(
 
				         //}
			
 
				         ggml_metal_set_n_cb     (lctx.ctx_metal, n_threads);
			
 
				         ggml_metal_graph_compute(lctx.ctx_metal, gf);
			
 
				-        ggml_metal_get_tensor   (lctx.ctx_metal, cur);
			
 
				+        ggml_metal_get_tensor   (lctx.ctx_metal, res);
			
 
				+        if (!lctx.embedding.empty()) {
			
 
				+            ggml_metal_get_tensor(lctx.ctx_metal, embeddings);
			
 
				+        }
			
 
				     } else {
			
 
				         // IMPORTANT:
			
 
				         // Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
			
@@ -1856,12 +1865,6 @@ static bool llama_eval_internal(
 
				     // update kv token count
			
 
				     lctx.kv_self.n = n_past + N;
			
 
				 
			
 
				-    struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
			
 
				-    struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
			
 
				-
			
 
				-    LLAMA_ASSERT(strcmp(res->name, "result_output") == 0);
			
 
				-    LLAMA_ASSERT(strcmp(embeddings->name, "result_norm") == 0);
			
 
				-
			
 
				     if (cgraph_fname) {
			
 
				         ggml_graph_export(gf, cgraph_fname);
			
 
				     }