Просмотр исходного кода

ggml : better PERF prints + support "LLAMA_PERF=1 make"

Georgi Gerganov 2 года назад
Родитель
Commit
e4422e299c
3 измененных файлов с 9 добавлено и 3 удалено
  1. 4 0
      Makefile
  2. 2 2
      ggml.c
  3. 3 1
      llama.cpp

+ 4 - 0
Makefile

@@ -117,6 +117,10 @@ ifdef LLAMA_GPROF
 	CFLAGS   += -pg
 	CXXFLAGS += -pg
 endif
+ifdef LLAMA_PERF
+	CFLAGS   += -DGGML_PERF
+	CXXFLAGS += -DGGML_PERF
+endif
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 	CFLAGS   += -mcpu=native
 	CXXFLAGS += -mcpu=native

+ 2 - 2
ggml.c

@@ -11239,7 +11239,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
 
 
         perf_total_per_op_us[node->op] += node->perf_time_us;
 
 
-        GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 ", %" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
+        GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                i,
                node->ne[0], node->ne[1], node->ne[2],
                GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
@@ -11253,7 +11253,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     for (int i = 0; i < cgraph->n_leafs; i++) {
         struct ggml_tensor * node = cgraph->leafs[i];
 
 
-        GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 "] %8s\n",
+        GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
                i,
                node->ne[0], node->ne[1],
                GGML_OP_LABEL[node->op]);

+ 3 - 1
llama.cpp

@@ -1250,9 +1250,11 @@ static bool llama_eval_internal(
     ggml_build_forward_expand(&gf, inpL);
     ggml_graph_compute       (ctx0, &gf);
 
 
+#ifdef GGML_PERF
     // print timing information per ggml operation (for debugging purposes)
     // requires GGML_PERF to be defined
-    //ggml_graph_print(&gf);
+    ggml_graph_print(&gf);
+#endif
 
 
     // plot the computation graph in dot format (for debugging purposes)
     //if (n_past%100 == 0) {