Просмотр исходного кода

cpu: skip NOPs to avoid barriers (#17133)

* cpu: skip NOPs to avoid barriers

* cpu: use ggml_op_is_empty
Max Krasnyansky 2 месяца назад
Родитель
Коммит
395e286bc9
3 изменённых файла: 21 добавление и 60 удалений
  1. 21 16
      ggml/src/ggml-cpu/ggml-cpu.c
  2. 0 40
      ggml/src/ggml-cpu/ops.cpp
  3. 0 4
      ggml/src/ggml-cpu/ops.h

+ 21 - 16
ggml/src/ggml-cpu/ggml-cpu.c

@@ -1807,22 +1807,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
             {
                 ggml_compute_forward_cont(params, tensor);
                 ggml_compute_forward_cont(params, tensor);
             } break;
             } break;
-        case GGML_OP_RESHAPE:
-            {
-                ggml_compute_forward_reshape(params, tensor);
-            } break;
-        case GGML_OP_VIEW:
-            {
-                ggml_compute_forward_view(params, tensor);
-            } break;
-        case GGML_OP_PERMUTE:
-            {
-                ggml_compute_forward_permute(params, tensor);
-            } break;
-        case GGML_OP_TRANSPOSE:
-            {
-                ggml_compute_forward_transpose(params, tensor);
-            } break;
         case GGML_OP_GET_ROWS:
         case GGML_OP_GET_ROWS:
             {
             {
                 ggml_compute_forward_get_rows(params, tensor);
                 ggml_compute_forward_get_rows(params, tensor);
@@ -2042,6 +2026,22 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
             {
                 // nop
                 // nop
             } break;
             } break;
+        case GGML_OP_RESHAPE:
+            {
+                // nop
+            } break;
+        case GGML_OP_PERMUTE:
+            {
+                // nop
+            } break;
+        case GGML_OP_VIEW:
+            {
+                // nop
+            } break;
+        case GGML_OP_TRANSPOSE:
+            {
+                // nop
+            } break;
         case GGML_OP_COUNT:
         case GGML_OP_COUNT:
             {
             {
                 GGML_ABORT("fatal error");
                 GGML_ABORT("fatal error");
@@ -2884,6 +2884,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
     for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
         struct ggml_tensor * node = cgraph->nodes[node_n];
         struct ggml_tensor * node = cgraph->nodes[node_n];
 
 
+        if (ggml_op_is_empty(node->op)) {
+            // skip NOPs
+            continue;
+        }
+
         ggml_compute_forward(&params, node);
         ggml_compute_forward(&params, node);
 
 
         if (state->ith == 0 && cplan->abort_callback &&
         if (state->ith == 0 && cplan->abort_callback &&

+ 0 - 40
ggml/src/ggml-cpu/ops.cpp

@@ -4455,46 +4455,6 @@ void ggml_compute_forward_cont(
     ggml_compute_forward_dup(params, dst);
     ggml_compute_forward_dup(params, dst);
 }
 }
 
 
-// ggml_compute_forward_reshape
-
-void ggml_compute_forward_reshape(
-        const ggml_compute_params * params,
-        ggml_tensor * dst) {
-    // NOP
-    GGML_UNUSED(params);
-    GGML_UNUSED(dst);
-}
-
-// ggml_compute_forward_view
-
-void ggml_compute_forward_view(
-        const ggml_compute_params * params,
-        ggml_tensor * dst) {
-    // NOP
-    GGML_UNUSED(params);
-    GGML_UNUSED(dst);
-}
-
-// ggml_compute_forward_permute
-
-void ggml_compute_forward_permute(
-        const ggml_compute_params * params,
-        ggml_tensor * dst) {
-    // NOP
-    GGML_UNUSED(params);
-    GGML_UNUSED(dst);
-}
-
-// ggml_compute_forward_transpose
-
-void ggml_compute_forward_transpose(
-        const ggml_compute_params * params,
-        ggml_tensor * dst) {
-    // NOP
-    GGML_UNUSED(params);
-    GGML_UNUSED(dst);
-}
-
 // ggml_compute_forward_get_rows
 // ggml_compute_forward_get_rows
 
 
 static void ggml_compute_forward_get_rows_q(
 static void ggml_compute_forward_get_rows_q(

+ 0 - 4
ggml/src/ggml-cpu/ops.h

@@ -51,10 +51,6 @@ void ggml_compute_forward_scale(const struct ggml_compute_params * params, struc
 void ggml_compute_forward_set(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_set(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cpy(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cpy(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cont(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cont(const struct ggml_compute_params * params, struct ggml_tensor * dst);
-void ggml_compute_forward_reshape(const struct ggml_compute_params * params, struct ggml_tensor * dst);
-void ggml_compute_forward_view(const struct ggml_compute_params * params, struct ggml_tensor * dst);
-void ggml_compute_forward_permute(const struct ggml_compute_params * params, struct ggml_tensor * dst);
-void ggml_compute_forward_transpose(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_get_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_get_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_get_rows_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_get_rows_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_set_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_set_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);