
ggml-opencl, llama: using reserve() if count already known (#7272)

Herman Semenov 1 year ago
parent
commit 213e90ed73
2 changed files with 8 additions and 2 deletions
  1. ggml-opencl.cpp  +5 -2
  2. llama.cpp  +3 -0

ggml-opencl.cpp  +5 -2

@@ -1,4 +1,4 @@
-#include "ggml.h"
+#include "ggml.h"
 #include "ggml-opencl.h"
 #include "ggml-backend-impl.h"
 
@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                     CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
                 }
 
-                for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+                int64_t i12 = i02 * r2;
+                int64_t e12 = i12 + r2;
+                events.reserve(e12 - i12);
+                for (; i12 < e12; i12++) {
                     if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
                         // copy src1 to device
                         events.emplace_back();

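The ggml-opencl.cpp hunk hoists the loop bounds out of the for statement so the iteration count is known before the loop body runs, then reserves that many slots in events so the emplace_back() calls inside the loop do not trigger repeated reallocations. A minimal standalone sketch of the same pattern, not the upstream code; Event and collect_events are illustrative names:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative stand-in for the cl_event handles collected in the real code.
    struct Event {};

    // When the number of iterations (e12 - i12) is known up front, a single
    // reserve() call replaces the geometric growth the vector would otherwise
    // go through while emplace_back() runs inside the loop.
    static void collect_events(std::vector<Event> & events, int64_t i02, int64_t r2) {
        int64_t i12 = i02 * r2;
        int64_t e12 = i12 + r2;
        events.reserve(events.size() + static_cast<std::size_t>(e12 - i12));
        for (; i12 < e12; i12++) {
            events.emplace_back(); // in the real code each entry is filled by an enqueued kernel
        }
    }

reserve() changes only the capacity, not the size, so the loop still appends exactly as before; the sketch adds events.size() to the requested capacity to account for entries already in the vector.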
llama.cpp  +3 -0

@@ -16162,6 +16162,7 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }

     // make tensors
+    cvec.tensors.reserve(model.hparams.n_layer);
     cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
     for (size_t il = 1; il < model.hparams.n_layer; il++) {
         struct ggml_context * ctx = ctx_map.at(model.buft_layer[il].buft);
@@ -16170,6 +16171,8 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }

     // allocate tensors / buffers and zero
+    cvec.ctxs.reserve(ctx_map.size());
+    cvec.bufs.reserve(ctx_map.size());
     for (auto it : ctx_map) {
         ggml_backend_buffer_type_t buft = it.first;
         ggml_context * ctx = it.second;
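
The llama.cpp hunks apply the same idea where the final element counts are already known: one tensor slot per layer for cvec.tensors, and one context and one buffer per ctx_map entry for cvec.ctxs and cvec.bufs. A minimal sketch under those assumptions; ControlVector and init_layer_slots are illustrative stand-ins for llama_control_vector and the surrounding init function:

    #include <cstddef>
    #include <vector>

    // Illustrative stand-in for llama_control_vector.
    struct ControlVector {
        std::vector<void *> tensors; // one slot per layer; layer 0 stays a nullptr placeholder
    };

    static void init_layer_slots(ControlVector & cvec, std::size_t n_layer) {
        cvec.tensors.reserve(n_layer);   // final size is known before the loop
        cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
        for (std::size_t il = 1; il < n_layer; il++) {
            cvec.tensors.push_back(nullptr); // the real code creates a per-layer tensor here
        }
    }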