2 лет назад · c0bb1d3ce2
--- a/ggml.c
+++ b/ggml.c
--- a/ggml.h
+++ b/ggml.h
@@ -258,11 +258,11 @@ struct ggml_tensor {
     enum ggml_type type;
 
     int    n_dims;
-    int    ne[GGML_MAX_DIMS]; // number of elements
-    size_t nb[GGML_MAX_DIMS]; // stride in bytes:
-                              // nb[0] = sizeof(type)
-                              // nb[1] = nb[0]   * ne[0] + padding
-                              // nb[i] = nb[i-1] * ne[i-1]
+    int64_t ne[GGML_MAX_DIMS]; // number of elements
+    size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+                               // nb[0] = sizeof(type)
+                               // nb[1] = nb[0]   * ne[0] + padding
+                               // nb[i] = nb[i-1] * ne[i-1]
 
     // compute data
     enum ggml_op op;
@@ -328,8 +328,8 @@ int64_t ggml_cycles_per_ms(void);
 void ggml_print_object (const struct ggml_object * obj);
 void ggml_print_objects(const struct ggml_context * ctx);
 
-int    ggml_nelements(const struct ggml_tensor * tensor);
-size_t ggml_nbytes   (const struct ggml_tensor * tensor);
+int64_t ggml_nelements(const struct ggml_tensor * tensor);
+size_t  ggml_nbytes   (const struct ggml_tensor * tensor);
 
 int    ggml_blck_size (enum ggml_type type);
 size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
@@ -355,33 +355,33 @@ struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum   ggml_type type,
         int    n_dims,
-        const int *ne);
+        const int64_t *ne);
 
 struct ggml_tensor * ggml_new_tensor_1d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0);
+        int64_t ne0);
 
 struct ggml_tensor * ggml_new_tensor_2d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1);
+        int64_t ne0,
+        int64_t ne1);
 
 struct ggml_tensor * ggml_new_tensor_3d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1,
-        int    ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);
 
 struct ggml_tensor * ggml_new_tensor_4d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1,
-        int    ne2,
-        int    ne3);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3);
 
 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
@@ -531,30 +531,30 @@ struct ggml_tensor * ggml_reshape(
 struct ggml_tensor * ggml_reshape_2d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   ne0,
-        int                   ne1);
+        int64_t               ne0,
+        int64_t               ne1);
 
 // return view(a)
 // TODO: when we start computing gradient, make a copy instead of view
 struct ggml_tensor * ggml_reshape_3d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   ne0,
-        int                   ne1,
-        int                   ne2);
+        int64_t               ne0,
+        int64_t               ne1,
+        int64_t               ne2);
 
 // offset in bytes
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   ne0,
+        int64_t               ne0,
         size_t                offset);
 
 struct ggml_tensor * ggml_view_2d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   ne0,
-        int                   ne1,
+        int64_t               ne0,
+        int64_t               ne1,
         size_t                nb1, // row stride in bytes
         size_t                offset);
 
--- a/llama.cpp
+++ b/llama.cpp
@@ -256,8 +256,8 @@ static bool kv_cache_init(
     const int n_embd  = hparams.n_embd;
     const int n_layer = hparams.n_layer;
 
-    const int n_mem      = n_layer*n_ctx;
-    const int n_elements = n_embd*n_mem;
+    const int64_t n_mem      = (int64_t)n_layer*n_ctx;
+    const int64_t n_elements = n_embd*n_mem;
 
     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
 
@@ -679,7 +679,7 @@ static bool llama_model_load(
                 return false;
             }
             if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%" PRId64 ", %" PRId64 "], expected [%d, %d]\n",
                         __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
                 return false;
             }