@@ -1253,7 +1253,7 @@ static void llama_model_load_internal(
         vram_scratch = n_batch * MB;
         ggml_cuda_set_scratch_size(vram_scratch);
         if (n_gpu_layers > 0) {
-            fprintf(stderr, "%s: allocating batch_size x 1 MB = %ld MB VRAM for the scratch buffer\n",
+            fprintf(stderr, "%s: allocating batch_size x 1 MB = %zd MB VRAM for the scratch buffer\n",
                    __func__, vram_scratch / MB);
         }
     }
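For context on the `%ld` → `%zd` swap: a minimal sketch of why the length modifier matters, assuming `vram_scratch` is a `size_t` (which this hunk alone does not show). On LLP64 platforms such as 64-bit Windows, `long` is 32 bits while `size_t` is 64 bits, so `%ld` reads the argument at the wrong width; the C99 `z` length modifier matches `size_t` exactly (`%zu` for the unsigned type, `%zd` for its signed counterpart).

```c
#include <stdio.h>
#include <stddef.h>

int main(void) {
    const size_t MB = 1024 * 1024;
    size_t vram_scratch = 512 * MB;  /* hypothetical scratch buffer size */

    /* Portable: the `z` modifier tells printf the argument is size_t-sized,
     * regardless of whether `long` is 32 or 64 bits on this platform. */
    printf("scratch buffer: %zu MB\n", vram_scratch / MB);
    return 0;
}
```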