@@ -25,6 +25,7 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
// ops for which this function returns true must not use restrict pointers in their backend implementations
bool ggml_op_can_inplace(enum ggml_op op) {
    switch (op) {
+        case GGML_OP_FILL:
        case GGML_OP_SCALE:
        case GGML_OP_DIAG_MASK_ZERO:
        case GGML_OP_DIAG_MASK_INF:
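
Marking an op as in-place capable means the graph allocator may hand the backend a `dst` tensor whose data pointer aliases its source, while `__restrict__` asserts the opposite; the two are mutually exclusive, which is why the kernel change in the next hunk drops the qualifier. As a hedged illustration of the hazard (the kernel below is hypothetical and not part of this PR), consider an op that reads `src` and writes `dst`:

```cuda
// Hypothetical kernel, for illustration only: with both pointers declared
// __restrict__, the compiler may assume dst and src never alias and is free
// to cache or reorder the loads. When the allocator runs the op in place,
// dst == src, so that assumption is violated and the behavior is undefined.
template <typename T>
static __global__ void scale_kernel(T * __restrict__ dst, const T * __restrict__ src,
                                    const int64_t k, const T scale) {
    const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= k) {
        return;
    }
    dst[i] = src[i] * scale; // UB when dst == src
}
```

Dropping `__restrict__` for ops in this list is therefore a correctness requirement, not a style choice.
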
@@ -4,7 +4,7 @@
#define CUDA_FILL_BLOCK_SIZE 256
template <typename T>
-static __global__ void fill_kernel(T * __restrict__ dst, const int64_t k, const T value) {
+static __global__ void fill_kernel(T * dst, const int64_t k, const T value) {
    const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= k) {
        return;
    }
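
For context, the host side of such a kernel typically launches one thread per element, rounded up to whole blocks of `CUDA_FILL_BLOCK_SIZE`. A minimal sketch, assuming a wrapper named `fill_cuda` and a caller-provided stream (neither appears in this diff):

```cuda
// Hypothetical host-side wrapper: launches fill_kernel with one thread per
// element, padded up to a multiple of CUDA_FILL_BLOCK_SIZE.
static void fill_cuda(float * dst, const int64_t k, const float value, cudaStream_t stream) {
    const int num_blocks = (int)((k + CUDA_FILL_BLOCK_SIZE - 1) / CUDA_FILL_BLOCK_SIZE);
    fill_kernel<float><<<num_blocks, CUDA_FILL_BLOCK_SIZE, 0, stream>>>(dst, k, value);
}
```

Since the fill kernel takes only a single tensor pointer and a scalar, the removed `__restrict__` was effectively vacuous here; removing it keeps the kernel consistent with the rule stated above `ggml_op_can_inplace`.
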