
convert : fix RWKV v6 model conversion (#10913)

* Enable --no-context-shift for llama-perplexity example

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* RWKV 6: Fix error in ggml_cuda_op_bin_bcast

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

---------

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
Molly Sophia 1 year ago
parent
commit
0a11f8b7b5

+ 1 - 1
common/arg.cpp

@@ -626,7 +626,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.ctx_shift = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
     add_opt(common_arg(
         {"--chunks"}, "N",
         string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),

+ 3 - 0
convert_hf_to_gguf.py

@@ -3065,6 +3065,9 @@ class Rwkv6Model(Model):
         if new_name.endswith("time_mix_w2.weight"):
             data_torch = data_torch.permute(0, 2, 1)
 
+        if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+            data_torch = data_torch.squeeze()
+
         rescale_every_n_layers = self.hparams["rescale_every"]
         if rescale_every_n_layers > 0:
             if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):