
iq3_xxs: guards for the no-imatrix situation (#5334)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Kawrakow 1 year ago
parent
commit
89503dcb5f
1 changed file with 5 additions and 5 deletions

+ 5 - 5
llama.cpp

@@ -9456,8 +9456,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) {
             new_type = GGML_TYPE_Q4_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() >= 4) {
-            new_type = GGML_TYPE_Q4_K;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
+            new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
             new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
@@ -9496,9 +9496,9 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
             if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;
         }
-        //else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
-        //    if (i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K;
-        //}
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && !qs.has_imatrix) {
+            new_type = i_layer < n_layer/8 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
             new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K
                      : arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K
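
Taken together, the two hunks change how IQ3_XXS quantization behaves when no importance matrix (imatrix) is supplied: the first hunk (apparently the attention-V branch, judging by the qs.i_attention_wv context) falls back to Q3_K when n_gqa() < 4 and no imatrix is present, and the second hunk (the per-layer branch using i_layer/n_layer) upgrades the first eighth of the layers to Q4_K and the rest to Q3_K instead of leaving them at IQ3_XXS. Below is a minimal standalone sketch of that selection rule, not the actual llama.cpp code path; the enum values and helper names (pick_attn_v, pick_per_layer) are hypothetical stand-ins, and the mapping of the hunks to specific tensor branches is inferred from the surrounding context lines.

// Minimal sketch of the no-imatrix fallback rule (hypothetical names).
#include <cstdio>

enum quant_sketch { SK_Q3_K, SK_Q4_K, SK_IQ3_XXS };   // stand-ins for GGML_TYPE_*

// First hunk: high-GQA models keep the Q4_K upgrade; without an imatrix,
// fall back to Q3_K rather than keeping IQ3_XXS.
static quant_sketch pick_attn_v(int n_gqa, bool has_imatrix) {
    return n_gqa >= 4 ? SK_Q4_K : (!has_imatrix ? SK_Q3_K : SK_IQ3_XXS);
}

// Second hunk: without an imatrix, the first eighth of the layers gets Q4_K
// and the remaining layers get Q3_K instead of IQ3_XXS.
static quant_sketch pick_per_layer(int i_layer, int n_layer, bool has_imatrix) {
    if (!has_imatrix) {
        return i_layer < n_layer/8 ? SK_Q4_K : SK_Q3_K;
    }
    return SK_IQ3_XXS;   // with an imatrix the IQ3_XXS default is kept here
}

int main() {
    std::printf("attn_v, gqa=1, no imatrix: %d\n", pick_attn_v(1, false));         // SK_Q3_K
    std::printf("layer  2/32, no imatrix:   %d\n", pick_per_layer(2, 32, false));  // SK_Q4_K
    std::printf("layer 20/32, no imatrix:   %d\n", pick_per_layer(20, 32, false)); // SK_Q3_K
    return 0;
}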