1 год назад · ffd00797d8
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
 
				         GGML_ASSERT(!original_logits.empty());
			
 
				     }
			
 
				     llama_token id = 0;
			
 
				-    // Get a pointer to the logits
			
 
				-    float * logits = llama_get_logits_ith(ctx_main, idx);
			
 
				 
			
 
				     if (temp < 0.0) {
			
 
				         // greedy sampling, with probs
			
@@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
 
				     }
			
 
				 
			
 
				     if (ctx_sampling->grammar != NULL && !is_resampling) {
			
 
				+        // Get a pointer to the logits
			
 
				+        float * logits = llama_get_logits_ith(ctx_main, idx);
			
 
				+
			
 
				         // Create an array with a single token data element for the sampled id
			
 
				         llama_token_data single_token_data = {id, logits[id], 0.0f};
			
 
				         llama_token_data_array single_token_data_array = { &single_token_data, 1, false };