@@ -67,7 +67,7 @@ ggml_tensor * llm_build_nemotron_h::build_attention_layer(ggml_tensor *
                                                           const llama_model & model,
                                                           const int64_t n_embd_head,
                                                           const int il) {
-    // compute Q and K and (optionally) RoPE them
+    // compute Q and K
     ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
     cb(Qcur, "Qcur", il);
     if (model.layers[il].bq) {