@@ -668,6 +668,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
hparams.n_swa = 8192;
hparams.n_attn_temp_floor_scale = 8192;
hparams.f_attn_temp_scale = 0.1f;
+ hparams.f_attn_temp_offset = 1.0f;
hparams.set_swa_pattern(4); // pattern: 3 chunked - 1 full
}
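For context, the Llama 4 path above ties its attention-temperature term to f_attn_temp_scale and n_attn_temp_floor_scale; the new f_attn_temp_offset presumably generalizes the constant term of that expression (1.0f here, 0.0f for the architectures in the hunks below). A minimal sketch of that reading, using a hypothetical helper attn_temp_example() and an assumed log/floor formulation, not code from this patch:

#include <cmath>
#include <cstdint>

// Hypothetical helper, not part of this patch: per-position attention
// temperature built from the three hyperparameters. With
// f_attn_temp_offset = 1.0f this matches a hard-coded "+ 1.0f" term;
// with 0.0f the offset term vanishes.
static float attn_temp_example(int64_t pos,
                               uint32_t n_attn_temp_floor_scale,
                               float    f_attn_temp_scale,
                               float    f_attn_temp_offset) {
    const float floor_term = std::floor((pos + 1.0f) / n_attn_temp_floor_scale);
    return f_attn_temp_scale * std::log(floor_term + 1.0f) + f_attn_temp_offset;
}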
@@ -1646,6 +1647,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale, false);
ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_LENGTH, hparams.n_attn_temp_floor_scale, false);

+ hparams.f_attn_temp_offset = 0.0f;
+
switch (hparams.n_layer) {
case 27: type = LLM_TYPE_16B; break;
case 60: type = LLM_TYPE_236B; break;
@@ -2276,6 +2279,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false);
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f);

+ hparams.f_attn_temp_offset = 0.0f;
+
// TODO: maybe add n_attn_temp_floor_scale as a separate KV?
if (hparams.f_attn_temp_scale != 0.0f) {
hparams.n_attn_temp_floor_scale = hparams.n_ctx_orig_yarn;