@@ -17598,6 +17598,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                     /* recurrent_kv_size */ std::max((uint32_t) 1, cparams.n_seq_max),
                     /* n_seq_max         */ cparams.n_seq_max,
                     /* offload           */ cparams.offload_kqv,
+                    /* unified           */ cparams.kv_unified,
                     /* filter_attn       */ (arch == LLM_ARCH_FALCON_H1) ? [&](int32_t) { return true; } : (llama_memory_hybrid::layer_filter_cb)nullptr,
                     /* filter_recr       */ (arch == LLM_ARCH_FALCON_H1) ? [&](int32_t) { return true; } : (llama_memory_hybrid::layer_filter_cb)nullptr);
             } else {