@@ -606,7 +606,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
 
     ml.get_key(LLM_KV_ROPE_DIMENSION_COUNT, hparams.n_rot, false);
 
-    if (arch == LLM_ARCH_LLAMA || arch == LLM_ARCH_DECI || arch == LLM_ARCH_FALCON) {
+    if (arch == LLM_ARCH_LLAMA || arch == LLM_ARCH_DECI || arch == LLM_ARCH_FALCON || arch == LLM_ARCH_LLAMA_EMBED) {
         if (hparams.n_rot != hparams.n_embd_head_k) {
             throw std::runtime_error(format("invalid n_rot: %u, expected %u", hparams.n_rot, hparams.n_embd_head_k));
         }
@@ -630,6 +630,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     // arch-specific KVs
     switch (arch) {
         case LLM_ARCH_LLAMA:
+        case LLM_ARCH_LLAMA_EMBED:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
 
@@ -2652,6 +2653,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_GRANITE:
             case LLM_ARCH_GRANITE_MOE:
             case LLM_ARCH_MISTRAL3:
+            case LLM_ARCH_LLAMA_EMBED:
                 {
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
 
@@ -7269,16 +7271,20 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
     switch (arch) {
         case LLM_ARCH_LLAMA:
             {
-                llm = std::make_unique<llm_build_llama>(*this, params);
+                llm = std::make_unique<llm_build_llama<false>>(*this, params);
             } break;
         case LLM_ARCH_LLAMA4:
             {
                 if (hparams.swa_type == LLAMA_SWA_TYPE_NONE) {
-                    llm = std::make_unique<llm_build_llama>(*this, params);
+                    llm = std::make_unique<llm_build_llama<false>>(*this, params);
                 } else {
                     llm = std::make_unique<llm_build_llama_iswa>(*this, params);
                 }
             } break;
+        case LLM_ARCH_LLAMA_EMBED:
+            {
+                llm = std::make_unique<llm_build_llama<true>>(*this, params);
+            } break;
         case LLM_ARCH_DECI:
             {
                 llm = std::make_unique<llm_build_deci>(*this, params);
@@ -7874,6 +7880,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_ERNIE4_5:
         case LLM_ARCH_ERNIE4_5_MOE:
         case LLM_ARCH_MISTRAL3:
+        case LLM_ARCH_LLAMA_EMBED:
            return LLAMA_ROPE_TYPE_NORM;
 
        // the pairs of head values are offset by n_rot/2
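For context on the `llm_build_llama<false>` / `llm_build_llama<true>` calls above: the diff implies `llm_build_llama` has become a class template whose bool non-type parameter selects the variant used for LLM_ARCH_LLAMA_EMBED. The sketch below is not the llama.cpp implementation; it only illustrates the compile-time dispatch pattern the call sites rely on. The parameter name `embed`, the base class, and the causal/embedding distinction are assumptions introduced for the example.

```cpp
// Minimal standalone sketch of the pattern, not the actual llama.cpp builder.
// Assumption: the bool template parameter (here called `embed`) flips
// embedding-specific behaviour at compile time, so LLM_ARCH_LLAMA and
// LLM_ARCH_LLAMA_EMBED can share one builder class.
#include <cstdio>
#include <memory>

struct llm_graph_base {
    virtual ~llm_graph_base() = default;
    virtual const char * describe() const = 0;
};

template <bool embed>
struct llm_build_llama_sketch : llm_graph_base {
    const char * describe() const override {
        // In the real builder this choice would affect how the compute graph
        // is constructed; here it only reports which variant was instantiated.
        return embed ? "LLAMA_EMBED variant (embedding behaviour assumed)"
                     : "LLAMA variant (causal generation assumed)";
    }
};

int main() {
    // Mirrors the switch (arch) call sites in the diff: the architecture
    // picks the template argument, and both paths share one builder type.
    std::unique_ptr<llm_graph_base> gen   = std::make_unique<llm_build_llama_sketch<false>>();
    std::unique_ptr<llm_graph_base> emb   = std::make_unique<llm_build_llama_sketch<true>>();
    std::printf("%s\n%s\n", gen->describe(), emb->describe());
    return 0;
}
```

Selecting the variant through a non-type template parameter, rather than a runtime flag, keeps the decision at the `switch (arch)` level shown in the diff and avoids duplicating the graph-building code for the embedding architecture.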