|
|
@@ -66,6 +66,7 @@ const char * llm_type_name(llm_type type) {
|
|
|
case LLM_TYPE_1_7B: return "1.7B";
|
|
|
case LLM_TYPE_1_8B: return "1.8B";
|
|
|
case LLM_TYPE_2B: return "2B";
|
|
|
+ case LLM_TYPE_2_6B: return "2.6B";
|
|
|
case LLM_TYPE_2_8B: return "2.8B";
|
|
|
case LLM_TYPE_2_9B: return "2.9B";
|
|
|
case LLM_TYPE_3B: return "3B";
|
|
|
@@ -1977,10 +1978,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|
|
for (uint32_t il = 0; il < hparams.n_layer; ++il) {
|
|
|
hparams.recurrent_layer_arr[il] = hparams.n_head_kv(il) == 0;
|
|
|
}
|
|
|
- switch (hparams.n_embd) {
|
|
|
- case 1024: type = LLM_TYPE_350M; break;
|
|
|
- case 1536: type = LLM_TYPE_700M; break;
|
|
|
- case 2048: type = LLM_TYPE_1_2B; break;
|
|
|
+ switch (hparams.n_ff()) {
|
|
|
+ case 4608: type = LLM_TYPE_350M; break;
|
|
|
+ case 6912: type = LLM_TYPE_700M; break;
|
|
|
+ case 8192: type = LLM_TYPE_1_2B; break;
|
|
|
+ case 10752: type = LLM_TYPE_2_6B; break;
|
|
|
default: type = LLM_TYPE_UNKNOWN;
|
|
|
}
|
|
|
} break;
|