@@ -3322,7 +3322,14 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.attn_norm_2_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
 
                     layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
-                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, layer.ffn_gate ? n_ff : n_ff * 2}, 0);
+
+                    const auto tn_ffn_up_weight = tn(LLM_TENSOR_FFN_UP, "weight", i);
+                    ggml_tensor * t_ffn_up = ml.get_tensor_meta(tn_ffn_up_weight.str().c_str());
+                    const int64_t n_ffn_up = t_ffn_up ? t_ffn_up->ne[1] : n_ff;
+
+                    GGML_ASSERT(n_ffn_up == n_ff || n_ffn_up == n_ff * 2);
+                    layer.ffn_up = create_tensor(tn_ffn_up_weight, {n_embd, n_ffn_up}, 0);
+                    layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ffn_up}, TENSOR_NOT_REQUIRED);
 
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                     layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0);
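
The hunk replaces the gate-based guess (`layer.ffn_gate ? n_ff : n_ff * 2`) with a probe of the tensor metadata: the FFN up-projection width is read from the file itself, falls back to `n_ff` if the tensor is absent, and is asserted to be either `n_ff` or `2 * n_ff`. A minimal, self-contained sketch of that probe-then-assert pattern is below; the loader and tensor types are hypothetical stand-ins for `llama_model_loader` and `ggml_tensor`, not the real API.

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <string>

// Hypothetical stand-in for the shape information stored in GGUF metadata.
struct tensor_meta {
    int64_t ne[2]; // tensor dimensions
};

// Hypothetical stand-in for llama_model_loader: look up a tensor by name.
struct model_loader_stub {
    std::map<std::string, tensor_meta> tensors;

    // Returns the metadata entry if the tensor exists in the file, else nullptr,
    // mirroring how get_tensor_meta() is used in the patch.
    const tensor_meta * get_tensor_meta(const std::string & name) const {
        auto it = tensors.find(name);
        return it == tensors.end() ? nullptr : &it->second;
    }
};

// Decide the FFN up-projection width from the file contents rather than from
// whether a gate tensor happens to be present.
int64_t resolve_n_ffn_up(const model_loader_stub & ml, const std::string & name, int64_t n_ff) {
    const tensor_meta * t = ml.get_tensor_meta(name);
    const int64_t n_ffn_up = t ? t->ne[1] : n_ff;

    // Only a plain up projection (n_ff) or a fused up+gate projection (2 * n_ff) is accepted.
    assert(n_ffn_up == n_ff || n_ffn_up == n_ff * 2);
    return n_ffn_up;
}

int main() {
    const int64_t n_embd = 1024, n_ff = 4096;

    model_loader_stub ml;
    ml.tensors["blk.0.ffn_up.weight"] = {{n_embd, n_ff * 2}}; // fused up+gate variant

    const int64_t n_ffn_up = resolve_n_ffn_up(ml, "blk.0.ffn_up.weight", n_ff);
    return n_ffn_up == n_ff * 2 ? 0 : 1;
}
```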