@@ -122,36 +122,11 @@ int main(int argc, char ** argv) {
 
     llama_context * ctx;
 
-    // load the model
-    {
-        auto lparams = llama_context_default_params();
-
-        lparams.n_ctx      = params.n_ctx;
-        lparams.n_parts    = params.n_parts;
-        lparams.seed       = params.seed;
-        lparams.f16_kv     = params.memory_f16;
-        lparams.logits_all = params.perplexity;
-        lparams.use_mmap   = params.use_mmap;
-        lparams.use_mlock  = params.use_mlock;
-        lparams.embedding  = params.embedding;
-
-        ctx = llama_init_from_file(params.model.c_str(), lparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
-            return 1;
-        }
-    }
-
-    if (!params.lora_adapter.empty()) {
-        int err = llama_apply_lora_from_file(ctx,
-                                             params.lora_adapter.c_str(),
-                                             params.lora_base.empty() ? NULL : params.lora_base.c_str(),
-                                             params.n_threads);
-        if (err != 0) {
-            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
-            return 1;
-        }
-    }
+    // load the model and apply lora adapter, if any
+    ctx = llama_init_from_gpt_params(params);
+    if (ctx == NULL) {
+        fprintf(stderr, "%s: error: unable to load model\n", __func__);
+        return 1;
     }
 
     // print system information
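
For reference, the helper introduced on the + side folds the two removed blocks (context parameter setup, model load, and LoRA application) behind a single call. A minimal sketch of what llama_init_from_gpt_params might look like, reconstructed from the removed lines above; the llama_free() on the LoRA failure path is an assumption, not something shown in this diff:

#include <cstdio>

#include "common.h" // gpt_params; assumed home of the new helper
#include "llama.h"

llama_context * llama_init_from_gpt_params(const gpt_params & params) {
    // forward the CLI parameters to the context parameters
    auto lparams = llama_context_default_params();

    lparams.n_ctx      = params.n_ctx;
    lparams.n_parts    = params.n_parts;
    lparams.seed       = params.seed;
    lparams.f16_kv     = params.memory_f16;
    lparams.logits_all = params.perplexity;
    lparams.use_mmap   = params.use_mmap;
    lparams.use_mlock  = params.use_mlock;
    lparams.embedding  = params.embedding;

    llama_context * lctx = llama_init_from_file(params.model.c_str(), lparams);
    if (lctx == NULL) {
        fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
        return NULL;
    }

    // apply the LoRA adapter, if any
    if (!params.lora_adapter.empty()) {
        int err = llama_apply_lora_from_file(lctx,
                                             params.lora_adapter.c_str(),
                                             params.lora_base.empty() ? NULL : params.lora_base.c_str(),
                                             params.n_threads);
        if (err != 0) {
            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
            llama_free(lctx); // assumption: release the context rather than leak it
            return NULL;
        }
    }

    return lctx;
}

With a helper along these lines, each example's load path reduces to the handful of added lines in the hunk above.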