@@ -84,13 +84,13 @@ int main(int argc, char ** argv) {
     model_params.n_gpu_layers = ngl;
 
     llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
-    const llama_vocab * vocab = llama_model_get_vocab(model);
 
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
         return 1;
     }
 
+    const llama_vocab * vocab = llama_model_get_vocab(model);
     // tokenize the prompt
 
     // find the number of tokens in the prompt
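
For context, the hunk above moves the llama_model_get_vocab(model) call below the NULL check, so the vocab is no longer fetched from a model handle whose load may have failed. As a rough sketch of what typically follows the two trailing comments in this example, the snippet below uses the two-pass llama_tokenize convention from llama.h, where passing a NULL output buffer makes the call return the negated number of tokens required. It assumes the vocab pointer from the hunk and a std::string named prompt from the surrounding function, plus <cstdio>, <string>, <vector>, and llama.h; it is an illustration of the pattern, not the exact upstream body.

    // find the number of tokens in the prompt: with a NULL token buffer,
    // llama_tokenize reports the required count as a negative value,
    // so negate it back to get the size
    const int n_prompt = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, true, true);

    // allocate space for the tokens and tokenize the prompt for real;
    // a negative return here means the tokenization itself failed
    std::vector<llama_token> prompt_tokens(n_prompt);
    if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), true, true) < 0) {
        fprintf(stderr, "%s: error: failed to tokenize the prompt\n", __func__);
        return 1;
    }

The two trailing boolean arguments enable adding and parsing special tokens, which matches how the simple example treats an initial raw prompt.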