@@ -2120,7 +2120,7 @@ struct llama_vocab {
     id special_prefix_id = -1;
     id special_suffix_id = -1;
     id special_middle_id = -1;
-    id special_eot_id    = -1;
+    id special_eot_id    = -1; // TODO: move above after "eos_id", and here add "file separator" token
 
     bool add_space_prefix = true;
@@ -3770,7 +3770,7 @@ static void llm_load_hparams(
                 switch (hparams.n_layer) {
                     case 22: model.type = e_model::MODEL_1B; break;
                     case 26: model.type = e_model::MODEL_3B; break;
-                    case 32: model.type = e_model::MODEL_7B; break;
+                    case 32: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_7B : e_model::MODEL_8B; break; // LLaMa 8B v3 uses GQA
                     case 40: model.type = e_model::MODEL_13B; break;
                     case 48: model.type = e_model::MODEL_34B; break;
                     case 60: model.type = e_model::MODEL_30B; break;
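(Editorial note, not part of the patch.) Both the v2 7B and the v3 8B checkpoints have 32 layers, so n_layer alone can no longer tell them apart; the new ternary keys off the attention layout instead. LLaMA 2 7B uses full multi-head attention (n_head == n_head_kv == 32), while LLaMA 3 8B uses grouped-query attention (n_head == 32, n_head_kv == 8). A self-contained sketch of the same predicate, with head counts assumed from the published configs:

    // standalone illustration; the head counts are assumed from the model configs
    #include <cstdio>

    int main() {
        struct hp { const char * name; int n_head; int n_head_kv; };
        const hp configs[] = {
            { "LLaMA 2 7B", 32, 32 }, // MHA: every query head has its own KV head
            { "LLaMA 3 8B", 32,  8 }, // GQA: four query heads share one KV head
        };
        for (const hp & c : configs) {
            // same test as the patched `case 32` above
            printf("%s -> %s\n", c.name, c.n_head == c.n_head_kv ? "MODEL_7B" : "MODEL_8B");
        }
        return 0;
    }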
@@ -4179,7 +4179,10 @@ static void llm_load_vocab(
                 vocab.special_prefix_id = 67;
                 vocab.special_suffix_id = 69;
                 vocab.special_middle_id = 68;
-                vocab.special_eot_id    = 70;
+                // TODO: this is not EOT, it is "file separator" token, needs fix
+                //       https://huggingface.co/google/codegemma-7b-it/blob/9b1d9231388358c04d90bd003458f5070d97db44/tokenizer_config.json#L565-L572
+                //vocab.special_eot_id    = 70;
+                vocab.special_eot_id    = 107;
             }
         }
@@ -4308,6 +4311,7 @@ static void llm_load_vocab(
             { LLM_KV_TOKENIZER_MIDDLE_ID, vocab.special_middle_id },
             { LLM_KV_TOKENIZER_EOT_ID,    vocab.special_eot_id    },
         };
+
         for (const auto & it : special_token_types) {
             const std::string & key = kv(std::get<0>(it));
             int32_t & id = std::get<1>(it);
@@ -4322,7 +4326,6 @@ static void llm_load_vocab(
             } else {
                 id = new_id;
             }
-
         }
 
         // Handle add_bos_token and add_eos_token
@@ -4336,6 +4339,27 @@ static void llm_load_vocab(
                 vocab.special_add_eos = int(temp);
             }
         }
+
+        // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
+        //
+        // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
+        //       for now, we apply this workaround to find the EOT token based on its text
+        if (vocab.special_eot_id == -1) {
+            for (const auto & t : vocab.token_to_id) {
+                if (
+                        // TODO: gemma "<end_of_turn>" is exported as a normal token, so the following check does not work
+                        //       need to fix convert script
+                        //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
+                        (t.first == "<|eot_id|>" ||
+                         t.first == "<|im_emd|>" ||
+                         t.first == "<end_of_turn>"
+                        )
+                   ) {
+                    vocab.special_eot_id = t.second;
+                    break;
+                }
+            }
+        }
     }
 
     // build special tokens cache
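(Editorial note, not part of the patch.) The fallback added above scans the vocabulary once and matches on token text, because GGUF files converted before this change carry no LLAMA_KV_TOKENIZER_EOT_ID metadata. A self-contained sketch of the same lookup in isolation; the map contents and ids below are illustrative stand-ins (in LLaMA 3 vocabularies "<|eot_id|>" is, to my knowledge, token 128009):

    // standalone illustration of the text-based fallback; ids are assumed
    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
        // stand-in for vocab.token_to_id; a real vocabulary holds every token
        const std::map<std::string, int> token_to_id = {
            { "hello",      15339  },
            { "<|eot_id|>", 128009 },
        };
        int special_eot_id = -1;
        for (const auto & t : token_to_id) {
            if (t.first == "<|eot_id|>" || t.first == "<|im_emd|>" || t.first == "<end_of_turn>") {
                special_eot_id = t.second;
                break;
            }
        }
        printf("special_eot_id = %d\n", special_eot_id);
        return 0;
    }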
@@ -4498,14 +4522,19 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     LLAMA_LOG_INFO("%s: general.name     = %s\n",    __func__, model.name.c_str());
 
     // special tokens
-    if (vocab.special_bos_id  != -1) { LLAMA_LOG_INFO( "%s: BOS token        = %d '%s'\n", __func__, vocab.special_bos_id,  vocab.id_to_token[vocab.special_bos_id].text.c_str() );  }
-    if (vocab.special_eos_id  != -1) { LLAMA_LOG_INFO( "%s: EOS token        = %d '%s'\n", __func__, vocab.special_eos_id,  vocab.id_to_token[vocab.special_eos_id].text.c_str() );  }
-    if (vocab.special_unk_id  != -1) { LLAMA_LOG_INFO( "%s: UNK token        = %d '%s'\n", __func__, vocab.special_unk_id,  vocab.id_to_token[vocab.special_unk_id].text.c_str() );  }
-    if (vocab.special_sep_id  != -1) { LLAMA_LOG_INFO( "%s: SEP token        = %d '%s'\n", __func__, vocab.special_sep_id,  vocab.id_to_token[vocab.special_sep_id].text.c_str() );  }
-    if (vocab.special_pad_id  != -1) { LLAMA_LOG_INFO( "%s: PAD token        = %d '%s'\n", __func__, vocab.special_pad_id,  vocab.id_to_token[vocab.special_pad_id].text.c_str() );  }
-    if (vocab.special_cls_id  != -1) { LLAMA_LOG_INFO( "%s: CLS token        = %d '%s'\n", __func__, vocab.special_cls_id,  vocab.id_to_token[vocab.special_cls_id].text.c_str() );  }
-    if (vocab.special_mask_id != -1) { LLAMA_LOG_INFO( "%s: MASK token       = %d '%s'\n", __func__, vocab.special_mask_id, vocab.id_to_token[vocab.special_mask_id].text.c_str() ); }
-    if (vocab.linefeed_id     != -1) { LLAMA_LOG_INFO( "%s: LF token         = %d '%s'\n", __func__, vocab.linefeed_id,     vocab.id_to_token[vocab.linefeed_id].text.c_str() );     }
+    if (vocab.special_bos_id    != -1) { LLAMA_LOG_INFO( "%s: BOS token        = %d '%s'\n", __func__, vocab.special_bos_id,    vocab.id_to_token[vocab.special_bos_id].text.c_str() );    }
+    if (vocab.special_eos_id    != -1) { LLAMA_LOG_INFO( "%s: EOS token        = %d '%s'\n", __func__, vocab.special_eos_id,    vocab.id_to_token[vocab.special_eos_id].text.c_str() );    }
+    if (vocab.special_unk_id    != -1) { LLAMA_LOG_INFO( "%s: UNK token        = %d '%s'\n", __func__, vocab.special_unk_id,    vocab.id_to_token[vocab.special_unk_id].text.c_str() );    }
+    if (vocab.special_sep_id    != -1) { LLAMA_LOG_INFO( "%s: SEP token        = %d '%s'\n", __func__, vocab.special_sep_id,    vocab.id_to_token[vocab.special_sep_id].text.c_str() );    }
+    if (vocab.special_pad_id    != -1) { LLAMA_LOG_INFO( "%s: PAD token        = %d '%s'\n", __func__, vocab.special_pad_id,    vocab.id_to_token[vocab.special_pad_id].text.c_str() );    }
+    if (vocab.special_cls_id    != -1) { LLAMA_LOG_INFO( "%s: CLS token        = %d '%s'\n", __func__, vocab.special_cls_id,    vocab.id_to_token[vocab.special_cls_id].text.c_str() );    }
+    if (vocab.special_mask_id   != -1) { LLAMA_LOG_INFO( "%s: MASK token       = %d '%s'\n", __func__, vocab.special_mask_id,   vocab.id_to_token[vocab.special_mask_id].text.c_str() );   }
+
+    if (vocab.linefeed_id       != -1) { LLAMA_LOG_INFO( "%s: LF token         = %d '%s'\n", __func__, vocab.linefeed_id,       vocab.id_to_token[vocab.linefeed_id].text.c_str() );       }
+    if (vocab.special_prefix_id != -1) { LLAMA_LOG_INFO( "%s: PRE token        = %d '%s'\n", __func__, vocab.special_prefix_id, vocab.id_to_token[vocab.special_prefix_id].text.c_str() ); }
+    if (vocab.special_suffix_id != -1) { LLAMA_LOG_INFO( "%s: SUF token        = %d '%s'\n", __func__, vocab.special_suffix_id, vocab.id_to_token[vocab.special_suffix_id].text.c_str() ); }
+    if (vocab.special_middle_id != -1) { LLAMA_LOG_INFO( "%s: MID token        = %d '%s'\n", __func__, vocab.special_middle_id, vocab.id_to_token[vocab.special_middle_id].text.c_str() ); }
+    if (vocab.special_eot_id    != -1) { LLAMA_LOG_INFO( "%s: EOT token        = %d '%s'\n", __func__, vocab.special_eot_id,    vocab.id_to_token[vocab.special_eot_id].text.c_str() );    }
 }
 
 // Returns false if cancelled by progress_callback
@@ -13268,16 +13297,14 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
     GGML_ASSERT(ctx);
     const int64_t t_start_sample_us = ggml_time_us();
 
-    bool allow_eos = false;
+    bool allow_eog = false;
     for (const auto & stack : grammar->stacks) {
         if (stack.empty()) {
-            allow_eos = true;
+            allow_eog = true;
             break;
         }
     }
 
-    const llama_token eos = llama_token_eos(&ctx->model);
-
     std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
     candidates_decoded.reserve(candidates->size);
     std::vector<llama_grammar_candidate>                              candidates_grammar;
@@ -13286,8 +13313,8 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id    = candidates->data[i].id;
         const std::string piece = llama_token_to_piece(ctx, id);
-        if (id == eos) {
-            if (!allow_eos) {
+        if (llama_token_is_eog(&ctx->model, id)) {
+            if (!allow_eog) {
                 candidates->data[i].logit = -INFINITY;
             }
         } else if (piece.empty() || piece[0] == 0) {
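(Editorial note, not part of the patch.) Taken together, the two grammar-sampling hunks widen the old rule from "EOS only" to "any end-of-generation token": unless at least one grammar stack is empty, meaning the grammar may legally terminate, every EOG candidate has its logit forced to -INFINITY. A self-contained sketch of that masking rule; the candidate struct and token ids are stand-ins, not llama.cpp types:

    // standalone illustration of the EOG masking rule; ids are assumed
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct candidate { int id; float logit; };

    int main() {
        const std::vector<int> eog_ids   = { 2, 128009 }; // stand-ins for EOS and EOT
        const bool             allow_eog = false;         // no grammar stack was empty
        std::vector<candidate> cands     = { { 2, 1.5f }, { 42, 0.7f }, { 128009, 2.1f } };
        for (candidate & c : cands) {
            const bool is_eog = std::find(eog_ids.begin(), eog_ids.end(), c.id) != eog_ids.end();
            if (is_eog && !allow_eog) {
                c.logit = -INFINITY; // grammar cannot terminate yet, so ban EOG
            }
        }
        for (const candidate & c : cands) {
            printf("id %6d -> logit %f\n", c.id, c.logit);
        }
        return 0;
    }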
@@ -13476,7 +13503,7 @@ llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_arra
 void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token) {
     const int64_t t_start_sample_us = ggml_time_us();
 
-    if (token == llama_token_eos(&ctx->model)) {
+    if (llama_token_is_eog(&ctx->model, token)) {
         for (const auto & stack : grammar->stacks) {
             if (stack.empty()) {
                 return;
@@ -16880,6 +16907,13 @@ llama_token_type llama_token_get_type(const struct llama_model * model, llama_to
     return model->vocab.id_to_token[token].type;
 }
 
+bool llama_token_is_eog(const struct llama_model * model, llama_token token) {
+    return token != -1 && (
+        token == llama_token_eos(model) ||
+        token == llama_token_eot(model)
+    );
+}
+
 llama_token llama_token_bos(const struct llama_model * model) {
     return model->vocab.special_bos_id;
 }
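(Editorial note, not part of the patch.) The new public helper gives callers one stop test that covers both EOS and EOT, replacing the scattered `token == llama_token_eos(model)` checks. A minimal sketch of how a generation loop might use it; the decode and candidate plumbing is elided, and the setup is assumed to follow the llama.cpp examples of this era:

    // caller-side sketch; assumes a loaded context/model and filled candidates
    #include "llama.h"

    void generate(llama_context * ctx, const llama_model * model,
                  llama_token_data_array * candidates, int n_predict) {
        for (int i = 0; i < n_predict; ++i) {
            const llama_token id = llama_sample_token_greedy(ctx, candidates);
            if (llama_token_is_eog(model, id)) {
                break; // stops on EOS *or* EOT (e.g. "<|eot_id|>"), not just EOS
            }
            // ... accept/print the token, llama_decode() the next batch,
            //     and rebuild `candidates` from the fresh logits ...
        }
    }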