|
|
@@ -835,7 +835,7 @@ struct llm_tokenizer_ugm_session {
|
|
|
}
|
|
|
|
|
|
// initialize score_sum to -DBL_MAX so it will be always lower than sums of token scores
|
|
|
- std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
|
|
|
+ std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
|
|
|
// at the beginning tokenization score is zero
|
|
|
tokenization_results[0] = { vocab.token_unk(), 0, 0 };
|
|
|
|
|
|
@@ -867,7 +867,7 @@ struct llm_tokenizer_ugm_session {
|
|
|
const double challenger_score = current_best.score_sum + token_score;
|
|
|
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
|
|
|
if (challenger_score > current_champ.score_sum) {
|
|
|
- struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
|
|
|
+ struct best_tokenization challenger = { token_id, input_offset, challenger_score };
|
|
|
current_champ = challenger;
|
|
|
}
|
|
|
}
|
|
|
@@ -881,7 +881,7 @@ struct llm_tokenizer_ugm_session {
|
|
|
prefix_offset = input_offset + n_utf8_code_units;
|
|
|
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
|
|
|
if (challenger_score > current_champ.score_sum) {
|
|
|
- struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
|
|
|
+ struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
|
|
|
current_champ = challenger;
|
|
|
}
|
|
|
}
|
|
|
@@ -1007,7 +1007,7 @@ private:
|
|
|
struct best_tokenization {
|
|
|
llama_token token_id;
|
|
|
size_t input_offset;
|
|
|
- float score_sum;
|
|
|
+ double score_sum;
|
|
|
};
|
|
|
|
|
|
struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
|