
We could use std::unordered_map over std::map (#305)

* Improve performance: replace std::map with std::unordered_map, and change std::map<id, token> id_to_token; to std::vector<token> id_to_token; — token ids are dense, so a vector allows direct indexing (see the sketch before the diffs below)

* Fix the previous commit: in gpt_vocab_init, add vocab.id_to_token.resize(vocab.token_to_id.size());

* Remove #include <map> where it is no longer needed

* Nest struct token_score inside gpt_vocab

* Rename the token member to tok
Fabio R. Sluzala 2 years ago
parent
commit
353ec251a4
4 changed files with 36 additions and 24 deletions
  1. main.cpp (+10 -8)
  2. quantize.cpp (+5 -3)
  3. utils.cpp (+12 -8)
  4. utils.h (+9 -5)
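
For orientation before the diffs: a minimal sketch of the vocabulary layout this commit arrives at. The struct itself is copied from the utils.h hunk below; the comments and includes are added here for illustration.

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct llama_vocab {
    using id    = int32_t;
    using token = std::string;

    // the per-token score now lives next to the token string,
    // replacing the separate std::map<id, float> score
    struct token_score {
        token tok;
        float score;
    };

    std::unordered_map<token, id> token_to_id; // token -> id: average O(1) hash lookup
    std::vector<token_score>      id_to_token; // id -> token: ids are dense, index directly
};

Token-to-id lookups thus move from an O(log n) red-black tree walk to an average O(1) hash probe, and id-to-token lookups become plain array indexing.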

+ 10 - 8
main.cpp

@@ -9,7 +9,6 @@
 #include <cstring>
 #include <fstream>
 #include <iostream>
-#include <map>
 #include <string>
 #include <vector>
 
@@ -69,7 +68,7 @@ void set_console_state(console_state new_st)
 static const int EOS_TOKEN_ID = 2;
 
 // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
     { 5120, 2 },
     { 6656, 4 },
@@ -123,7 +122,7 @@ struct llama_model {
 
     //
     struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
 };
 
 // load the model's weights from a file
@@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
     // load vocab
     {
         std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);
         std::vector<char> tmp(64);
 
         for (int i = 0; i < model.hparams.n_vocab; i++) {
@@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
             fin.read((char *) &score, sizeof(score));
 
             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
         }
     }
 
@@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
     for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
     }
     fprintf(stderr, "\n");
     if (params.interactive) {
@@ -1154,7 +1156,7 @@ int main(int argc, char ** argv) {
         // display text
         if (!input_noecho) {
             for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
+                printf("%s", vocab.id_to_token[id].tok.c_str());
             }
             fflush(stdout);
         }
@@ -1169,7 +1171,7 @@ int main(int argc, char ** argv) {
             // check for reverse prompt
             std::string last_output;
             for (auto id : last_n_tokens) {
-                last_output += vocab.id_to_token[id];
+                last_output += vocab.id_to_token[id].tok;
             }
 
             // Check if each of the reverse prompts appears at the end of the output.
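
A note on the resize() calls added in each load path: std::map::operator[] default-constructs a missing entry, so the old code could assign to vocab.id_to_token[i] freely, but indexing a std::vector out of range is undefined behavior, so the vector must be sized before the loop writes into it. A hypothetical, self-contained condensation of the loading loops (build_vocab and its parameters are illustrative, not from the commit):

#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

struct token_score { std::string tok; float score; };

// Size the vector once, then fill each slot in place. With the old
// std::map, operator[] inserted missing entries on demand; a vector
// must be resized first or the indexing is undefined behavior.
void build_vocab(const std::vector<std::pair<std::string, float>> & entries,
                 std::unordered_map<std::string, int32_t> & token_to_id,
                 std::vector<token_score> & id_to_token) {
    id_to_token.resize(entries.size());          // allocate every slot once
    for (int32_t i = 0; i < (int32_t) entries.size(); i++) {
        token_to_id[entries[i].first] = i;
        auto & tok_score = id_to_token[i];       // in range by construction
        tok_score.tok   = entries[i].first;
        tok_score.score = entries[i].second;
    }
}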

+ 5 - 3
quantize.cpp

@@ -8,7 +8,6 @@
 #include <cstdio>
 #include <cstring>
 #include <fstream>
-#include <map>
 #include <string>
 #include <vector>
 #include <regex>
@@ -130,6 +129,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
         }
 
         std::string word;
+        vocab.id_to_token.resize(n_vocab);
         for (int i = 0; i < n_vocab; i++) {
             uint32_t len;
             finp.read ((char *) &len, sizeof(len));
@@ -144,8 +144,10 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
             fout.write((char *) &score, sizeof(score));
 
             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
         }
     }
 

+ 12 - 8
utils.cpp

@@ -155,8 +155,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
     }
 }
 
-std::map<std::string, int32_t> json_parse(const std::string & fname) {
-    std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::unordered_map<std::string, int32_t> result;
 
     // read file into string
     std::string json;
@@ -360,16 +360,16 @@ private:
             return;
         }
 
-        auto score = vocab_.score.find((*token).second);
-
-        if (score == vocab_.score.end()) {
+        if (static_cast<size_t>((*token).second) >= vocab_.id_to_token.size()) {
             return;
         }
 
+        const auto &tok_score = vocab_.id_to_token[(*token).second];
+
         llama_sp_bigram bigram;
         bigram.left = left;
         bigram.right = right;
-        bigram.score = (*score).second;
+        bigram.score = tok_score.score;
         bigram.size = text.size();
         work_queue_.push(bigram);
     }
@@ -393,6 +393,8 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
     std::string word;
     std::vector<char> tmp(64);
 
+    vocab.id_to_token.resize(n_vocab);
+
     for (int i = 0; i < n_vocab; i++) {
         uint32_t len;
         fin.read((char *) &len, sizeof(len));
@@ -410,8 +412,10 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
         fin.read((char *) &score, sizeof(score));
 
         vocab.token_to_id[word] = i;
-        vocab.id_to_token[i] = word;
-        vocab.score[i] = score;
+
+        auto &tok_score = vocab.id_to_token[i];
+        tok_score.tok = word;
+        tok_score.score = score;
     }
 
     return true;
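
The tokenizer hunk above also removes a second tree lookup: previously the code found a token's id in one std::map and then its score in a separate std::map, while now a single hash lookup is followed by a bounds-checked vector index. A self-contained, hypothetical rendering of that access pattern (lookup_score and its parameters are illustrative, not part of the commit):

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct token_score { std::string tok; float score; };

// One average-O(1) hash lookup plus a bounds-checked vector index,
// replacing the old pair of O(log n) std::map lookups.
float lookup_score(const std::unordered_map<std::string, int32_t> & token_to_id,
                   const std::vector<token_score> & id_to_token,
                   const std::string & text,
                   float missing = 0.0f) {
    const auto it = token_to_id.find(text);
    if (it == token_to_id.end()) {
        return missing;  // token unknown
    }
    if (static_cast<size_t>(it->second) >= id_to_token.size()) {
        return missing;  // id out of range, mirroring the guard in the diff
    }
    return id_to_token[it->second].score;  // score stored next to the token
}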

+ 9 - 5
utils.h

@@ -3,7 +3,7 @@
 #pragma once
 
 #include <string>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <random>
 #include <thread>
@@ -65,15 +65,19 @@ struct llama_vocab {
     using id    = int32_t;
     using token = std::string;
 
-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
-    std::map<id, float> score;
+    struct token_score {
+        token tok;
+        float score;
+    };
+
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token_score> id_to_token;
 };
 
 void replace(std::string & str, const std::string & needle, const std::string & replacement);
 
 // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
 
 // TODO: temporary until #77 is merged, need this now for some tokenizer tests
 bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
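
Finally, a small self-contained usage sketch of the new llama_vocab. Only the struct layout comes from the commit; the populate-and-print code and the sample words are illustrative.

#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct llama_vocab {
    using id    = int32_t;
    using token = std::string;
    struct token_score { token tok; float score; };
    std::unordered_map<token, id> token_to_id;
    std::vector<token_score>      id_to_token;
};

int main() {
    llama_vocab vocab;
    const std::vector<std::string> words = { "<unk>", "<s>", "</s>", "hello" };

    // Size the vector up front, as the load paths now do.
    vocab.id_to_token.resize(words.size());
    for (int32_t i = 0; i < (int32_t) words.size(); i++) {
        vocab.token_to_id[words[i]] = i;
        auto & tok_score = vocab.id_to_token[i];
        tok_score.tok   = words[i];
        tok_score.score = -(float) i;  // placeholder scores
    }

    // id -> token is a plain vector index; token -> id is a hash lookup.
    printf("%d -> '%s'\n", 3, vocab.id_to_token[3].tok.c_str());
    printf("'%s' -> %d\n", "hello", vocab.token_to_id.at("hello"));
    return 0;
}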