1 year ago · e02b597be3
--- a/common/ngram-cache.h
+++ b/common/ngram-cache.h
@@ -37,11 +37,18 @@ struct llama_ngram {
 
				     }
			
 
				 };
			
 
				 
			
 
				+struct llama_token_hash_function { // hash functor: maps a single llama_token to a size_t
			
 
				+    size_t operator()(const llama_token token) const { // result depends only on `token`
			
 
				+        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
			
 
				+        return token * 11400714819323198485llu; // unsigned long long multiply (llu literal) by the multiplier from the article above; wraps instead of overflowing, then converts to size_t
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				 struct llama_ngram_hash_function {
			
 
				     size_t operator()(const llama_ngram & ngram) const {
			
 
				-        size_t hash = 0;
			
 
				-        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
			
 
				-            hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
			
 
				+        size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
			
 
				+        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
			
 
				+            hash ^= llama_token_hash_function{}(ngram.tokens[i]);
			
 
				         }
			
 
				         return hash;
			
 
				     }
			
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@@ -31,7 +31,6 @@ int main(int argc, char ** argv){
 
				 
			
 
				     // load the model
			
 
				     std::tie(model, ctx) = llama_init_from_gpt_params(params);
			
 
				-    GGML_ASSERT(llama_n_vocab(model) < (1 << 16));
			
 
				 
			
 
				     // tokenize the prompt
			
 
				     std::vector<llama_token> inp;
			
@@ -65,7 +64,7 @@ int main(int argc, char ** argv){
 
				     }
			
 
				 
			
 
				     const int n_input = inp.size();
			
 
				-    const int n_ctx = params.n_ctx;
			
 
				+    const int n_ctx = llama_n_ctx(ctx);
			
 
				 
			
 
				     int n_drafted = 0;
			
 
				     int n_accept  = 0;
			
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -39,7 +39,6 @@ int main(int argc, char ** argv){
 
				 
			
 
				     // load the model
			
 
				     std::tie(model, ctx) = llama_init_from_gpt_params(params);
			
 
				-    GGML_ASSERT(llama_n_vocab(model) < (1 << 16));
			
 
				 
			
 
				     // tokenize the prompt
			
 
				     std::vector<llama_token> inp;