1
0

ngram-cache.h 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. #pragma once
  2. #include "llama.h"
  3. #include <unordered_map>
  4. #include <string>
  5. #include <vector>
  // Compile-time n-gram size constants.
  // LLAMA_NGRAM_MAX is the capacity of common_ngram::tokens, so it caps the n-gram length a key can hold.
  // NOTE(review): LLAMA_NGRAM_MIN and LLAMA_NGRAM_STATIC are not referenced in this header; going by their
  // names they are the smallest n-gram size and the n-gram size for the static cache -- confirm at the use sites.
  #define LLAMA_NGRAM_MIN    1
  #define LLAMA_NGRAM_MAX    4
  #define LLAMA_NGRAM_STATIC 2
  9. // Data structures to map n-grams to empirical token probabilities:
  10. struct common_ngram {
  11. llama_token tokens[LLAMA_NGRAM_MAX];
  12. common_ngram() {
  13. for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
  14. tokens[i] = LLAMA_TOKEN_NULL;
  15. }
  16. }
  17. common_ngram(const llama_token * input, const int ngram_size) {
  18. for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
  19. tokens[i] = i < ngram_size ? input[i] : LLAMA_TOKEN_NULL;
  20. }
  21. }
  22. bool operator==(const common_ngram & other) const {
  23. for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
  24. if (tokens[i] != other.tokens[i]) {
  25. return false;
  26. }
  27. }
  28. return true;
  29. }
  30. };
  31. struct common_token_hash_function {
  32. size_t operator()(const llama_token token) const {
  33. // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
  34. return token * 11400714819323198485llu;
  35. }
  36. };
  37. struct common_ngram_hash_function {
  38. size_t operator()(const common_ngram & ngram) const {
  39. size_t hash = common_token_hash_function{}(ngram.tokens[0]);
  40. for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
  41. hash ^= common_token_hash_function{}(ngram.tokens[i]);
  42. }
  43. return hash;
  44. }
  45. };
  46. // token -> number of times token has been seen
  47. typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;
  48. // n-gram -> empirical distribution of following tokens
  49. typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;
  50. // Update an ngram cache with tokens.
  51. // ngram_cache: the cache to modify.
  52. // ngram_min/ngram_max: the min/max size of the ngrams to extract from inp_data.
  53. // inp_data: the token sequence with which to update ngram_cache.
  54. // nnew: how many new tokens have been appended to inp_data since the last call to this function.
  55. // print_progress: whether to print progress to stderr.
  56. //
  57. // In order to get correct results inp_data can ONLY BE APPENDED TO.
  58. // Changes in the middle need a complete rebuild.
  59. void common_ngram_cache_update(
  60. common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
  61. // Try to draft tokens from ngram caches.
  62. // inp: the tokens generated so far.
  63. // draft: the token sequence to draft. Expected to initially contain the previously sampled token.
  64. // n_draft: maximum number of tokens to add to draft.
  65. // ngram_min/gram_max: the min/max size of the ngrams in nc_context and nc_dynamic.
  66. // nc_context: ngram cache based on current context.
  67. // nc_dynamic: ngram cache based on previous user generations.
  68. // nc_static: ngram cache generated from a large text corpus, used for validation.
  69. void common_ngram_cache_draft(
  70. std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
  71. common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);
  72. // Save an ngram cache to a file.
  73. // ngram_cache: the ngram cache to save.
  74. // filename: the path under which to save the ngram cache.
  75. void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename);
  76. // Load an ngram cache saved with common_ngram_cache_save.
  77. // filename: the path from which to load the ngram cache.
  78. // returns: an ngram cache containing the information saved to filename.
  79. common_ngram_cache common_ngram_cache_load(std::string & filename);
  80. // Merge two ngram caches.
  81. // ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.
  82. // ngram_cache_add: the ngram cache to add to ngram_cache_target.
  83. void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);