llama-impl.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. #pragma once
  2. #include "llama.h"
  3. #include <string>
  4. #include <vector>
  5. #include <stdexcept>
  // LLAMA_ATTRIBUTE_FORMAT(fmt_index, first_arg_index)
  // Marks a variadic function as printf-like so the compiler can check the format
  // string against its arguments:
  //   - no __GNUC__            : expands to nothing
  //   - __GNUC__ + __MINGW32__ : uses the gnu_printf archetype
  //   - any other __GNUC__     : uses the printf archetype
  #if !defined(__GNUC__)
  #    define LLAMA_ATTRIBUTE_FORMAT(...)
  #elif defined(__MINGW32__)
  #    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
  #else
  #    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
  #endif
  15. //
  16. // logging
  17. //
  18. LLAMA_ATTRIBUTE_FORMAT(2, 3)
  19. void llama_log_internal (ggml_log_level level, const char * format, ...);
  20. void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
  21. #define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
  22. #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
  23. #define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
  24. #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
  25. #define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
  26. #define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
  27. //
  28. // helpers
  29. //
  30. struct time_meas {
  31. time_meas(int64_t & t_acc, bool disable = false) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
  32. ~time_meas() {
  33. if (t_start_us >= 0) {
  34. t_acc += ggml_time_us() - t_start_us;
  35. }
  36. }
  37. const int64_t t_start_us;
  38. int64_t & t_acc;
  39. };
  // Replaces every non-overlapping occurrence of `search` in `s` with `replace`,
  // scanning left to right. Text inserted by a replacement is never rescanned.
  // An empty `search` leaves `s` unchanged.
  static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
      if (search.empty()) {
          return;
      }

      const size_t needle_len = search.length();

      std::string out;
      out.reserve(s.length());

      // `cursor` is the start of the not-yet-copied tail of `s`
      size_t cursor = 0;
      for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
          out.append(s, cursor, hit - cursor); // text between the previous match and this one
          out.append(replace);
          cursor = hit + needle_len;
      }

      // remainder after the last match (the whole string when nothing matched)
      out.append(s, cursor, std::string::npos);
      s = std::move(out);
  }
  // Declaration only. Returns a reference to a vector of (string, tensor pointer)
  // pairs for the given context; the strings are presumably tensor names, to be
  // confirmed at the definition.
  const std::vector<std::pair<std::string, struct ggml_tensor *>> &
  llama_internal_get_tensor_map(struct llama_context * ctx);
  // the ring buffer works similarly to std::deque, but with a fixed capacity
  //
  // Storage is a vector of `capacity` value-initialized slots, so T must be default
  // constructible and copy assignable. `first` is the slot of the oldest element, `sz`
  // the number of stored elements and `pos` the slot the next push_back() writes to;
  // for capacity > 0 the member functions keep pos == (first + sz) % capacity.
  template<typename T>
  struct ring_buffer {
      // creates an empty buffer able to hold up to `cap` elements
      ring_buffer(size_t cap) : capacity(cap), data(cap) {}

      // oldest element; throws std::runtime_error when the buffer is empty
      T & front() {
          if (sz == 0) {
              throw std::runtime_error("ring buffer is empty");
          }
          return data[first];
      }

      const T & front() const {
          if (sz == 0) {
              throw std::runtime_error("ring buffer is empty");
          }
          return data[first];
      }

      // most recently pushed element; throws std::runtime_error when the buffer is empty
      T & back() {
          if (sz == 0) {
              throw std::runtime_error("ring buffer is empty");
          }
          // `pos` is the NEXT write slot, not the newest element: the newest element is
          // the last of the `sz` slots starting at `first` (same slot as rat(0))
          return data[(first + sz - 1) % capacity];
      }

      const T & back() const {
          if (sz == 0) {
              throw std::runtime_error("ring buffer is empty");
          }
          // see the non-const overload: index the newest element, not the write slot
          return data[(first + sz - 1) % capacity];
      }

      // appends `value`; when the buffer is already full the oldest element is overwritten.
      // throws std::runtime_error when the capacity is zero
      void push_back(const T & value) {
          if (capacity == 0) {
              throw std::runtime_error("ring buffer: capacity is zero");
          }

          if (sz == capacity) {
              // advance the start when buffer is full
              first = (first + 1) % capacity;
          } else {
              sz++;
          }
          data[pos] = value;
          pos = (pos + 1) % capacity;
      }

      // removes the oldest element and returns a copy of it;
      // throws std::runtime_error when the buffer is empty
      T pop_front() {
          if (sz == 0) {
              throw std::runtime_error("ring buffer is empty");
          }
          T value = data[first];
          first = (first + 1) % capacity;
          sz--;
          return value;
      }

      //T & operator[](size_t i) {
      //    if (i >= sz) {
      //        throw std::runtime_error("ring buffer: index out of bounds");
      //    }
      //    return data[(first + i) % capacity];
      //}

      //const T & at(size_t i) const {
      //    if (i >= sz) {
      //        throw std::runtime_error("ring buffer: index out of bounds");
      //    }
      //    return data[(first + i) % capacity];
      //}

      // reverse access: rat(0) is the newest element, rat(size() - 1) the oldest;
      // throws std::runtime_error when i >= size()
      const T & rat(size_t i) const {
          if (i >= sz) {
              throw std::runtime_error("ring buffer: index out of bounds");
          }
          return data[(first + sz - i - 1) % capacity];
      }

      // copies the stored elements into a vector, ordered from oldest to newest
      std::vector<T> to_vector() const {
          std::vector<T> result;
          result.reserve(sz);
          for (size_t i = 0; i < sz; i++) {
              result.push_back(data[(first + i) % capacity]);
          }
          return result;
      }

      void clear() {
          // here only reset the status of the buffer
          // (the slots in `data` keep their old values until they are overwritten)
          sz = 0;
          first = 0;
          pos = 0;
      }

      bool empty() const {
          return sz == 0;
      }

      size_t size() const {
          return sz;
      }

      size_t capacity = 0; // maximum number of elements
      size_t sz = 0;       // current number of elements
      size_t first = 0;    // slot of the oldest element
      size_t pos = 0;      // slot the next push_back() writes to
      std::vector<T> data; // backing storage, `capacity` slots
  };