| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- #pragma once
- #include "llama.h"
- #include <string>
- #include <vector>
- #include <stdexcept>
// LLAMA_ATTRIBUTE_FORMAT(format_index, first_vararg_index)
//
// Annotates a printf-like function so the compiler can check the variadic
// arguments against the format string:
//   - GCC targeting MinGW uses the gnu_printf archetype
//   - every other compiler that defines __GNUC__ uses the printf archetype;
//     this includes Clang on MinGW, because not every Clang release recognises
//     gnu_printf and the ones that do not drop the attribute with a warning
//   - all remaining compilers get an empty expansion (the annotation is a no-op)
#ifdef __GNUC__
#    if defined(__MINGW32__) && !defined(__clang__)
#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#    else
#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#    endif
#else
#    define LLAMA_ATTRIBUTE_FORMAT(...)
#endif
- //
- // logging
- //
- LLAMA_ATTRIBUTE_FORMAT(2, 3)
- void llama_log_internal (ggml_log_level level, const char * format, ...);
- void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
- #define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
- #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
- #define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
- #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
- #define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
- #define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
- //
- // helpers
- //
- struct time_meas {
- time_meas(int64_t & t_acc, bool disable = false) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
- ~time_meas() {
- if (t_start_us >= 0) {
- t_acc += ggml_time_us() - t_start_us;
- }
- }
- const int64_t t_start_us;
- int64_t & t_acc;
- };
// Replaces every non-overlapping occurrence of `search` in `s` with `replace`,
// scanning left to right. Text inserted by a replacement is never re-scanned.
// An empty `search` leaves `s` unchanged.
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    const size_t needle_len = search.size();
    if (needle_len == 0) {
        return;
    }

    // assemble the result in a separate string, then move it into `s`
    std::string result;
    result.reserve(s.size());

    size_t cursor = 0; // start of the part of `s` that has not been copied yet
    for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
        result.append(s, cursor, hit - cursor); // untouched text before the match
        result += replace;
        cursor = hit + needle_len;              // continue right after the match
    }

    // remainder after the last match (the whole string when nothing matched)
    result.append(s, cursor, std::string::npos);

    s = std::move(result);
}
// Returns a const reference to a vector of (string, tensor pointer) pairs for
// the given context - presumably tensor names mapped to their tensors.
// Only declared here; the definition lives outside this header.
const std::vector<std::pair<std::string, struct ggml_tensor *>> &
llama_internal_get_tensor_map(struct llama_context * ctx);
// Fixed-capacity FIFO container. It works similarly to std::deque, but all
// slots are allocated once, at construction; pushing into a full buffer
// overwrites the oldest element instead of growing.
//
// Invariant: the live elements occupy slots first, first + 1, ..., first + sz - 1
// (all modulo capacity), and pos == (first + sz) % capacity is the slot that the
// next push_back() writes to.
template<typename T>
struct ring_buffer {
    // allocates `cap` slots up front; with cap == 0 every push_back() throws
    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

    // oldest element; throws std::runtime_error when the buffer is empty
    T & front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    const T & front() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    // newest element; throws std::runtime_error when the buffer is empty
    T & back() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // `pos` is the next write slot, so the newest element sits one slot
        // before it; this is the same slot that rat(0) reads
        return data[(first + sz - 1) % capacity];
    }

    const T & back() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[(first + sz - 1) % capacity];
    }

    // appends a copy of `value`; when the buffer is full the oldest element is
    // dropped to make room. Throws std::runtime_error when capacity is zero.
    void push_back(const T & value) {
        if (capacity == 0) {
            throw std::runtime_error("ring buffer: capacity is zero");
        }

        if (sz == capacity) {
            // advance the start when buffer is full
            first = (first + 1) % capacity;
        } else {
            sz++;
        }
        data[pos] = value;
        pos = (pos + 1) % capacity;
    }

    // removes the oldest element and returns a copy of it;
    // throws std::runtime_error when the buffer is empty
    T pop_front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        T value = data[first];
        first = (first + 1) % capacity;
        sz--;
        return value;
    }

    //T & operator[](size_t i) {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    //const T & at(size_t i) const {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    // reverse access: rat(0) is the newest element, rat(size() - 1) the oldest;
    // throws std::runtime_error when i >= size()
    const T & rat(size_t i) const {
        if (i >= sz) {
            throw std::runtime_error("ring buffer: index out of bounds");
        }
        return data[(first + sz - i - 1) % capacity];
    }

    // copies the live elements into a vector, ordered oldest to newest
    std::vector<T> to_vector() const {
        std::vector<T> result;
        result.reserve(sz);
        for (size_t i = 0; i < sz; i++) {
            result.push_back(data[(first + i) % capacity]);
        }
        return result;
    }

    void clear() {
        // here only reset the status of the buffer
        // (the stored elements are neither destroyed nor overwritten)
        sz = 0;
        first = 0;
        pos = 0;
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    size_t capacity = 0; // number of slots in `data`
    size_t sz       = 0; // number of live elements
    size_t first    = 0; // index of the oldest live element
    size_t pos      = 0; // index of the slot written by the next push_back()
    std::vector<T> data;
};
|