
common : normalize naming style (#7462)

* common : normalize naming style

ggml-ci

* common : match declaration / definition order

* zig : try to fix build
Georgi Gerganov, 1 year ago
commit 6ff13987ad

+ 6 - 6
build.zig

@@ -129,14 +129,14 @@ pub fn build(b: *std.build.Builder) !void {
     const clip = make.obj("clip", "examples/llava/clip.cpp");
     const llava = make.obj("llava", "examples/llava/llava.cpp");
 
-    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, console, grammar_parser });
-    _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
-    _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
-    _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
-    _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });
+    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo, console, grammar_parser });
+    _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo });
+    _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo });
+    _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo });
+    _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo, train });
     _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });
 
-    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, grammar_parser, clip, llava });
+    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo, grammar_parser, clip, llava });
     if (server.target.isWindows()) {
         server.linkSystemLibrary("ws2_32");
     }

+ 461 - 412
common/common.cpp
The diff for this file is too large to display.


+ 46 - 42
common/common.h

@@ -27,7 +27,7 @@
 #define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)
 
 #define print_build_info() do {                                                                     \
-    fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT);           \
+    fprintf(stderr, "%s: build = %d (%s)\n",      __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT);      \
     fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET);    \
 } while(0)
 
@@ -35,14 +35,18 @@
 
 // build info
 extern int LLAMA_BUILD_NUMBER;
-extern char const *LLAMA_COMMIT;
-extern char const *LLAMA_COMPILER;
-extern char const *LLAMA_BUILD_TARGET;
+extern char const * LLAMA_COMMIT;
+extern char const * LLAMA_COMPILER;
+extern char const * LLAMA_BUILD_TARGET;
 
 struct llama_control_vector_load_info;
 
-int get_math_cpu_count();
-int32_t get_num_physical_cores();
+//
+// CPU utils
+//
+
+int32_t cpu_get_num_physical_cores();
+int32_t cpu_get_num_math();
 
 //
 // CLI argument parsing
@@ -51,7 +55,7 @@ int32_t get_num_physical_cores();
 struct gpt_params {
     uint32_t seed                 = LLAMA_DEFAULT_SEED; // RNG seed
 
-    int32_t n_threads             = get_math_cpu_count();
+    int32_t n_threads             = cpu_get_num_math();
     int32_t n_threads_draft       = -1;
     int32_t n_threads_batch       = -1;    // number of threads to use for batch processing (-1 = use n_threads)
     int32_t n_threads_batch_draft = -1;
@@ -179,33 +183,34 @@ struct gpt_params {
 
 void gpt_params_handle_model_default(gpt_params & params);
 
-bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
-
-bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
-
-bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
+bool gpt_params_parse_ex   (int argc, char ** argv, gpt_params & params);
+bool gpt_params_parse      (int argc, char ** argv, gpt_params & params);
+bool gpt_params_find_arg   (int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
+void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
 
-void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
+std::string gpt_params_get_system_info(const gpt_params & params);
 
-bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
-
-std::string get_system_info(const gpt_params & params);
+//
+// String utils
+//
 
-std::string gpt_random_prompt(std::mt19937 & rng);
+std::vector<std::string> string_split(std::string input, char separator);
 
-void process_escapes(std::string& input);
+std::string string_strip(const std::string & str);
+std::string string_get_sortable_timestamp();
+std::string string_random_prompt(std::mt19937 & rng);
 
-bool validate_file_name(const std::string & filename);
+bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
+void string_process_escapes(std::string & input);
 
 //
-// String utils
+// Filesystem utils
 //
 
-std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
-std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
-std::vector<std::string> string_split(std::string input, char separator);
-std::string string_strip(const std::string & str);
-std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
+bool fs_validate_filename(const std::string & filename);
+bool fs_create_directory_with_parents(const std::string & path);
+
+std::string fs_get_cache_directory();
 
 //
 // Model utils
@@ -276,30 +281,15 @@ std::string llama_detokenize_bpe(
 // defaults to true when model type is SPM, otherwise false.
 bool llama_should_add_bos_token(const llama_model * model);
 
-//
-// YAML utils
-//
-
-bool create_directory_with_parents(const std::string & path);
-std::string get_cache_directory();
-void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
-void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
-void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data);
-std::string get_sortable_timestamp();
-
-void dump_non_result_info_yaml(
-    FILE * stream, const gpt_params & params, const llama_context * lctx,
-    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
-
 //
 // KV cache utils
 //
 
 // Dump the KV cache view with the number of sequences per cell.
-void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);
+void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
 
 // Dump the KV cache view showing individual sequences in each cell (long output).
-void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
+void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
 
 //
 // Embedding utils
@@ -333,6 +323,20 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
 //
 // Split utils
 //
+
 static const char * const LLM_KV_SPLIT_NO            = "split.no";
 static const char * const LLM_KV_SPLIT_COUNT         = "split.count";
 static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
+
+//
+// YAML utils
+//
+
+void yaml_dump_vector_float    (FILE * stream, const char * prop_name, const std::vector<float> & data);
+void yaml_dump_vector_int      (FILE * stream, const char * prop_name, const std::vector<int> & data);
+void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
+
+void yaml_dump_non_result_info(
+    FILE * stream, const gpt_params & params, const llama_context * lctx,
+    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
+
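
For orientation, a minimal usage sketch of the renamed helpers declared above (a hedged example, not part of this commit; the wrapper function demo_renamed_helpers is hypothetical, and the old names are noted in comments):

    #include "common.h"

    int demo_renamed_helpers() {
        const int32_t n_threads = cpu_get_num_math();              // was: get_math_cpu_count()

        std::string prompt = "Hello\\nworld";
        string_process_escapes(prompt);                            // was: process_escapes()

        const std::string ts = string_get_sortable_timestamp();    // was: get_sortable_timestamp()

        if (!fs_validate_filename("run.log")) {                    // was: validate_file_name()
            return 1;
        }
        if (!fs_create_directory_with_parents("logs/" + ts)) {     // was: create_directory_with_parents()
            return 1;
        }
        return 0;
    }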

+ 82 - 1
common/sampling.cpp

@@ -125,7 +125,7 @@ std::string llama_sampling_order_print(const llama_sampling_params & params) {
     std::string result = "CFG -> Penalties ";
     if (params.mirostat == 0) {
         for (auto sampler_type : params.samplers_sequence) {
-            const auto sampler_type_name = sampler_type_to_name_string(sampler_type);
+            const auto sampler_type_name = llama_sampling_type_to_str(sampler_type);
             if (!sampler_type_name.empty()) {
                 result += "-> " + sampler_type_name + " ";
             }
@@ -137,6 +137,87 @@ std::string llama_sampling_order_print(const llama_sampling_params & params) {
     return result;
 }
 
+std::string llama_sampling_type_to_str(llama_sampler_type sampler_type) {
+    switch (sampler_type) {
+        case llama_sampler_type::TOP_K:       return "top_k";
+        case llama_sampler_type::TFS_Z:       return "tfs_z";
+        case llama_sampler_type::TYPICAL_P:   return "typical_p";
+        case llama_sampler_type::TOP_P:       return "top_p";
+        case llama_sampler_type::MIN_P:       return "min_p";
+        case llama_sampler_type::TEMPERATURE: return "temperature";
+        default : return "";
+    }
+}
+
+std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
+    std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
+        {"top_k",       llama_sampler_type::TOP_K},
+        {"top_p",       llama_sampler_type::TOP_P},
+        {"typical_p",   llama_sampler_type::TYPICAL_P},
+        {"min_p",       llama_sampler_type::MIN_P},
+        {"tfs_z",       llama_sampler_type::TFS_Z},
+        {"temperature", llama_sampler_type::TEMPERATURE}
+    };
+
+    // since samplers names are written multiple ways
+    // make it ready for both system names and input names
+    std::unordered_map<std::string, llama_sampler_type> sampler_alt_name_map {
+        {"top-k",       llama_sampler_type::TOP_K},
+        {"top-p",       llama_sampler_type::TOP_P},
+        {"nucleus",     llama_sampler_type::TOP_P},
+        {"typical-p",   llama_sampler_type::TYPICAL_P},
+        {"typical",     llama_sampler_type::TYPICAL_P},
+        {"min-p",       llama_sampler_type::MIN_P},
+        {"tfs-z",       llama_sampler_type::TFS_Z},
+        {"tfs",         llama_sampler_type::TFS_Z},
+        {"temp",        llama_sampler_type::TEMPERATURE}
+    };
+
+    std::vector<llama_sampler_type> sampler_types;
+    sampler_types.reserve(names.size());
+    for (const auto & name : names)
+    {
+        auto sampler_item = sampler_canonical_name_map.find(name);
+        if (sampler_item != sampler_canonical_name_map.end())
+        {
+            sampler_types.push_back(sampler_item->second);
+        }
+        else
+        {
+            if (allow_alt_names)
+            {
+                sampler_item = sampler_alt_name_map.find(name);
+                if (sampler_item != sampler_alt_name_map.end())
+                {
+                    sampler_types.push_back(sampler_item->second);
+                }
+            }
+        }
+    }
+    return sampler_types;
+}
+
+std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string) {
+    std::unordered_map<char, llama_sampler_type> sampler_name_map {
+        {'k', llama_sampler_type::TOP_K},
+        {'p', llama_sampler_type::TOP_P},
+        {'y', llama_sampler_type::TYPICAL_P},
+        {'m', llama_sampler_type::MIN_P},
+        {'f', llama_sampler_type::TFS_Z},
+        {'t', llama_sampler_type::TEMPERATURE}
+    };
+
+    std::vector<llama_sampler_type> sampler_types;
+    sampler_types.reserve(names_string.size());
+    for (const auto & c : names_string) {
+        const auto sampler_item = sampler_name_map.find(c);
+        if (sampler_item != sampler_name_map.end()) {
+            sampler_types.push_back(sampler_item->second);
+        }
+    }
+    return sampler_types;
+}
+
 // no reasons to expose this function in header
 static void sampler_queue(
                    struct llama_context * ctx_main,

+ 5 - 0
common/sampling.h

@@ -116,6 +116,11 @@ std::string llama_sampling_print(const llama_sampling_params & params);
 // Print sampling order into a string
 std::string llama_sampling_order_print(const llama_sampling_params & params);
 
+std::string llama_sampling_type_to_str(llama_sampler_type sampler_type);
+
+std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
+std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string);
+
 // this is a common sampling function used across the examples for convenience
 // it can serve as a starting point for implementing your own sampling function
 // Note: When using multiple sequences, it is the caller's responsibility to call
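
The new sampler-name helpers declared here can be exercised roughly as follows (a hedged sketch; demo_sampler_names is a hypothetical caller, and the spellings rely on the canonical/alternative name maps added in common/sampling.cpp above):

    #include "sampling.h"
    #include <cstdio>

    void demo_sampler_names() {
        // parse canonical and alternative spellings (alt names enabled via the second argument)
        const std::vector<llama_sampler_type> seq =
            llama_sampling_types_from_names({"top-k", "tfs", "typical_p", "min_p"}, /*allow_alt_names=*/true);

        // the same samplers can be selected by single characters, e.g. "kfyp"
        const std::vector<llama_sampler_type> seq2 = llama_sampling_types_from_chars("kfyp");

        for (const auto st : seq) {
            printf("-> %s\n", llama_sampling_type_to_str(st).c_str());   // was: sampler_type_to_name_string()
        }
        (void) seq2;
    }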

+ 1 - 1
common/train.cpp

@@ -1380,7 +1380,7 @@ bool consume_common_train_arg(
 
 void finish_processing_train_args(struct train_params_common * params) {
     if (params->escape) {
-        process_escapes(params->sample_start);
+        string_process_escapes(params->sample_start);
     }
 }
 

+ 1 - 1
examples/batched/batched.cpp

@@ -48,7 +48,7 @@ int main(int argc, char ** argv) {
         params.prompt = "Hello my name is";
     }
 
-    process_escapes(params.prompt);
+    string_process_escapes(params.prompt);
 
     // init LLM
 

+ 2 - 2
examples/embedding/embedding.cpp

@@ -80,7 +80,7 @@ int main(int argc, char ** argv) {
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
-        params.prompt = gpt_random_prompt(rng);
+        params.prompt = string_random_prompt(rng);
     }
 
     llama_backend_init();
@@ -107,7 +107,7 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "%s\n", get_system_info(params).c_str());
+        fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
     }
 
     // split the prompt into lines

+ 2 - 2
examples/eval-callback/eval-callback.cpp

@@ -152,7 +152,7 @@ int main(int argc, char ** argv) {
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
-        params.prompt = gpt_random_prompt(rng);
+        params.prompt = string_random_prompt(rng);
     }
 
     llama_backend_init();
@@ -176,7 +176,7 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "%s\n", get_system_info(params).c_str());
+        fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
     }
 
     bool OK = run(ctx, params);

+ 2 - 2
examples/imatrix/imatrix.cpp

@@ -598,7 +598,7 @@ int main(int argc, char ** argv) {
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
-        params.prompt = gpt_random_prompt(rng);
+        params.prompt = string_random_prompt(rng);
     }
 
     sparams.dataset = params.prompt_file;
@@ -667,7 +667,7 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "%s\n", get_system_info(params).c_str());
+        fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
     }
 
     bool OK = compute_imatrix(ctx, params, compute_ppl, from_chunk);

+ 8 - 8
examples/infill/infill.cpp

@@ -50,9 +50,9 @@ static void write_logfile(
         return;
     }
 
-    const std::string timestamp = get_sortable_timestamp();
+    const std::string timestamp = string_get_sortable_timestamp();
 
-    const bool success = create_directory_with_parents(params.logdir);
+    const bool success = fs_create_directory_with_parents(params.logdir);
     if (!success) {
         fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
                 __func__, params.logdir.c_str());
@@ -70,7 +70,7 @@ static void write_logfile(
     fprintf(logfile, "binary: infill\n");
     char model_desc[128];
     llama_model_desc(model, model_desc, sizeof(model_desc));
-    dump_non_result_info_yaml(logfile, params, ctx, timestamp, input_tokens, model_desc);
+    yaml_dump_non_result_info(logfile, params, ctx, timestamp, input_tokens, model_desc);
 
     fprintf(logfile, "\n");
     fprintf(logfile, "######################\n");
@@ -78,8 +78,8 @@ static void write_logfile(
     fprintf(logfile, "######################\n");
     fprintf(logfile, "\n");
 
-    dump_string_yaml_multiline(logfile, "output", output.c_str());
-    dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
+    yaml_dump_string_multiline(logfile, "output", output.c_str());
+    yaml_dump_vector_int(logfile, "output_tokens", output_tokens);
 
     llama_dump_timing_info_yaml(logfile, ctx);
     fclose(logfile);
@@ -236,7 +236,7 @@ int main(int argc, char ** argv) {
     // print system information
     {
         LOG_TEE("\n");
-        LOG_TEE("%s\n", get_system_info(params).c_str());
+        LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
     }
     const bool add_bos = llama_should_add_bos_token(model);
     GGML_ASSERT(llama_add_eos_token(model) != 1);
@@ -621,8 +621,8 @@ int main(int argc, char ** argv) {
 
                 if (params.escape) {
                     //process escape sequences, for the initial prompt this is done in common.cpp when we load the params, but for the interactive mode we need to do it here
-                    process_escapes(params.input_prefix);
-                    process_escapes(params.input_suffix);
+                    string_process_escapes(params.input_prefix);
+                    string_process_escapes(params.input_suffix);
                 }
                 suff_rm_leading_spc = params.escape;
                 if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {

+ 1 - 1
examples/llama-bench/llama-bench.cpp

@@ -200,7 +200,7 @@ static const cmd_params cmd_params_defaults = {
     /* n_ubatch      */ {512},
     /* type_k        */ {GGML_TYPE_F16},
     /* type_v        */ {GGML_TYPE_F16},
-    /* n_threads     */ {get_math_cpu_count()},
+    /* n_threads     */ {cpu_get_num_math()},
     /* n_gpu_layers  */ {99},
     /* split_mode    */ {LLAMA_SPLIT_MODE_LAYER},
     /* main_gpu      */ {0},

+ 1 - 1
examples/llava/llava-cli.cpp

@@ -290,7 +290,7 @@ int main(int argc, char ** argv) {
 #endif // LOG_DISABLE_LOGS
 
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
-        gpt_print_usage(argc, argv, params);
+        gpt_params_print_usage(argc, argv, params);
         show_additional_info(argc, argv);
         return 1;
     }

+ 1 - 1
examples/lookahead/lookahead.cpp

@@ -174,7 +174,7 @@ int main(int argc, char ** argv) {
         // debug
         if (dump_kv_cache) {
             llama_kv_cache_view_update(ctx, &kvc_view);
-            dump_kv_cache_view_seqs(kvc_view, 40);
+            llama_kv_cache_dump_view_seqs(kvc_view, 40);
         }
 
         // build the mask from https://lmsys.org/blog/2023-11-21-lookahead-decoding/

+ 1 - 1
examples/lookup/lookup.cpp

@@ -121,7 +121,7 @@ int main(int argc, char ** argv){
         // debug
         if (dump_kv_cache) {
             llama_kv_cache_view_update(ctx, &kvc_view);
-            dump_kv_cache_view_seqs(kvc_view, 40);
+            llama_kv_cache_dump_view_seqs(kvc_view, 40);
         }
 
         // print current draft sequence

+ 8 - 8
examples/main/main.cpp

@@ -60,9 +60,9 @@ static void write_logfile(
         return;
     }
 
-    const std::string timestamp = get_sortable_timestamp();
+    const std::string timestamp = string_get_sortable_timestamp();
 
-    const bool success = create_directory_with_parents(params.logdir);
+    const bool success = fs_create_directory_with_parents(params.logdir);
     if (!success) {
         fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
                 __func__, params.logdir.c_str());
@@ -80,7 +80,7 @@ static void write_logfile(
     fprintf(logfile, "binary: main\n");
     char model_desc[128];
     llama_model_desc(model, model_desc, sizeof(model_desc));
-    dump_non_result_info_yaml(logfile, params, ctx, timestamp, input_tokens, model_desc);
+    yaml_dump_non_result_info(logfile, params, ctx, timestamp, input_tokens, model_desc);
 
     fprintf(logfile, "\n");
     fprintf(logfile, "######################\n");
@@ -88,8 +88,8 @@ static void write_logfile(
     fprintf(logfile, "######################\n");
     fprintf(logfile, "\n");
 
-    dump_string_yaml_multiline(logfile, "output", output.c_str());
-    dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
+    yaml_dump_string_multiline(logfile, "output", output.c_str());
+    yaml_dump_vector_int(logfile, "output_tokens", output_tokens);
 
     llama_dump_timing_info_yaml(logfile, ctx);
     fclose(logfile);
@@ -181,7 +181,7 @@ int main(int argc, char ** argv) {
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
-        params.prompt = gpt_random_prompt(rng);
+        params.prompt = string_random_prompt(rng);
     }
 
     LOG("%s: llama backend init\n", __func__);
@@ -219,7 +219,7 @@ int main(int argc, char ** argv) {
     // print system information
     {
         LOG_TEE("\n");
-        LOG_TEE("%s\n", get_system_info(params).c_str());
+        LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
     }
 
     std::string path_session = params.path_prompt_cache;
@@ -879,7 +879,7 @@ int main(int argc, char ** argv) {
                         embd_inp.insert(embd_inp.end(), cml_pfx.begin(), cml_pfx.end());
                     }
                     if (params.escape) {
-                        process_escapes(buffer);
+                        string_process_escapes(buffer);
                     }
 
                     const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);

+ 1 - 1
examples/parallel/parallel.cpp

@@ -210,7 +210,7 @@ int main(int argc, char ** argv) {
     while (true) {
         if (dump_kv_cache) {
             llama_kv_cache_view_update(ctx, &kvc_view);
-            dump_kv_cache_view_seqs(kvc_view, 40);
+            llama_kv_cache_dump_view_seqs(kvc_view, 40);
         }
 
         llama_batch_clear(batch);

+ 7 - 7
examples/perplexity/perplexity.cpp

@@ -44,9 +44,9 @@ static void write_logfile(
         return;
     }
 
-    const std::string timestamp = get_sortable_timestamp();
+    const std::string timestamp = string_get_sortable_timestamp();
 
-    const bool success = create_directory_with_parents(params.logdir);
+    const bool success = fs_create_directory_with_parents(params.logdir);
     if (!success) {
         fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
                 __func__, params.logdir.c_str());
@@ -64,7 +64,7 @@ static void write_logfile(
     fprintf(logfile, "binary: main\n");
     char model_desc[128];
     llama_model_desc(model, model_desc, sizeof(model_desc));
-    dump_non_result_info_yaml(logfile, params, ctx, timestamp, results.tokens, model_desc);
+    yaml_dump_non_result_info(logfile, params, ctx, timestamp, results.tokens, model_desc);
 
     fprintf(logfile, "\n");
     fprintf(logfile, "######################\n");
@@ -72,9 +72,9 @@ static void write_logfile(
     fprintf(logfile, "######################\n");
     fprintf(logfile, "\n");
 
-    dump_vector_float_yaml(logfile, "logits", results.logits);
+    yaml_dump_vector_float(logfile, "logits", results.logits);
     fprintf(logfile, "ppl_value: %f\n", results.ppl_value);
-    dump_vector_float_yaml(logfile, "probs", results.probs);
+    yaml_dump_vector_float(logfile, "probs", results.probs);
 
     llama_dump_timing_info_yaml(logfile, ctx);
     fclose(logfile);
@@ -2007,7 +2007,7 @@ int main(int argc, char ** argv) {
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
-        params.prompt = gpt_random_prompt(rng);
+        params.prompt = string_random_prompt(rng);
     }
 
     llama_backend_init();
@@ -2035,7 +2035,7 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "%s\n", get_system_info(params).c_str());
+        fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
     }
 
     struct results_perplexity results;

+ 1 - 1
examples/quantize/quantize.cpp

@@ -259,7 +259,7 @@ int main(int argc, char ** argv) {
                 usage(argv[0]);
             }
         } else if (strcmp(argv[arg_idx], "--override-kv") == 0) {
-            if (arg_idx == argc-1 || !parse_kv_override(argv[++arg_idx], kv_overrides)) {
+            if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) {
                 usage(argv[0]);
             }
         } else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) {

+ 2 - 2
examples/retrieval/retrieval.cpp

@@ -11,7 +11,7 @@ struct retrieval_params {
 };
 
 static void retrieval_params_print_usage(int argc, char ** argv, gpt_params & gpt_params, retrieval_params & params) {
-    gpt_print_usage(argc, argv, gpt_params);
+    gpt_params_print_usage(argc, argv, gpt_params);
     printf("retrieval options:\n");
     printf("  --context-file FNAME  file containing context to embed.\n");
     printf("                        specify multiple files by providing --context-file option multiple times.\n");
@@ -226,7 +226,7 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "%s\n", get_system_info(params).c_str());
+        fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
     }
 
     // max batch size

+ 5 - 5
examples/server/server.cpp

@@ -1019,7 +1019,7 @@ struct server_context {
                         sampler_names.emplace_back(sampler_name);
                     }
                 }
-                slot.sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+                slot.sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
             } else {
                 slot.sparams.samplers_sequence = default_sparams.samplers_sequence;
             }
@@ -1256,7 +1256,7 @@ struct server_context {
         std::vector<std::string> samplers_sequence;
         samplers_sequence.reserve(slot.sparams.samplers_sequence.size());
         for (const auto & sampler_type : slot.sparams.samplers_sequence) {
-            samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
         }
 
         return json {
@@ -2852,7 +2852,7 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
                 invalid_param = true;
                 break;
             }
-            if (!parse_kv_override(argv[i], params.kv_overrides)) {
+            if (!string_parse_kv_override(argv[i], params.kv_overrides)) {
                 fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
                 invalid_param = true;
                 break;
@@ -3310,7 +3310,7 @@ int main(int argc, char ** argv) {
     const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
         std::string filename = request_data.at("filename");
-        if (!validate_file_name(filename)) {
+        if (!fs_validate_filename(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
         }
@@ -3340,7 +3340,7 @@ int main(int argc, char ** argv) {
     const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
         std::string filename = request_data.at("filename");
-        if (!validate_file_name(filename)) {
+        if (!fs_validate_filename(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
         }

Some files were not shown because too many files were changed in this diff.