|
@@ -71,28 +71,27 @@ extern "C" {
|
|
|
|
|
|
|
|
// Progress callback for long-running operations (e.g. model loading).
// `progress` is in [0, 1]; `ctx` is the user pointer supplied alongside the callback.
typedef void (*llama_progress_callback)(float progress, void *ctx);
|
|
|
|
|
|
|
|
- struct llama_context_params {
|
|
|
|
|
|
|
+ struct llama_context_params {
|
|
|
|
|
+ int seed; // RNG seed, -1 for random
|
|
|
int n_ctx; // text context
|
|
int n_ctx; // text context
|
|
|
int n_batch; // prompt processing batch size
|
|
int n_batch; // prompt processing batch size
|
|
|
int n_gpu_layers; // number of layers to store in VRAM
|
|
int n_gpu_layers; // number of layers to store in VRAM
|
|
|
int main_gpu; // the GPU that is used for scratch and small tensors
|
|
int main_gpu; // the GPU that is used for scratch and small tensors
|
|
|
float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
|
|
float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
|
|
|
- bool low_vram; // if true, reduce VRAM usage at the cost of performance
|
|
|
|
|
- int seed; // RNG seed, -1 for random
|
|
|
|
|
|
|
+ // called with a progress value between 0 and 1, pass NULL to disable
|
|
|
|
|
+ llama_progress_callback progress_callback;
|
|
|
|
|
+ // context pointer passed to the progress callback
|
|
|
|
|
+ void * progress_callback_user_data;
|
|
|
|
|
|
|
|
|
|
+ // Keep the booleans together to avoid misalignment during copy-by-value.
|
|
|
|
|
+ bool low_vram; // if true, reduce VRAM usage at the cost of performance
|
|
|
bool f16_kv; // use fp16 for KV cache
|
|
bool f16_kv; // use fp16 for KV cache
|
|
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
|
|
bool vocab_only; // only load the vocabulary, no weights
|
|
bool vocab_only; // only load the vocabulary, no weights
|
|
|
bool use_mmap; // use mmap if possible
|
|
bool use_mmap; // use mmap if possible
|
|
|
bool use_mlock; // force system to keep model in RAM
|
|
bool use_mlock; // force system to keep model in RAM
|
|
|
bool embedding; // embedding mode only
|
|
bool embedding; // embedding mode only
|
|
|
-
|
|
|
|
|
- // called with a progress value between 0 and 1, pass NULL to disable
|
|
|
|
|
- llama_progress_callback progress_callback;
|
|
|
|
|
- // context pointer passed to the progress callback
|
|
|
|
|
- void * progress_callback_user_data;
|
|
|
|
|
};
|
|
};
|
|
|
-
|
|
|
|
|
// model file types
|
|
// model file types
|
|
|
enum llama_ftype {
|
|
enum llama_ftype {
|
|
|
LLAMA_FTYPE_ALL_F32 = 0,
|
|
LLAMA_FTYPE_ALL_F32 = 0,
|