build(cmake): simplify instructions (`cmake -B build && cmake --build build ...`) (#6964)

* readme: cmake . -B build && cmake --build build

* build: fix typo

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* build: drop implicit . from cmake config command

* build: remove another superfluous .

* build: update MinGW cmake commands

* Update README-sycl.md

Co-authored-by: Neo Zhang Jianyu <jianyu.zhang@intel.com>

* build: reinstate --config Release as not the default w/ some generators + document how to build Debug

* build: revert more --config Release

* build: nit / remove -H from cmake example

* build: reword debug instructions around single/multi config split

---------

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Co-authored-by: Neo Zhang Jianyu <jianyu.zhang@intel.com>
Olivier Chafik, 1 year ago
Commit b8a7a5a90f
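
The pattern the whole diff converges on: `cmake -B build` configures an out-of-source build without any `mkdir`/`cd`, and `cmake --build build` then drives the chosen generator from the source directory. A minimal before/after sketch (the `-DLLAMA_CUDA=ON` flag is just an illustrative option):

```sh
# Before: create and enter the build directory by hand
mkdir build
cd build
cmake .. -DLLAMA_CUDA=ON
cmake --build . --config Release

# After: same result, no cd required
cmake -B build -DLLAMA_CUDA=ON
cmake --build build --config Release
```

`-B` has been documented since CMake 3.13 (alongside `-S`, which defaults to the current directory), so the new commands assume a reasonably recent CMake.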

+ 3 - 5
.devops/main-intel.Dockerfile

@@ -10,14 +10,12 @@ WORKDIR /app
 
 COPY . .
 
-RUN mkdir build && \
-    cd build && \
-    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
         echo "LLAMA_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
-    cmake --build . --config Release --target main
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake --build build --config Release --target main
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
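
One subtlety preserved in both Intel Dockerfiles: the whole `RUN` is a single shell invocation chained with `&&`, so the exported `OPT_SYCL_F16` is still in scope when `cmake` runs, and the deliberately unquoted `${OPT_SYCL_F16}` expands to either one extra flag or nothing at all. The mechanism in isolation (a sketch; `LLAMA_SYCL_F16` is a Docker build arg in the real file):

```sh
# Set OPT_SYCL_F16 to "-DLLAMA_SYCL_F16=ON" only when the build arg asks for it
if [ "${LLAMA_SYCL_F16}" = "ON" ]; then
    export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"
fi
# Unquoted on purpose: an empty value vanishes instead of becoming an empty argument
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16}
cmake --build build --config Release --target main
```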
 

+ 2 - 4
.devops/main-vulkan.Dockerfile

@@ -14,10 +14,8 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
-    cd build && \
-    cmake .. -DLLAMA_VULKAN=1 && \
-    cmake --build . --config Release --target main
+RUN cmake -B build -DLLAMA_VULKAN=1 && \
+    cmake --build build --config Release --target main
 
 # Clean up
 WORKDIR /

+ 3 - 5
.devops/server-intel.Dockerfile

@@ -10,14 +10,12 @@ WORKDIR /app
 
 COPY . .
 
-RUN mkdir build && \
-    cd build && \
-    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
         echo "LLAMA_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
-    cmake --build . --config Release --target server
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+    cmake --build build --config Release --target server
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
 

+ 2 - 4
.devops/server-vulkan.Dockerfile

@@ -18,10 +18,8 @@ RUN apt-get update && \
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
-    cd build && \
-    cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
-    cmake --build . --config Release --target server
+RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+    cmake --build build --config Release --target server
 
 # Clean up
 WORKDIR /

+ 2 - 4
.github/workflows/bench.yml

@@ -96,9 +96,7 @@ jobs:
         id: cmake_build
         run: |
           set -eux
-          mkdir build
-          cd build
-          cmake .. \
+          cmake -B build \
               -DLLAMA_NATIVE=OFF \
               -DLLAMA_BUILD_SERVER=ON \
               -DLLAMA_CURL=ON \
@@ -109,7 +107,7 @@ jobs:
               -DLLAMA_FATAL_WARNINGS=OFF \
               -DLLAMA_ALL_WARNINGS=OFF \
               -DCMAKE_BUILD_TYPE=Release;
-          cmake --build . --config Release -j $(nproc) --target server
+          cmake --build build --config Release -j $(nproc) --target server
 
       - name: Download the dataset
         id: download_dataset
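
Note that the workflow keeps `--target server`, which restricts the build to that one binary and its dependencies rather than every example. Roughly:

```sh
# Configure once, then build just the server target
cmake -B build -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release
cmake --build build --config Release --target server
```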

+ 4 - 8
.github/workflows/server.yml

@@ -94,15 +94,13 @@ jobs:
       - name: Build
         id: cmake_build
         run: |
-          mkdir build
-          cd build
-          cmake .. \
+          cmake -B build \
               -DLLAMA_NATIVE=OFF \
               -DLLAMA_BUILD_SERVER=ON \
               -DLLAMA_CURL=ON \
               -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
               -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
-          cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server
 
 
       - name: Tests
@@ -143,10 +141,8 @@ jobs:
       - name: Build
         id: cmake_build
         run: |
-          mkdir build
-          cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
 
       - name: Python setup
         id: setup_python
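
Both CI hunks also keep their parallelism flags: since CMake 3.12, `cmake --build` accepts `-j`/`--parallel` and forwards it to the underlying tool, so the usual core-count idioms pass straight through:

```sh
# Linux jobs; the Windows job passes ${env:NUMBER_OF_PROCESSORS} the same way
cmake --build build --config Release -j "$(nproc)" --target server
```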

+ 13 - 18
README-sycl.md

@@ -185,9 +185,8 @@ Upon a successful installation, SYCL is enabled for the available intel devices,
 ```sh
 git clone https://github.com/oneapi-src/oneMKL
 cd oneMKL
-mkdir -p buildWithCublas && cd buildWithCublas
-cmake ../ -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-make
+cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
+cmake --build buildWithCublas --config Release
 ```
 
 
@@ -227,16 +226,15 @@ Similarly, users targeting Nvidia GPUs should expect at least one SYCL-CUDA device
 source /opt/intel/oneapi/setvars.sh
 
 # Build LLAMA with MKL BLAS acceleration for intel GPU
-mkdir -p build && cd build
 
 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 
 # Option 2: Use FP16
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 
-#build all binary
-cmake --build . --config Release -j -v
+# build all binary
+cmake --build build --config Release -j -v
 ```
 
 #### Nvidia GPU
@@ -248,16 +246,15 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_
 export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
 
 # Build LLAMA with Nvidia BLAS acceleration through SYCL
-mkdir -p build && cd build
 
 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 
 # Option 2: Use FP16
-cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 
-#build all binary
-cmake --build . --config Release -j -v
+# build all binary
+cmake --build build --config Release -j -v
 
 ```
 
@@ -412,17 +409,15 @@ b. Download & install mingw-w64 make for Windows provided by w64devkit
 On the oneAPI command line window, step into the llama.cpp main directory and run the following:
 
 ```
-mkdir -p build
-cd build
 @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
 
 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -G "MinGW Makefiles" ..  -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release
+cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release
 
 # Option 2: Or FP16
-cmake -G "MinGW Makefiles" ..  -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
 
-make -j
+cmake --build build --config Release -j
 ```
 
 Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions:
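
The MinGW change in this hunk is the same idea from the other direction: once the generator is fixed at configure time, `cmake --build` shells out to the matching tool (here mingw32-make), so the tool-specific `make -j` invocation can go. A sketch:

```sh
cmake -B build -G "MinGW Makefiles" -DCMAKE_BUILD_TYPE=Release
# Roughly equivalent to: cd build && mingw32-make -j
cmake --build build -j
```

`--config Release` is harmless here but a no-op: MinGW Makefiles is a single-config generator, which is exactly the distinction the README.md hunks below spell out.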

+ 49 - 46
README.md

@@ -308,6 +308,8 @@ In order to build llama.cpp you have three different options.
       make
       ```
 
+      **Note**: for `Debug` builds, run `make LLAMA_DEBUG=1`
+
   - On Windows:
 
     1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
@@ -322,12 +324,26 @@ In order to build llama.cpp you have three different options.
 - Using `CMake`:
 
     ```bash
-    mkdir build
-    cd build
-    cmake ..
-    cmake --build . --config Release
+    cmake -B build
+    cmake --build build --config Release
     ```
 
+    **Note**: for `Debug` builds, there are two cases:
+
+    - Single-config generators (e.g. default = `Unix Makefiles`; note that they just ignore the `--config` flag):
+
+      ```bash
+      cmake -B build -DCMAKE_BUILD_TYPE=Debug
+      cmake --build build
+      ```
+
+    - Multi-config generators (`-G` param set to Visual Studio, Xcode...):
+
+      ```bash
+      cmake -B build -G "Xcode"
+      cmake --build build --config Debug
+      ```
+
 - Using `Zig` (version 0.11 or later):
 
     Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
@@ -439,10 +455,8 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake` on Linux:
 
       ```bash
-      mkdir build
-      cd build
-      cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
-      cmake --build . --config Release
+      cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+      cmake --build build --config Release
       ```
 
 - #### BLIS
@@ -462,11 +476,9 @@ Building the program with BLAS support may lead to some performance improvements
   - Using manual oneAPI installation:
    By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the MKL version of BLAS will automatically be selected. Otherwise, please install oneAPI and follow the steps below:
       ```bash
-      mkdir build
-      cd build
       source /opt/intel/oneapi/setvars.sh # You can skip this step if  in oneapi-basekit docker image, only required for manual installation
-      cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
-      cmake --build . --config Release
+      cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+      cmake --build build --config Release
       ```
 
   - Using oneAPI docker image:
@@ -487,10 +499,8 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake`:
 
     ```bash
-    mkdir build
-    cd build
-    cmake .. -DLLAMA_CUDA=ON
-    cmake --build . --config Release
+    cmake -B build -DLLAMA_CUDA=ON
+    cmake --build build --config Release
     ```
 
   The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. The following compilation options are also available to tweak performance:
@@ -517,8 +527,8 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):
     ```bash
     CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
-        cmake -H. -Bbuild -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
-        && cmake --build build -- -j 16
+        cmake -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+        && cmake --build build --config Release -- -j 16
     ```
    On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`.
     However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
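
This hunk also retires `-H. -Bbuild`: `-H` was an undocumented internal spelling of the source-directory flag, superseded by the documented `-S`/`-B` pair in CMake 3.13.

```sh
# Undocumented legacy form (removed by this commit)
cmake -H. -Bbuild -DLLAMA_HIPBLAS=ON
# Documented equivalents; -S defaults to the current directory, so it can be omitted
cmake -S . -B build -DLLAMA_HIPBLAS=ON
cmake -B build -DLLAMA_HIPBLAS=ON
```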
@@ -564,15 +574,14 @@ Building the program with BLAS support may lead to some performance improvements
 
         ```sh
         git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git
-        mkdir OpenCL-SDK/build
-        cd OpenCL-SDK/build
-        cmake .. -DBUILD_DOCS=OFF \
+        cd OpenCL-SDK
+        cmake -B build -DBUILD_DOCS=OFF \
           -DBUILD_EXAMPLES=OFF \
           -DBUILD_TESTING=OFF \
           -DOPENCL_SDK_BUILD_SAMPLES=OFF \
           -DOPENCL_SDK_TEST_SAMPLES=OFF
-        cmake --build . --config Release
-        cmake --install . --prefix /some/path
+        cmake --build build
+        cmake --install build --prefix /some/path
         ```
       </details>
 
@@ -594,23 +603,23 @@ Building the program with BLAS support may lead to some performance improvements
       ```cmd
       set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
       git clone https://github.com/CNugteren/CLBlast.git
-      mkdir CLBlast\build
-      cd CLBlast\build
-      cmake .. -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
-      cmake --build . --config Release
-      cmake --install . --prefix C:/CLBlast
+      cd CLBlast
+      cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
+      cmake --build build --config Release
+      cmake --install build --prefix C:/CLBlast
       ```
 
+      (note: `--config Release` at build time is the default and only relevant for Visual Studio builds - or multi-config Ninja builds)
+
   - <details>
     <summary>Unix:</summary>
 
       ```sh
       git clone https://github.com/CNugteren/CLBlast.git
-      mkdir CLBlast/build
-      cd CLBlast/build
-      cmake .. -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
-      cmake --build . --config Release
-      cmake --install . --prefix /some/path
+      cd CLBlast
+      cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
+      cmake --build build --config Release
+      cmake --install build --prefix /some/path
       ```
 
       Where `/some/path` is where the built library will be installed (default is `/usr/local`).
@@ -624,21 +633,17 @@ Building the program with BLAS support may lead to some performance improvements
     ```
   - CMake (Unix):
     ```sh
-    mkdir build
-    cd build
-    cmake .. -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
-    cmake --build . --config Release
+    cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+    cmake --build build --config Release
     ```
   - CMake (Windows):
     ```cmd
     set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
     git clone https://github.com/ggerganov/llama.cpp
     cd llama.cpp
-    mkdir build
-    cd build
-    cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
-    cmake --build . --config Release
-    cmake --install . --prefix C:/LlamaCPP
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
+    cmake --build build --config Release
+    cmake --install build --prefix C:/LlamaCPP
     ```
 
   ##### Running Llama with CLBlast
@@ -694,10 +699,8 @@ Building the program with BLAS support may lead to some performance improvements
   Then, build llama.cpp using the cmake command below:
 
   ```bash
-  mkdir -p build
-  cd build
-  cmake .. -DLLAMA_VULKAN=1
-  cmake --build . --config Release
+  cmake -B build -DLLAMA_VULKAN=1
+  cmake --build build --config Release
   # Test the output binary (with "-ngl 33" to offload all layers to GPU)
   ./bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
 

+ 6 - 10
examples/main-cmake-pkg/README.md

@@ -17,11 +17,9 @@ In this case, CLBlast was already installed so the CMake package is referenced i
 ```cmd
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp
-mkdir build
-cd build
-cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
-cmake --build . --config Release
-cmake --install . --prefix C:/LlamaCPP
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
+cmake --build build --config Release
+cmake --install build --prefix C:/LlamaCPP
 ```
 
 ### Build main-cmake-pkg
@@ -29,9 +27,7 @@ cmake --install . --prefix C:/LlamaCPP
 
 ```cmd
 cd ..\examples\main-cmake-pkg
-mkdir build
-cd build
-cmake .. -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
-cmake --build . --config Release
-cmake --install . --prefix C:/MyLlamaApp
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
+cmake --build build --config Release
+cmake --install build --prefix C:/MyLlamaApp
 ```
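
The install steps lean on the same directory-argument style: `cmake --install <dir>` (CMake 3.15+) takes the build directory as an argument, and `--prefix` overrides `CMAKE_INSTALL_PREFIX` at install time:

```sh
cmake --install build --prefix C:/MyLlamaApp
# Pre-3.15 fallback: the prefix has to be baked in at configure time
cmake --build build --config Release --target install
```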

+ 7 - 6
examples/server/README.md

@@ -74,15 +74,18 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 - Using `make`:
 
   ```bash
-  make
+  make server
   ```
 
 - Using `CMake`:
 
   ```bash
-  cmake --build . --config Release
+  cmake -B build
+  cmake --build build --config Release -t server
   ```
 
+  Binary is at `./build/bin/server`
+
 ## Build with SSL
 
 `server` can also be built with SSL support using OpenSSL 3
@@ -99,10 +102,8 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 - Using `CMake`:
 
   ```bash
-  mkdir build
-  cd build
-  cmake .. -DLLAMA_SERVER_SSL=ON
-  make server
+  cmake -B build -DLLAMA_SERVER_SSL=ON
+  cmake --build build --config Release -t server
   ```
 
 ## Quick Start
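
One last shorthand used in the server README: `-t` is the short form of `--target` (CMake 3.15+), so the two spellings that appear across this commit are interchangeable:

```sh
cmake --build build --config Release -t server
cmake --build build --config Release --target server
# Either way, the binary lands at ./build/bin/server
```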