1 год назад · cf8e0a3bb9
--- a/.devops/full-musa.Dockerfile
+++ b/.devops/full-musa.Dockerfile
@@ -0,0 +1,33 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+# Install the build toolchain and Python; remove the apt package lists in the same layer so they do not stay in the image.
+RUN apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy only the Python requirement manifests first so the pip layer stays cached when the sources change.
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && pip install --no-cache-dir -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+# Optional extra CMake flags, supplied with `--build-arg CMAKE_ARGS="..."`; expands to nothing when not given.
+ARG CMAKE_ARGS
+
+# Configure with the MUSA backend and curl support, build every target, then copy the built binaries into /app.
+RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc) && \
+    cp build/bin/* .
+
+ENTRYPOINT ["/app/.devops/tools.sh"]
--- a/.devops/llama-cli-musa.Dockerfile
+++ b/.devops/llama-cli-musa.Dockerfile
@@ -0,0 +1,37 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake
+
+WORKDIR /app
+
+COPY . .
+
+# Optional extra CMake flags, supplied with `--build-arg CMAKE_ARGS="..."`; expands to nothing when not given.
+ARG CMAKE_ARGS
+
+# Configure with the MUSA backend and build only the llama-cli target.
+RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-cli -j$(nproc)
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+# libgomp1 is the GNU OpenMP runtime library; the apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update && \
+    apt-get install -y libgomp1 && \
+    rm -rf /var/lib/apt/lists/*
+
+# Carry over only the two shared libraries and the llama-cli binary from the build stage.
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ENTRYPOINT [ "/llama-cli" ]
--- a/.devops/llama-server-musa.Dockerfile
+++ b/.devops/llama-server-musa.Dockerfile
@@ -0,0 +1,42 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+WORKDIR /app
+
+COPY . .
+
+# Optional extra CMake flags, supplied with `--build-arg CMAKE_ARGS="..."`; expands to nothing when not given.
+ARG CMAKE_ARGS
+
+# Configure with the MUSA backend and curl support, and build only the llama-server target.
+RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-server -j$(nproc)
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+# curl is used by the HEALTHCHECK below; the apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update && \
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-server /llama-server
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+# NOTE(review): the probe is hard-wired to port 8080; starting the server on another port (e.g. `--port 8000`) makes the container report unhealthy.
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/llama-server" ]
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -43,6 +43,9 @@ jobs:
 
				           - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
			
 
				           - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
			
 
				           - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
			
 
				+          - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
			
 
				+          - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
			
 
				+          - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
			
 
				           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
			
 
				           #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
			
 
				           #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
			
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -19,8 +19,11 @@ Additionally, there the following images, similar to the above:
 
				 - `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
			
 
				 - `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
			
 
				 - `ghcr.io/ggerganov/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
			
 
				+- `ghcr.io/ggerganov/llama.cpp:full-musa`: Same as `full` but compiled with MUSA support. (platforms: `linux/amd64`)
			
 
				+- `ghcr.io/ggerganov/llama.cpp:light-musa`: Same as `light` but compiled with MUSA support. (platforms: `linux/amd64`)
			
 
				+- `ghcr.io/ggerganov/llama.cpp:server-musa`: Same as `server` but compiled with MUSA support. (platforms: `linux/amd64`)
			
 
				 
			
 
				-The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library, you'll need to build the images locally for now).
			
 
				+The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library), you'll need to build the images locally for now.
			
 
				 
			
 
				 ## Usage
			
 
				 
			
@@ -84,3 +87,37 @@ docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run
 
				 docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
			
 
				 docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
			
 
				 ```
			
 
				+
			
 
				+## Docker With MUSA
			
 
				+
			
 
				+Assuming one has the [mt-container-toolkit](https://developer.mthreads.com/musa/native) properly installed on Linux, `muBLAS` should be accessible inside the container.
			
 
				+
			
 
				+## Building Docker locally
			
 
				+
			
 
				+```bash
			
 
				+docker build -t local/llama.cpp:full-musa -f .devops/full-musa.Dockerfile .
			
 
				+docker build -t local/llama.cpp:light-musa -f .devops/llama-cli-musa.Dockerfile .
			
 
				+docker build -t local/llama.cpp:server-musa -f .devops/llama-server-musa.Dockerfile .
			
 
				+```
			
 
				+
			
 
				+You may want to pass in some different `ARGS`, depending on the MUSA environment supported by your container host, as well as the GPU architecture.
			
 
				+
			
 
				+The defaults are:
			
 
				+
			
 
				+- `MUSA_VERSION` set to `rc3.1.0`
			
 
				+
			
 
				+The resulting images are essentially the same as the non-MUSA images:
			
 
				+
			
 
				+1. `local/llama.cpp:full-musa`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization.
			
 
				+2. `local/llama.cpp:light-musa`: This image only includes the main executable file.
			
 
				+3. `local/llama.cpp:server-musa`: This image only includes the server executable file.
			
 
				+
			
 
				+## Usage
			
 
				+
			
 
				+After building locally, usage is similar to the non-MUSA examples, but you'll need to set `mthreads` as the default Docker runtime. This can be done by executing `(cd /usr/bin/musa && sudo ./docker setup $PWD)` and verifying the changes by executing `docker info | grep mthreads` on the host machine. You will also want to use the `--n-gpu-layers` flag.
			
 
				+
			
 
				+```bash
			
 
				+docker run -v /path/to/models:/models local/llama.cpp:full-musa --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
			
 
				+docker run -v /path/to/models:/models local/llama.cpp:light-musa -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
			
 
				+docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
			
 
				+```
			
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -163,8 +163,8 @@ if (GGML_OPENMP)
 
				         list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
			
 
				 
			
 
				         if (GGML_MUSA)
			
 
				-            list(APPEND GGML_EXTRA_INCLUDES     "/usr/lib/llvm-10/include/openmp")
			
 
				-            list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so")
			
 
				+            list(APPEND GGML_EXTRA_INCLUDES     "/usr/lib/llvm-14/lib/clang/14.0.0/include")
			
 
				+            list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-14/lib/libomp.so")
			
 
				         endif()
			
 
				     else()
			
 
				         message(WARNING "OpenMP not found")