vor 10 Monaten · 251364549f
--- a/Makefile
+++ b/Makefile
@@ -836,7 +836,7 @@ ifdef GGML_MUSA
 
				 	else
			
 
				 		MUSA_PATH ?= /opt/musa
			
 
				 	endif
			
 
				-	MUSA_ARCHITECTURES ?= 21;22
			
 
				+	MUSA_ARCHITECTURES ?= 21;22;31
			
 
				 
			
 
				 	MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
			
 
				 	MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
			
--- a/docs/build.md
+++ b/docs/build.md
@@ -197,28 +197,52 @@ The following compilation options are also available to tweak performance:
 
				 
			
 
				 ## MUSA
			
 
				 
			
 
				-This provides GPU acceleration using the MUSA cores of your Moore Threads MTT GPU. Make sure to have the MUSA SDK installed. You can download it from here: [MUSA SDK](https://developer.mthreads.com/sdk/download/musa).
			
 
				+This provides GPU acceleration using a Moore Threads GPU. Make sure to have the [MUSA SDK](https://developer.mthreads.com/musa/musa-sdk) installed.
			
 
				 
			
 
				-- Using `CMake`:
			
 
				+### Download directly from Moore Threads
			
 
				 
			
 
				-  ```bash
			
 
				-  cmake -B build -DGGML_MUSA=ON
			
 
				-  cmake --build build --config Release
			
 
				-  ```
			
 
				+You may find the official downloads here: [Moore Threads developer site](https://developer.mthreads.com/sdk/download/musa).
			
 
				 
			
 
				-  For static build:
			
 
				+### Compilation
			
 
				 
			
 
				-  ```bash
			
 
				+```bash
			
 
				+cmake -B build -DGGML_MUSA=ON
			
 
				+cmake --build build --config Release
			
 
				+```
			
 
				+
			
 
				+#### Override Compute Capability Specifications
			
 
				+
			
 
				+By default, all supported compute capabilities are enabled. To customize this behavior, you can specify the `MUSA_ARCHITECTURES` option in the CMake command:
			
 
				+
			
 
				+```bash
			
 
				+cmake -B build -DGGML_MUSA=ON -DMUSA_ARCHITECTURES="21"
			
 
				+```
			
 
				+
			
 
				+This configuration enables only compute capability `2.1` (MTT S80) during compilation, which can help reduce compilation time.
			
 
				+
			
 
				+#### Compilation options
			
 
				+
			
 
				+Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet.
			
 
				+
			
 
				+- For static builds, add `-DBUILD_SHARED_LIBS=OFF` and `-DCMAKE_POSITION_INDEPENDENT_CODE=ON`:
			
 
				+  ```bash
			
 
				   cmake -B build -DGGML_MUSA=ON \
			
 
				     -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
			
 
				   cmake --build build --config Release
			
 
				   ```
			
 
				 
			
 
				-The environment variable [`MUSA_VISIBLE_DEVICES`](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) can be used to specify which GPU(s) will be used.
			
 
				+### Runtime MUSA environment variables
			
 
				 
			
 
				-The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted.
			
 
				+You may set the [MUSA environment variables](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) at runtime.
			
 
				 
			
 
				-Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet.
			
 
				+```bash
			
 
				+# Use `MUSA_VISIBLE_DEVICES` to hide the first compute device.
			
 
				+MUSA_VISIBLE_DEVICES="-0" ./build/bin/llama-server --model /srv/models/llama.gguf
			
 
				+```
			
 
				+
			
 
				+### Unified Memory
			
 
				+
			
 
				+The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory on Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted.
			
 
				 
			
 
				 ## HIP
			
 
				 
			
--- a/ggml/src/ggml-musa/CMakeLists.txt
+++ b/ggml/src/ggml-musa/CMakeLists.txt
@@ -21,7 +21,7 @@ if (MUSAToolkit_FOUND)
 
				     message(STATUS "MUSA Toolkit found")
			
 
				 
			
 
				     if (NOT DEFINED MUSA_ARCHITECTURES)
			
 
				-        set(MUSA_ARCHITECTURES "21;22")
			
 
				+        set(MUSA_ARCHITECTURES "21;22;31")
			
 
				     endif()
			
 
				     message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")