1
0

package.nix 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. {
  2. lib,
  3. glibc,
  4. config,
  5. stdenv,
  6. runCommand,
  7. cmake,
  8. ninja,
  9. pkg-config,
  10. git,
  11. mpi,
  12. blas,
  13. cudaPackages,
  14. autoAddDriverRunpath,
  15. darwin,
  16. rocmPackages,
  17. vulkan-headers,
  18. vulkan-loader,
  19. curl,
  20. shaderc,
  21. useBlas ?
  22. builtins.all (x: !x) [
  23. useCuda
  24. useMetalKit
  25. useRocm
  26. useVulkan
  27. ]
  28. && blas.meta.available,
  29. useCuda ? config.cudaSupport,
  30. useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  31. # Increases the runtime closure size by ~700M
  32. useMpi ? false,
  33. useRocm ? config.rocmSupport,
  34. rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  35. enableCurl ? true,
  36. useVulkan ? false,
  37. llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
  38. # It's necessary to consistently use backendStdenv when building with CUDA support,
  39. # otherwise we get libstdc++ errors downstream.
  40. effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  41. enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  42. precompileMetalShaders ? false,
  43. }:
  44. let
  45. inherit (lib)
  46. cmakeBool
  47. cmakeFeature
  48. optionalAttrs
  49. optionals
  50. strings
  51. ;
  52. stdenv = throw "Use effectiveStdenv instead";
  53. suffices =
  54. lib.optionals useBlas [ "BLAS" ]
  55. ++ lib.optionals useCuda [ "CUDA" ]
  56. ++ lib.optionals useMetalKit [ "MetalKit" ]
  57. ++ lib.optionals useMpi [ "MPI" ]
  58. ++ lib.optionals useRocm [ "ROCm" ]
  59. ++ lib.optionals useVulkan [ "Vulkan" ];
  60. pnameSuffix =
  61. strings.optionalString (suffices != [ ])
  62. "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  63. descriptionSuffix = strings.optionalString (
  64. suffices != [ ]
  65. ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
  66. xcrunHost = runCommand "xcrunHost" { } ''
  67. mkdir -p $out/bin
  68. ln -s /usr/bin/xcrun $out/bin
  69. '';
  70. # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  71. # separately
  72. darwinBuildInputs =
  73. with darwin.apple_sdk.frameworks;
  74. [
  75. Accelerate
  76. CoreVideo
  77. CoreGraphics
  78. ]
  79. ++ optionals useMetalKit [ MetalKit ];
  80. cudaBuildInputs = with cudaPackages; [
  81. cuda_cudart
  82. cuda_cccl # <nv/target>
  83. libcublas
  84. ];
  85. rocmBuildInputs = with rocmPackages; [
  86. clr
  87. hipblas
  88. rocblas
  89. ];
  90. vulkanBuildInputs = [
  91. vulkan-headers
  92. vulkan-loader
  93. shaderc
  94. ];
  95. in
  96. effectiveStdenv.mkDerivation (finalAttrs: {
  97. pname = "llama-cpp${pnameSuffix}";
  98. version = llamaVersion;
  99. # Note: none of the files discarded here are visible in the sandbox or
  100. # affect the output hash. This also means they can be modified without
  101. # triggering a rebuild.
  102. src = lib.cleanSourceWith {
  103. filter =
  104. name: type:
  105. let
  106. noneOf = builtins.all (x: !x);
  107. baseName = baseNameOf name;
  108. in
  109. noneOf [
  110. (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
  111. (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
  112. (lib.hasPrefix "." baseName) # Skip hidden files and directories
  113. (baseName == "flake.lock")
  114. ];
  115. src = lib.cleanSource ../../.;
  116. };
  117. postPatch = ''
  118. substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
  119. --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
  120. substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
  121. --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
  122. '';
  123. # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
  124. # `default.metallib` may be compiled with Metal compiler from XCode
  125. # and we need to escape sandbox on MacOS to access Metal compiler.
  126. # `xcrun` is used find the path of the Metal compiler, which is varible
  127. # and not on $PATH
  128. # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
  129. __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
  130. nativeBuildInputs =
  131. [
  132. cmake
  133. ninja
  134. pkg-config
  135. git
  136. ]
  137. ++ optionals useCuda [
  138. cudaPackages.cuda_nvcc
  139. autoAddDriverRunpath
  140. ]
  141. ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
  142. ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
  143. buildInputs =
  144. optionals effectiveStdenv.isDarwin darwinBuildInputs
  145. ++ optionals useCuda cudaBuildInputs
  146. ++ optionals useMpi [ mpi ]
  147. ++ optionals useRocm rocmBuildInputs
  148. ++ optionals useBlas [ blas ]
  149. ++ optionals useVulkan vulkanBuildInputs
  150. ++ optionals enableCurl [ curl ];
  151. cmakeFlags =
  152. [
  153. (cmakeBool "LLAMA_BUILD_SERVER" true)
  154. (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
  155. (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  156. (cmakeBool "LLAMA_CURL" enableCurl)
  157. (cmakeBool "GGML_NATIVE" false)
  158. (cmakeBool "GGML_BLAS" useBlas)
  159. (cmakeBool "GGML_CUDA" useCuda)
  160. (cmakeBool "GGML_HIP" useRocm)
  161. (cmakeBool "GGML_METAL" useMetalKit)
  162. (cmakeBool "GGML_VULKAN" useVulkan)
  163. (cmakeBool "GGML_STATIC" enableStatic)
  164. ]
  165. ++ optionals useCuda [
  166. (
  167. with cudaPackages.flags;
  168. cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
  169. builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
  170. )
  171. )
  172. ]
  173. ++ optionals useRocm [
  174. (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
  175. (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
  176. ]
  177. ++ optionals useMetalKit [
  178. (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
  179. (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
  180. ];
  181. # Environment variables needed for ROCm
  182. env = optionalAttrs useRocm {
  183. ROCM_PATH = "${rocmPackages.clr}";
  184. HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  185. };
  186. # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  187. # if they haven't been added yet.
  188. postInstall = ''
  189. mkdir -p $out/include
  190. cp $src/include/llama.h $out/include/
  191. '';
  192. meta = {
  193. # Configurations we don't want even the CI to evaluate. Results in the
  194. # "unsupported platform" messages. This is mostly a no-op, because
  195. # cudaPackages would've refused to evaluate anyway.
  196. badPlatforms = optionals useCuda lib.platforms.darwin;
  197. # Configurations that are known to result in build failures. Can be
  198. # overridden by importing Nixpkgs with `allowBroken = true`.
  199. broken = (useMetalKit && !effectiveStdenv.isDarwin);
  200. description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
  201. homepage = "https://github.com/ggml-org/llama.cpp/";
  202. license = lib.licenses.mit;
  203. # Accommodates `nix run` and `lib.getExe`
  204. mainProgram = "llama-cli";
  205. # These people might respond, on the best effort basis, if you ping them
  206. # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
  207. # Consider adding yourself to this list if you want to ensure this flake
  208. # stays maintained and you're willing to invest your time. Do not add
  209. # other people without their consent. Consider removing people after
  210. # they've been unreachable for long periods of time.
  211. # Note that lib.maintainers is defined in Nixpkgs, but you may just add
  212. # an attrset following the same format as in
  213. # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
  214. maintainers = with lib.maintainers; [
  215. philiptaron
  216. SomeoneSerge
  217. ];
  218. # Extend `badPlatforms` instead
  219. platforms = lib.platforms.all;
  220. };
  221. })