{
  lib,
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  blas,
  cudaPackages,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  clblast,
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useOpenCL
    useRocm
    useVulkan
  ],
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false
}@inputs:
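
# Usage sketch (hypothetical caller, not part of this file): the feature
# flags above are ordinary function arguments, so a consumer can toggle
# backends at import time and let callPackage fill in the rest, e.g.
#
#   pkgs.callPackage ./package.nix { useCuda = true; useMpi = false; }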

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    versionOlder
    ;

  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useOpenCL [ "OpenCL" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt into the heavy ML Python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers
    ]
  );

  xcrunHost = runCommand "xcrunHost" {} ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults; no need to handle isAarch64
  # separately.
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };
    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"

      # TODO: Package up each Python script or service appropriately.
      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
      # we could make those *.py into setuptools' entrypoints
      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
    '';
    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
    # `default.metallib` may be compiled with the Metal compiler from Xcode,
    # and we need to escape the sandbox on macOS to access the Metal compiler.
    # `xcrun` is used to find the path of the Metal compiler, which is variable
    # and not on $PATH.
    # See https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
        glibc.static
      ]
      ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
        xcrunHost
      ];
    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useBlas [ blas ]
      ++ optionals useVulkan vulkanBuildInputs;
    cmakeFlags =
      [
        (cmakeBool "LLAMA_NATIVE" false)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUDA" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
        (cmakeBool "LLAMA_VULKAN" useVulkan)
        (cmakeBool "LLAMA_STATIC" enableStatic)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

        # Build all targets supported by rocBLAS. When updating, search for TARGET_LIST_ROCM
        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
        # and select the line that matches the current nixpkgs version of rocBLAS.
        # Should likely use `rocmPackages.clr.gpuTargets`.
        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
      ]
      ++ optionals useMetalKit [
        (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
        (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
      ];
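
    # A hypothetical downstream tweak (not part of this file; the flag shown
    # is illustrative): extra CMake flags can be appended without editing
    # this derivation by using the standard overrideAttrs mechanism, e.g.
    #
    #   llama-cpp.overrideAttrs (oldAttrs: {
    #     cmakeFlags = oldAttrs.cmakeFlags ++ [ "-DLLAMA_FATAL_WARNINGS=ON" ];
    #   })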
    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';
    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        useVulkan
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
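
      # Hypothetical entry points (attribute paths are illustrative and
      # depend on how the flake wires these outputs into devShells):
      #
      #   nix develop .#llama-cpp.passthru.shell
      #   nix develop .#llama-cpp.passthru.shell-extra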
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama";

      # These people might respond, on a best-effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.
      #
      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)
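
# Hypothetical build-and-run sketch (the output name depends on how the
# flake exposes this package; shown here for illustration only):
#
#   nix build .#llama-cpp
#   ./result/bin/llama --help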