package.nix 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. {
  2. lib,
  3. config,
  4. stdenv,
  5. mkShell,
  6. cmake,
  7. ninja,
  8. pkg-config,
  9. git,
  10. python3,
  11. mpi,
  12. openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
  13. cudaPackages,
  14. darwin,
  15. rocmPackages,
  16. clblast,
  17. useBlas ? builtins.all (x: !x) [
  18. useCuda
  19. useMetalKit
  20. useOpenCL
  21. useRocm
  22. ],
  23. useCuda ? config.cudaSupport,
  24. useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  25. useMpi ? false, # Increases the runtime closure size by ~700M
  26. useOpenCL ? false,
  27. useRocm ? config.rocmSupport,
  28. llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
  29. }@inputs:
  30. let
  # Bring the `lib` helpers into scope so they can be used unqualified below.
  inherit (lib) cmakeBool cmakeFeature optionals strings versionOlder;
  # The `stdenv` argument is shadowed on purpose, so that any accidental use
  # further down fails at evaluation time. CUDA builds must use cudaPackages'
  # backendStdenv consistently, otherwise we get libstdc++ errors downstream.
  stdenv = throw "Use effectiveStdenv instead";

  # The stdenv that is actually used to build the package.
  effectiveStdenv =
    if !useCuda then
      inputs.stdenv
    else
      cudaPackages.backendStdenv;
  # Labels of the enabled backends, always listed in the order given here.
  suffixes = map (backend: backend.label) (
    builtins.filter (backend: backend.enabled) [
      {
        enabled = useBlas;
        label = "BLAS";
      }
      {
        enabled = useCuda;
        label = "CUDA";
      }
      {
        enabled = useMetalKit;
        label = "MetalKit";
      }
      {
        enabled = useMpi;
        label = "MPI";
      }
      {
        enabled = useOpenCL;
        label = "OpenCL";
      }
      {
        enabled = useRocm;
        label = "ROCm";
      }
    ]
  );

  # Lower-cased labels, each preceded by a dash (e.g. "-blas-mpi");
  # the empty string when no backend is enabled.
  pnameSuffix = strings.concatMapStrings (label: "-${strings.toLower label}") suffixes;

  # Human-readable counterpart appended to `meta.description`.
  descriptionSuffix =
    if suffixes == [ ] then
      ""
    else
      ", accelerated with " + strings.concatStringsSep ", " suffixes;
  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/

  # Python packages shared by both interpreter environments below.
  llamaPythonBase = ps: [
    ps.numpy
    ps.sentencepiece
  ];

  # Minimal interpreter: numpy and sentencepiece only.
  llama-python = python3.withPackages llamaPythonBase;

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps:
    llamaPythonBase ps
    ++ [
      ps.torchWithoutCuda
      ps.transformers
    ]
  );
  # Apple frameworks linked on Darwin; MetalKit is added only when the Metal
  # backend is enabled. apple_sdk is supposed to choose sane defaults, so
  # there is no need to handle isAarch64 separately.
  darwinBuildInputs =
    let
      inherit (darwin.apple_sdk) frameworks;
    in
    [
      frameworks.Accelerate
      frameworks.CoreVideo
      frameworks.CoreGraphics
    ]
    ++ optionals useMetalKit [ frameworks.MetalKit ];
  # CUDA build dependencies.
  cudaBuildInputs =
    [
      cudaPackages.cuda_cccl.dev # <nv/target>
    ]
    # The individual outputs are listed explicitly as a temporary hack for
    # reducing the closure size; remove once cudaPackages have stopped using
    # lndir: https://github.com/NixOS/nixpkgs/issues/271792
    ++ builtins.concatMap
      (pkg: [
        pkg.dev
        pkg.lib
        pkg.static
      ])
      [
        cudaPackages.cuda_cudart
        cudaPackages.libcublas
      ];
  # ROCm build dependencies.
  rocmBuildInputs = [
    rocmPackages.clr
    rocmPackages.hipblas
    rocmPackages.rocblas
  ];
  100. in
  101. effectiveStdenv.mkDerivation (
  102. finalAttrs: {
  103. pname = "llama-cpp${pnameSuffix}";
  104. version = llamaVersion;
  105. src = lib.cleanSourceWith {
  106. filter =
  107. name: type:
  108. !(builtins.any (_: _) [
  109. (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
  110. (name == "README.md") # Ignore *.md changes whe computing outPaths
  111. (lib.hasPrefix "." name) # Skip hidden files and directories
  112. ]);
  113. src = lib.cleanSource ../../.;
  114. };
  115. postPatch = ''
  116. substituteInPlace ./ggml-metal.m \
  117. --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
  118. # TODO: Package up each Python script or service appropriately.
  119. # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
  120. # we could make those *.py into setuptools' entrypoints
  121. substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
  122. '';
  123. nativeBuildInputs =
  124. [
  125. cmake
  126. ninja
  127. pkg-config
  128. git
  129. ]
  130. ++ optionals useCuda [
  131. cudaPackages.cuda_nvcc
  132. # TODO: Replace with autoAddDriverRunpath
  133. # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
  134. cudaPackages.autoAddOpenGLRunpathHook
  135. ];
  136. buildInputs =
  137. optionals effectiveStdenv.isDarwin darwinBuildInputs
  138. ++ optionals useCuda cudaBuildInputs
  139. ++ optionals useMpi [ mpi ]
  140. ++ optionals useOpenCL [ clblast ]
  141. ++ optionals useRocm rocmBuildInputs;
  142. cmakeFlags =
  143. [
  144. (cmakeBool "LLAMA_NATIVE" true)
  145. (cmakeBool "LLAMA_BUILD_SERVER" true)
  146. (cmakeBool "BUILD_SHARED_LIBS" true)
  147. (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  148. (cmakeBool "LLAMA_BLAS" useBlas)
  149. (cmakeBool "LLAMA_CLBLAST" useOpenCL)
  150. (cmakeBool "LLAMA_CUBLAS" useCuda)
  151. (cmakeBool "LLAMA_HIPBLAS" useRocm)
  152. (cmakeBool "LLAMA_METAL" useMetalKit)
  153. (cmakeBool "LLAMA_MPI" useMpi)
  154. ]
  155. ++ optionals useCuda [
  156. (
  157. with cudaPackages.flags;
  158. cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
  159. builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
  160. )
  161. )
  162. ]
  163. ++ optionals useRocm [
  164. (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
  165. (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
  166. # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
  167. # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
  168. # and select the line that matches the current nixpkgs version of rocBLAS.
  169. # Should likely use `rocmPackages.clr.gpuTargets`.
  170. "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
  171. ]
  172. ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
  173. ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
  174. # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  175. # if they haven't been added yet.
  176. postInstall = ''
  177. mv $out/bin/main $out/bin/llama
  178. mv $out/bin/server $out/bin/llama-server
  179. mkdir -p $out/include
  180. cp $src/llama.h $out/include/
  181. '';
  182. # Define the shells here, but don't add in the inputsFrom to avoid recursion.
  183. passthru = {
  184. inherit
  185. useBlas
  186. useCuda
  187. useMetalKit
  188. useMpi
  189. useOpenCL
  190. useRocm
  191. ;
  192. shell = mkShell {
  193. name = "shell-${finalAttrs.finalPackage.name}";
  194. description = "contains numpy and sentencepiece";
  195. buildInputs = [ llama-python ];
  196. inputsFrom = [ finalAttrs.finalPackage ];
  197. };
  198. shell-extra = mkShell {
  199. name = "shell-extra-${finalAttrs.finalPackage.name}";
  200. description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
  201. buildInputs = [ llama-python-extra ];
  202. inputsFrom = [ finalAttrs.finalPackage ];
  203. };
  204. };
  205. meta = {
  206. # Configurations we don't want even the CI to evaluate. Results in the
  207. # "unsupported platform" messages. This is mostly a no-op, because
  208. # cudaPackages would've refused to evaluate anyway.
  209. badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
  210. # Configurations that are known to result in build failures. Can be
  211. # overridden by importing Nixpkgs with `allowBroken = true`.
  212. broken = (useMetalKit && !effectiveStdenv.isDarwin);
  213. description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
  214. homepage = "https://github.com/ggerganov/llama.cpp/";
  215. license = lib.licenses.mit;
  216. # Accommodates `nix run` and `lib.getExe`
  217. mainProgram = "llama";
  218. # These people might respond, on the best effort basis, if you ping them
  219. # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
  220. # Consider adding yourself to this list if you want to ensure this flake
  221. # stays maintained and you're willing to invest your time. Do not add
  222. # other people without their consent. Consider removing people after
  223. # they've been unreachable for long periods of time.
  224. # Note that lib.maintainers is defined in Nixpkgs, but you may just add
  225. # an attrset following the same format as in
  226. # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
  227. maintainers = with lib.maintainers; [
  228. philiptaron
  229. SomeoneSerge
  230. ];
  231. # Extend `badPlatforms` instead
  232. platforms = lib.platforms.all;
  233. };
  234. }
  235. )