package.nix 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  # Function from dependencies and feature toggles to the llama-cpp package.
  {
    # Nixpkgs infrastructure
    lib,
    config,
    stdenv,
    mkShell,
    glibc,

    # Build tools and interpreters
    cmake,
    ninja,
    pkg-config,
    git,
    python3,

    # Libraries and package sets used by the optional backends
    mpi,
    openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
    cudaPackages,
    darwin,
    rocmPackages,
    vulkan-headers,
    vulkan-loader,
    clblast,

    # Feature toggles.
    # BLAS is the fallback: it defaults to on only when none of the
    # accelerated backends below is selected.
    useBlas ? !(useCuda || useMetalKit || useOpenCL || useRocm || useVulkan),
    useCuda ? config.cudaSupport,
    useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
    useMpi ? false, # Increases the runtime closure size by ~700M
    useOpenCL ? false,
    useRocm ? config.rocmSupport,
    useVulkan ? false,

    # Arbitrary version, substituted by the flake
    llamaVersion ? "0.0.0",

    # It's necessary to consistently use backendStdenv when building with CUDA support,
    # otherwise we get libstdc++ errors downstream.
    effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,

    # Static linking follows the host platform of the chosen stdenv by default.
    enableStatic ? effectiveStdenv.hostPlatform.isStatic
  }@inputs:
  let
    # Helpers taken from `lib` so the rest of the file can use them unqualified.
    inherit (lib)
      cmakeBool
      cmakeFeature
      optionals
      strings
      ;

    # Shadows the `stdenv` argument inside the body: any accidental reference
    # aborts evaluation with this message instead of silently bypassing
    # `effectiveStdenv`.
    stdenv = throw "Use effectiveStdenv instead";
  48. suffices =
  49. lib.optionals useBlas [ "BLAS" ]
  50. ++ lib.optionals useCuda [ "CUDA" ]
  51. ++ lib.optionals useMetalKit [ "MetalKit" ]
  52. ++ lib.optionals useMpi [ "MPI" ]
  53. ++ lib.optionals useOpenCL [ "OpenCL" ]
  54. ++ lib.optionals useRocm [ "ROCm" ]
  55. ++ lib.optionals useVulkan [ "Vulkan" ];
  56. pnameSuffix =
  57. strings.optionalString (suffices != [ ])
  58. "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  59. descriptionSuffix =
  60. strings.optionalString (suffices != [ ])
  61. ", accelerated with ${strings.concatStringsSep ", " suffices}";
  62. # TODO: package the Python in this repository in a Nix-like way.
  63. # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  64. # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  65. # https://peps.python.org/pep-0517/
  66. llama-python = python3.withPackages (
  67. ps: [
  68. ps.numpy
  69. ps.sentencepiece
  70. ]
  71. );
  72. # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  73. llama-python-extra = python3.withPackages (
  74. ps: [
  75. ps.numpy
  76. ps.sentencepiece
  77. ps.tiktoken
  78. ps.torchWithoutCuda
  79. ps.transformers
  80. ]
  81. );
  82. # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  83. # separately
  84. darwinBuildInputs =
  85. with darwin.apple_sdk.frameworks;
  86. [
  87. Accelerate
  88. CoreVideo
  89. CoreGraphics
  90. ]
  91. ++ optionals useMetalKit [ MetalKit ];
  92. cudaBuildInputs = with cudaPackages; [
  93. cuda_cccl.dev # <nv/target>
  94. # A temporary hack for reducing the closure size, remove once cudaPackages
  95. # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
  96. cuda_cudart.dev
  97. cuda_cudart.lib
  98. cuda_cudart.static
  99. libcublas.dev
  100. libcublas.lib
  101. libcublas.static
  102. ];
  103. rocmBuildInputs = with rocmPackages; [
  104. clr
  105. hipblas
  106. rocblas
  107. ];
  108. vulkanBuildInputs = [
  109. vulkan-headers
  110. vulkan-loader
  111. ];
  in
  # The llama.cpp package itself, built with the stdenv chosen by
  # `effectiveStdenv`. `finalAttrs` gives access to the final (possibly
  # overridden) attributes and package, which the passthru shells rely on.
  effectiveStdenv.mkDerivation (
    finalAttrs: {
      pname = "llama-cpp${pnameSuffix}";
      version = llamaVersion;

      # Note: none of the files discarded here are visible in the sandbox or
      # affect the output hash. This also means they can be modified without
      # triggering a rebuild.
      src = lib.cleanSourceWith {
        filter =
          name: type:
          let
            noneOf = builtins.all (x: !x);
            baseName = baseNameOf name;
          in
          noneOf [
            (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
            (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
            (lib.hasPrefix "." baseName) # Skip hidden files and directories
            (baseName == "flake.lock")
          ];
        src = lib.cleanSource ../../.;
      };

      postPatch = ''
        substituteInPlace ./ggml-metal.m \
          --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

        # TODO: Package up each Python script or service appropriately.
        # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
        # we could make those *.py into setuptools' entrypoints
        substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
      '';

      nativeBuildInputs =
        [
          cmake
          ninja
          pkg-config
          git
        ]
        ++ optionals useCuda [
          cudaPackages.cuda_nvcc

          # TODO: Replace with autoAddDriverRunpath
          # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
          cudaPackages.autoAddOpenGLRunpathHook
        ]
        ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
          glibc.static
        ];

      buildInputs =
        optionals effectiveStdenv.isDarwin darwinBuildInputs
        # LLAMA_BLAS is enabled with LLAMA_BLAS_VENDOR=OpenBLAS in cmakeFlags
        # below, so the OpenBLAS library has to be among the build inputs.
        ++ optionals useBlas [ openblas ]
        ++ optionals useCuda cudaBuildInputs
        ++ optionals useMpi [ mpi ]
        ++ optionals useOpenCL [ clblast ]
        ++ optionals useRocm rocmBuildInputs
        ++ optionals useVulkan vulkanBuildInputs;

      cmakeFlags =
        [
          (cmakeBool "LLAMA_NATIVE" false)
          (cmakeBool "LLAMA_BUILD_SERVER" true)
          (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
          (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
          (cmakeBool "LLAMA_BLAS" useBlas)
          (cmakeBool "LLAMA_CLBLAST" useOpenCL)
          (cmakeBool "LLAMA_CUBLAS" useCuda)
          (cmakeBool "LLAMA_HIPBLAS" useRocm)
          (cmakeBool "LLAMA_METAL" useMetalKit)
          (cmakeBool "LLAMA_MPI" useMpi)
          (cmakeBool "LLAMA_VULKAN" useVulkan)
          (cmakeBool "LLAMA_STATIC" enableStatic)
        ]
        ++ optionals useCuda [
          (
            with cudaPackages.flags;
            cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
              builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
            )
          )
        ]
        ++ optionals useRocm [
          (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
          (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

          # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
          # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
          # and select the line that matches the current nixpkgs version of rocBLAS.
          # Should likely use `rocmPackages.clr.gpuTargets`.
          "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
        ]
        ++ optionals useMetalKit [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
        ++ optionals useBlas [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

      # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
      # if they haven't been added yet.
      postInstall = ''
        mv $out/bin/main $out/bin/llama
        mv $out/bin/server $out/bin/llama-server
        mkdir -p $out/include
        cp $src/llama.h $out/include/
      '';

      # Expose the feature toggles and the development shells. The shells take
      # their build inputs from the final package via
      # `inputsFrom = [ finalAttrs.finalPackage ]`.
      passthru = {
        inherit
          useBlas
          useCuda
          useMetalKit
          useMpi
          useOpenCL
          useRocm
          useVulkan
          ;

        shell = mkShell {
          name = "shell-${finalAttrs.finalPackage.name}";
          description = "contains numpy and sentencepiece";
          buildInputs = [ llama-python ];
          inputsFrom = [ finalAttrs.finalPackage ];
          shellHook = ''
            addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
          '';
        };

        shell-extra = mkShell {
          name = "shell-extra-${finalAttrs.finalPackage.name}";
          description = "contains numpy, sentencepiece, tiktoken, torchWithoutCuda, and transformers";
          buildInputs = [ llama-python-extra ];
          inputsFrom = [ finalAttrs.finalPackage ];
        };
      };

      meta = {
        # Configurations we don't want even the CI to evaluate. Results in the
        # "unsupported platform" messages. This is mostly a no-op, because
        # cudaPackages would've refused to evaluate anyway.
        badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

        # Configurations that are known to result in build failures. Can be
        # overridden by importing Nixpkgs with `allowBroken = true`.
        broken = (useMetalKit && !effectiveStdenv.isDarwin);

        description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
        homepage = "https://github.com/ggerganov/llama.cpp/";
        license = lib.licenses.mit;

        # Accommodates `nix run` and `lib.getExe`
        mainProgram = "llama";

        # These people might respond, on the best effort basis, if you ping them
        # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
        # Consider adding yourself to this list if you want to ensure this flake
        # stays maintained and you're willing to invest your time. Do not add
        # other people without their consent. Consider removing people after
        # they've been unreachable for long periods of time.

        # Note that lib.maintainers is defined in Nixpkgs, but you may just add
        # an attrset following the same format as in
        # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
        maintainers = with lib.maintainers; [
          philiptaron
          SomeoneSerge
        ];

        # Extend `badPlatforms` instead
        platforms = lib.platforms.all;
      };
    }
  )