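# A minimal consumption sketch (the relative path and option values below are
# illustrative assumptions, not defined in this file):
#
#   llama-cpp = pkgs.callPackage ./package.nix {
#     useVulkan = true; # or useCuda, useRocm, useMetalKit, ...
#   };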
{
  lib,
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  blas,
  cudaPackages,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useRocm ? config.rocmSupport,
  enableCurl ? true,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}@inputs:
let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    versionOlder
    ;
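  # For reference, the helpers inherited above render CMake cache entries;
  # illustrative values (GGML_FOO/bar are placeholders):
  #   cmakeBool "GGML_CUDA" true    -> "-DGGML_CUDA:BOOL=TRUE"
  #   cmakeFeature "GGML_FOO" "bar" -> "-DGGML_FOO:STRING=bar"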
  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  #
  # TODO: Package up each Python script or service appropriately, by making
  # them into "entrypoints"
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt into the heavy ML Python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers

      # server bench
      ps.matplotlib

      # server tests
      ps.openai
      ps.behave
      ps.prometheus-client

      # for examples/pydantic-models-to-grammar-examples.py
      ps.docstring-parser
      ps.pydantic

      # for scripts/compare-llama-bench.py
      ps.gitpython
      ps.tabulate
    ]
  );
  # Impure helper exposing the host's /usr/bin/xcrun; only referenced when the
  # sandbox is disabled via `__noChroot` below.
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';
  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in
effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };
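    # For example, editing README.md or a *.nix file at the repo root leaves
    # the output hash unchanged, so no rebuild is triggered.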
    postPatch = ''
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';
    # With PR#6015 (https://github.com/ggerganov/llama.cpp/pull/6015),
    # `default.metallib` may be compiled with the Metal compiler from Xcode,
    # which requires escaping the sandbox on macOS to access the compiler.
    # `xcrun` is used to find the path of the Metal compiler, which is variable
    # and not on $PATH;
    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
      ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useBlas [ blas ]
      ++ optionals useVulkan vulkanBuildInputs
      ++ optionals enableCurl [ curl ];
    cmakeFlags =
      [
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_CURL" enableCurl)
        (cmakeBool "GGML_NATIVE" false)
        (cmakeBool "GGML_BLAS" useBlas)
        (cmakeBool "GGML_CUDA" useCuda)
        (cmakeBool "GGML_HIPBLAS" useRocm)
        (cmakeBool "GGML_METAL" useMetalKit)
        (cmakeBool "GGML_VULKAN" useVulkan)
        (cmakeBool "GGML_STATIC" enableStatic)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities))
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
      ]
      ++ optionals useMetalKit [
        (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
      ];
    # Environment variables needed for ROCm.
    # Note: `env` must be an attribute set, so use optionalAttrs rather than
    # optionals (which would yield a list when the condition is false).
    env = lib.optionalAttrs useRocm {
      ROCM_PATH = "${rocmPackages.clr}";
      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
    };
    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mkdir -p $out/include
      cp $src/include/llama.h $out/include/
    '';
    # Define the dev shells here; they reference the package via inputsFrom,
    # not the other way around, to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useRocm
        useVulkan
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };
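    # A hypothetical way of entering these shells, assuming the flake exposes
    # them as devShells (the output names here are assumptions):
    #
    #   nix develop .#default  # passthru.shell
    #   nix develop .#extra    # passthru.shell-extra
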
    meta = {
      # Configurations we don't want even the CI to evaluate. Results in
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals useCuda lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama-cli";

      # These people might respond, on a best-effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.
      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)