@@ -3,13 +3,11 @@
   glibc,
   config,
   stdenv,
-  mkShell,
   runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
-  python3,
   mpi,
   blas,
   cudaPackages,
@@ -20,15 +18,18 @@
   vulkan-loader,
   curl,
   shaderc,
-  useBlas ? builtins.all (x: !x) [
-    useCuda
-    useMetalKit
-    useRocm
-    useVulkan
-  ] && blas.meta.available,
+  useBlas ?
+    builtins.all (x: !x) [
+      useCuda
+      useMetalKit
+      useRocm
+      useVulkan
+    ]
+    && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
-  useMpi ? false, # Increases the runtime closure size by ~700M
+  # Increases the runtime closure size by ~700M
+  useMpi ? false,
   useRocm ? config.rocmSupport,
   enableCurl ? true,
   useVulkan ? false,
@@ -38,8 +39,8 @@
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
-  precompileMetalShaders ? false
-}@inputs:
+  precompileMetalShaders ? false,
+}:

 let
   inherit (lib)
@@ -47,7 +48,6 @@ let
     cmakeFeature
     optionals
     strings
-    versionOlder
     ;

   stdenv = throw "Use effectiveStdenv instead";
@@ -63,54 +63,11 @@ let
   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix =
-    strings.optionalString (suffices != [ ])
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
-
-  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
-
-  # TODO: package the Python in this repository in a Nix-like way.
-  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
-  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
-  # https://peps.python.org/pep-0517/
-  #
-  # TODO: Package up each Python script or service appropriately, by making
-  # them into "entrypoints"
-  llama-python = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-    ]
-  );
+  descriptionSuffix = strings.optionalString (
+    suffices != [ ]
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

-  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      ps.tiktoken
-      ps.torchWithoutCuda
-      ps.transformers
-
-      # server bench
-      ps.matplotlib
-
-      # server tests
-      ps.openai
-      ps.behave
-      ps.prometheus-client
-
-      # for examples/pydantic-models-to-grammar-examples.py
-      ps.docstring-parser
-      ps.pydantic
-
-      # for scripts/compare-llama-bench.py
-      ps.gitpython
-      ps.tabulate
-    ]
-  );
-
-  xcrunHost = runCommand "xcrunHost" {} ''
+  xcrunHost = runCommand "xcrunHost" { } ''
     mkdir -p $out/bin
     ln -s /usr/bin/xcrun $out/bin
   '';
@@ -145,178 +102,145 @@
   ];
 in

-effectiveStdenv.mkDerivation (
-  finalAttrs: {
-    pname = "llama-cpp${pnameSuffix}";
-    version = llamaVersion;
-
-    # Note: none of the files discarded here are visible in the sandbox or
-    # affect the output hash. This also means they can be modified without
-    # triggering a rebuild.
-    src = lib.cleanSourceWith {
-      filter =
-        name: type:
-        let
-          noneOf = builtins.all (x: !x);
-          baseName = baseNameOf name;
-        in
-        noneOf [
-          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
-          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
-          (lib.hasPrefix "." baseName) # Skip hidden files and directories
-          (baseName == "flake.lock")
-        ];
-      src = lib.cleanSource ../../.;
-    };
-
-    postPatch = ''
-      substituteInPlace ./ggml/src/ggml-metal.m \
-        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-      substituteInPlace ./ggml/src/ggml-metal.m \
-        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
-    '';
-
-    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
-    # `default.metallib` may be compiled with Metal compiler from XCode
-    # and we need to escape sandbox on MacOS to access Metal compiler.
-    # `xcrun` is used find the path of the Metal compiler, which is varible
-    # and not on $PATH
-    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
-    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
-
-    nativeBuildInputs =
-      [
-        cmake
-        ninja
-        pkg-config
-        git
-      ]
-      ++ optionals useCuda [
-        cudaPackages.cuda_nvcc
-        autoAddDriverRunpath
-      ]
-      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
-        glibc.static
-      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
-        xcrunHost
-      ];
-
-    buildInputs =
-      optionals effectiveStdenv.isDarwin darwinBuildInputs
-      ++ optionals useCuda cudaBuildInputs
-      ++ optionals useMpi [ mpi ]
-      ++ optionals useRocm rocmBuildInputs
-      ++ optionals useBlas [ blas ]
-      ++ optionals useVulkan vulkanBuildInputs
-      ++ optionals enableCurl [ curl ];
-
-    cmakeFlags =
-      [
-        (cmakeBool "LLAMA_BUILD_SERVER" true)
-        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
-        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-        (cmakeBool "LLAMA_CURL" enableCurl)
-        (cmakeBool "GGML_NATIVE" false)
-        (cmakeBool "GGML_BLAS" useBlas)
-        (cmakeBool "GGML_CUDA" useCuda)
-        (cmakeBool "GGML_HIPBLAS" useRocm)
-        (cmakeBool "GGML_METAL" useMetalKit)
-        (cmakeBool "GGML_VULKAN" useVulkan)
-        (cmakeBool "GGML_STATIC" enableStatic)
-      ]
-      ++ optionals useCuda [
-        (
-          with cudaPackages.flags;
-          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
-            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
-          )
-        )
-      ]
-      ++ optionals useRocm [
-        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
-        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
-      ]
-      ++ optionals useMetalKit [
-        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;
+
+  # Note: none of the files discarded here are visible in the sandbox or
+  # affect the output hash. This also means they can be modified without
+  # triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        noneOf = builtins.all (x: !x);
+        baseName = baseNameOf name;
+      in
+      noneOf [
+        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
+        (lib.hasPrefix "." baseName) # Skip hidden files and directories
+        (baseName == "flake.lock")
+      ];
+    src = lib.cleanSource ../../.;
+  };
+
+  postPatch = ''
+    substituteInPlace ./ggml/src/ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+    substituteInPlace ./ggml/src/ggml-metal.m \
+      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+  '';

-    # Environment variables needed for ROCm
-    env = optionals useRocm {
-      ROCM_PATH = "${rocmPackages.clr}";
-      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
-    };
-
-    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
-    # if they haven't been added yet.
-    postInstall = ''
-      mkdir -p $out/include
-      cp $src/include/llama.h $out/include/
-    '';
-
-    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
-    passthru = {
-      inherit
-        useBlas
-        useCuda
-        useMetalKit
-        useMpi
-        useRocm
-        useVulkan
-        ;
-
-      shell = mkShell {
-        name = "shell-${finalAttrs.finalPackage.name}";
-        description = "contains numpy and sentencepiece";
-        buildInputs = [ llama-python ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-        shellHook = ''
-          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-        '';
-      };
-
-      shell-extra = mkShell {
-        name = "shell-extra-${finalAttrs.finalPackage.name}";
-        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-        buildInputs = [ llama-python-extra ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-      };
-    };
-
-    meta = {
-      # Configurations we don't want even the CI to evaluate. Results in the
-      # "unsupported platform" messages. This is mostly a no-op, because
-      # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals useCuda lib.platforms.darwin;
-
-      # Configurations that are known to result in build failures. Can be
-      # overridden by importing Nixpkgs with `allowBroken = true`.
-      broken = (useMetalKit && !effectiveStdenv.isDarwin);
-
-      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-      homepage = "https://github.com/ggerganov/llama.cpp/";
-      license = lib.licenses.mit;
-
-      # Accommodates `nix run` and `lib.getExe`
-      mainProgram = "llama-cli";
+  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+  # `default.metallib` may be compiled with the Metal compiler from Xcode
+  # and we need to escape the sandbox on macOS to access the Metal compiler.
+  # `xcrun` is used to find the path of the Metal compiler, which is variable
+  # and not on $PATH
+  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
+  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

-      # These people might respond, on the best effort basis, if you ping them
-      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
-      # Consider adding yourself to this list if you want to ensure this flake
-      # stays maintained and you're willing to invest your time. Do not add
-      # other people without their consent. Consider removing people after
-      # they've been unreachable for long periods of time.
+  nativeBuildInputs =
+    [
+      cmake
+      ninja
+      pkg-config
+      git
+    ]
+    ++ optionals useCuda [
+      cudaPackages.cuda_nvcc

-      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
-      # an attrset following the same format as in
-      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
-      maintainers = with lib.maintainers; [
-        philiptaron
-        SomeoneSerge
-      ];
+      autoAddDriverRunpath
+    ]
+    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
+
+  buildInputs =
+    optionals effectiveStdenv.isDarwin darwinBuildInputs
+    ++ optionals useCuda cudaBuildInputs
+    ++ optionals useMpi [ mpi ]
+    ++ optionals useRocm rocmBuildInputs
+    ++ optionals useBlas [ blas ]
+    ++ optionals useVulkan vulkanBuildInputs
+    ++ optionals enableCurl [ curl ];
+
+  cmakeFlags =
+    [
+      (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_CURL" enableCurl)
+      (cmakeBool "GGML_NATIVE" false)
+      (cmakeBool "GGML_BLAS" useBlas)
+      (cmakeBool "GGML_CUDA" useCuda)
+      (cmakeBool "GGML_HIPBLAS" useRocm)
+      (cmakeBool "GGML_METAL" useMetalKit)
+      (cmakeBool "GGML_VULKAN" useVulkan)
+      (cmakeBool "GGML_STATIC" enableStatic)
+    ]
+    ++ optionals useCuda [
+      (
+        with cudaPackages.flags;
+        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+        )
+      )
+    ]
+    ++ optionals useRocm [
+      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+    ]
+    ++ optionals useMetalKit [
+      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+    ];
+
+  # Environment variables needed for ROCm
+  env = optionals useRocm {
+    ROCM_PATH = "${rocmPackages.clr}";
+    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+  };
+
+  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+  # if they haven't been added yet.
+  postInstall = ''
+    mkdir -p $out/include
+    cp $src/include/llama.h $out/include/
+  '';

-      # Extend `badPlatforms` instead
-      platforms = lib.platforms.all;
-    };
-  }
-)
+  meta = {
+    # Configurations we don't want even the CI to evaluate. Results in the
+    # "unsupported platform" messages. This is mostly a no-op, because
+    # cudaPackages would've refused to evaluate anyway.
+    badPlatforms = optionals useCuda lib.platforms.darwin;
+
+    # Configurations that are known to result in build failures. Can be
+    # overridden by importing Nixpkgs with `allowBroken = true`.
+    broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    homepage = "https://github.com/ggerganov/llama.cpp/";
+    license = lib.licenses.mit;
+
+    # Accommodates `nix run` and `lib.getExe`
+    mainProgram = "llama-cli";
+
+    # These people might respond, on a best-effort basis, if you ping them
+    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+    # Consider adding yourself to this list if you want to ensure this flake
+    # stays maintained and you're willing to invest your time. Do not add
+    # other people without their consent. Consider removing people after
+    # they've been unreachable for long periods of time.
+
+    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+    # an attrset following the same format as in
+    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+    maintainers = with lib.maintainers; [
+      philiptaron
+      SomeoneSerge
+    ];
+
+    # Extend `badPlatforms` instead
+    platforms = lib.platforms.all;
+  };
+})
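
Usage note (outside the patch): the reworked `useBlas` default keeps BLAS as a CPU-side fallback only. It evaluates to true precisely when every accelerated backend flag (useCuda, useMetalKit, useRocm, useVulkan) is false and `blas.meta.available` holds. A minimal sketch, assuming this file is exposed by the flake as a package named `llama-cpp` (that attribute name is an assumption, not part of the patch):

  # Illustrative only; `llama-cpp` stands for the derivation built from this file.
  llama-cpp.override {
    # Forcing an accelerated backend makes the useBlas default resolve to false,
    # since builtins.all (x: !x) [ ... ] no longer holds over the backend flags.
    useVulkan = true;
  }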