@@ -3,13 +3,11 @@
   glibc,
   config,
   stdenv,
-  mkShell,
   runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
-  python3,
   mpi,
   blas,
   cudaPackages,
@@ -20,15 +18,18 @@
   vulkan-loader,
   curl,
   shaderc,
-  useBlas ? builtins.all (x: !x) [
-    useCuda
-    useMetalKit
-    useRocm
-    useVulkan
-  ] && blas.meta.available,
+  useBlas ?
+    builtins.all (x: !x) [
+      useCuda
+      useMetalKit
+      useRocm
+      useVulkan
+    ]
+    && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
-  useMpi ? false, # Increases the runtime closure size by ~700M
+  # Increases the runtime closure size by ~700M
+  useMpi ? false,
   useRocm ? config.rocmSupport,
   enableCurl ? true,
   useVulkan ? false,
@@ -38,8 +39,8 @@
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
-  precompileMetalShaders ? false
-}@inputs:
+  precompileMetalShaders ? false,
+}:

 let
   inherit (lib)
@@ -47,7 +48,6 @@ let
     cmakeFeature
     optionals
     strings
-    versionOlder
     ;

   stdenv = throw "Use effectiveStdenv instead";
@@ -63,54 +63,11 @@ let
   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix =
-    strings.optionalString (suffices != [ ])
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
-
-  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
-
-  # TODO: package the Python in this repository in a Nix-like way.
-  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
-  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
-  # https://peps.python.org/pep-0517/
-  #
-  # TODO: Package up each Python script or service appropriately, by making
-  # them into "entrypoints"
-  llama-python = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-    ]
-  );
+  descriptionSuffix = strings.optionalString (
+    suffices != [ ]
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

-  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      ps.tiktoken
-      ps.torchWithoutCuda
-      ps.transformers
-
-      # server bench
-      ps.matplotlib
-
-      # server tests
-      ps.openai
-      ps.behave
-      ps.prometheus-client
-
-      # for examples/pydantic-models-to-grammar-examples.py
-      ps.docstring-parser
-      ps.pydantic
-
-      # for scripts/compare-llama-bench.py
-      ps.gitpython
-      ps.tabulate
-    ]
-  );
-
-  xcrunHost = runCommand "xcrunHost" {} ''
+  xcrunHost = runCommand "xcrunHost" { } ''
     mkdir -p $out/bin
     ln -s /usr/bin/xcrun $out/bin
   '';
@@ -145,178 +102,145 @@
   ];
 in

-effectiveStdenv.mkDerivation (
-  finalAttrs: {
-    pname = "llama-cpp${pnameSuffix}";
-    version = llamaVersion;
-
-    # Note: none of the files discarded here are visible in the sandbox or
-    # affect the output hash. This also means they can be modified without
-    # triggering a rebuild.
-    src = lib.cleanSourceWith {
-      filter =
-        name: type:
-        let
-          noneOf = builtins.all (x: !x);
-          baseName = baseNameOf name;
-        in
-        noneOf [
-          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
-          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
-          (lib.hasPrefix "." baseName) # Skip hidden files and directories
-          (baseName == "flake.lock")
-        ];
-      src = lib.cleanSource ../../.;
-    };
-
-    postPatch = ''
-      substituteInPlace ./ggml/src/ggml-metal.m \
-        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-      substituteInPlace ./ggml/src/ggml-metal.m \
-        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
-    '';
-
-    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
-    # `default.metallib` may be compiled with Metal compiler from XCode
-    # and we need to escape sandbox on MacOS to access Metal compiler.
-    # `xcrun` is used find the path of the Metal compiler, which is varible
-    # and not on $PATH
-    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
-    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
-
-    nativeBuildInputs =
-      [
-        cmake
-        ninja
-        pkg-config
-        git
-      ]
-      ++ optionals useCuda [
-        cudaPackages.cuda_nvcc
-        autoAddDriverRunpath
-      ]
-      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
-        glibc.static
-      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
-        xcrunHost
-      ];
-
-    buildInputs =
-      optionals effectiveStdenv.isDarwin darwinBuildInputs
-      ++ optionals useCuda cudaBuildInputs
-      ++ optionals useMpi [ mpi ]
-      ++ optionals useRocm rocmBuildInputs
-      ++ optionals useBlas [ blas ]
-      ++ optionals useVulkan vulkanBuildInputs
-      ++ optionals enableCurl [ curl ];
-
-    cmakeFlags =
-      [
-        (cmakeBool "LLAMA_BUILD_SERVER" true)
-        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
-        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-        (cmakeBool "LLAMA_CURL" enableCurl)
-        (cmakeBool "GGML_NATIVE" false)
-        (cmakeBool "GGML_BLAS" useBlas)
-        (cmakeBool "GGML_CUDA" useCuda)
-        (cmakeBool "GGML_HIPBLAS" useRocm)
-        (cmakeBool "GGML_METAL" useMetalKit)
-        (cmakeBool "GGML_VULKAN" useVulkan)
-        (cmakeBool "GGML_STATIC" enableStatic)
-      ]
-      ++ optionals useCuda [
-        (
-          with cudaPackages.flags;
-          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
-            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
-          )
-        )
-      ]
-      ++ optionals useRocm [
-        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
-        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
-      ]
-      ++ optionals useMetalKit [
-        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;
+
+  # Note: none of the files discarded here are visible in the sandbox or
+  # affect the output hash. This also means they can be modified without
+  # triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        noneOf = builtins.all (x: !x);
+        baseName = baseNameOf name;
+      in
+      noneOf [
+        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
+        (lib.hasPrefix "." baseName) # Skip hidden files and directories
+        (baseName == "flake.lock")
+      ];
+    src = lib.cleanSource ../../.;
+  };
+
+  postPatch = ''
+    substituteInPlace ./ggml/src/ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+    substituteInPlace ./ggml/src/ggml-metal.m \
+      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+  '';

-    # Environment variables needed for ROCm
-    env = optionals useRocm {
-      ROCM_PATH = "${rocmPackages.clr}";
-      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
-    };
-
-    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
-    # if they haven't been added yet.
-    postInstall = ''
-      mkdir -p $out/include
-      cp $src/include/llama.h $out/include/
-    '';
-
-    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
-    passthru = {
-      inherit
-        useBlas
-        useCuda
-        useMetalKit
-        useMpi
-        useRocm
-        useVulkan
-        ;
-
-      shell = mkShell {
-        name = "shell-${finalAttrs.finalPackage.name}";
-        description = "contains numpy and sentencepiece";
-        buildInputs = [ llama-python ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-        shellHook = ''
-          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-        '';
-      };
-
-      shell-extra = mkShell {
-        name = "shell-extra-${finalAttrs.finalPackage.name}";
-        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-        buildInputs = [ llama-python-extra ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-      };
-    };
-
-    meta = {
-      # Configurations we don't want even the CI to evaluate. Results in the
-      # "unsupported platform" messages. This is mostly a no-op, because
-      # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals useCuda lib.platforms.darwin;
-
-      # Configurations that are known to result in build failures. Can be
-      # overridden by importing Nixpkgs with `allowBroken = true`.
-      broken = (useMetalKit && !effectiveStdenv.isDarwin);
-
-      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-      homepage = "https://github.com/ggerganov/llama.cpp/";
-      license = lib.licenses.mit;
-
-      # Accommodates `nix run` and `lib.getExe`
-      mainProgram = "llama-cli";
+  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+  # `default.metallib` may be compiled with the Metal compiler from Xcode
+  # and we need to escape the sandbox on macOS to access the Metal compiler.
+  # `xcrun` is used to find the path of the Metal compiler, which is variable
+  # and not on $PATH
+  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
+  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

-      # These people might respond, on the best effort basis, if you ping them
-      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
-      # Consider adding yourself to this list if you want to ensure this flake
-      # stays maintained and you're willing to invest your time. Do not add
-      # other people without their consent. Consider removing people after
-      # they've been unreachable for long periods of time.
+  nativeBuildInputs =
+    [
+      cmake
+      ninja
+      pkg-config
+      git
+    ]
+    ++ optionals useCuda [
+      cudaPackages.cuda_nvcc

-      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
-      # an attrset following the same format as in
-      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
-      maintainers = with lib.maintainers; [
-        philiptaron
-        SomeoneSerge
-      ];
+      autoAddDriverRunpath
+    ]
+    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
+
+  buildInputs =
+    optionals effectiveStdenv.isDarwin darwinBuildInputs
+    ++ optionals useCuda cudaBuildInputs
+    ++ optionals useMpi [ mpi ]
+    ++ optionals useRocm rocmBuildInputs
+    ++ optionals useBlas [ blas ]
+    ++ optionals useVulkan vulkanBuildInputs
+    ++ optionals enableCurl [ curl ];
+
+  cmakeFlags =
+    [
+      (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_CURL" enableCurl)
+      (cmakeBool "GGML_NATIVE" false)
+      (cmakeBool "GGML_BLAS" useBlas)
+      (cmakeBool "GGML_CUDA" useCuda)
+      (cmakeBool "GGML_HIPBLAS" useRocm)
+      (cmakeBool "GGML_METAL" useMetalKit)
+      (cmakeBool "GGML_VULKAN" useVulkan)
+      (cmakeBool "GGML_STATIC" enableStatic)
+    ]
+    ++ optionals useCuda [
+      (
+        with cudaPackages.flags;
+        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+        )
+      )
+    ]
+    ++ optionals useRocm [
+      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+    ]
+    ++ optionals useMetalKit [
+      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+    ];
+
+  # Environment variables needed for ROCm
+  env = optionals useRocm {
+    ROCM_PATH = "${rocmPackages.clr}";
+    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+  };
+
+  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+  # if they haven't been added yet.
+  postInstall = ''
+    mkdir -p $out/include
+    cp $src/include/llama.h $out/include/
+  '';

-      # Extend `badPlatforms` instead
-      platforms = lib.platforms.all;
-    };
-  }
-)
+  meta = {
+    # Configurations we don't want even the CI to evaluate. Results in the
+    # "unsupported platform" messages. This is mostly a no-op, because
+    # cudaPackages would've refused to evaluate anyway.
+    badPlatforms = optionals useCuda lib.platforms.darwin;
+
+    # Configurations that are known to result in build failures. Can be
+    # overridden by importing Nixpkgs with `allowBroken = true`.
+    broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    homepage = "https://github.com/ggerganov/llama.cpp/";
+    license = lib.licenses.mit;
+
+    # Accommodates `nix run` and `lib.getExe`
+    mainProgram = "llama-cli";
+
+    # These people might respond, on a best-effort basis, if you ping them
+    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+    # Consider adding yourself to this list if you want to ensure this flake
+    # stays maintained and you're willing to invest your time. Do not add
+    # other people without their consent. Consider removing people after
+    # they've been unreachable for long periods of time.
+
+    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+    # an attrset following the same format as in
+    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+    maintainers = with lib.maintainers; [
+      philiptaron
+      SomeoneSerge
+    ];
+
+    # Extend `badPlatforms` instead
+    platforms = lib.platforms.all;
+  };
+})
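
Usage note (outside the patch): the reworked `useBlas` default keeps BLAS as a CPU-side fallback only. It evaluates to true precisely when every accelerated backend flag (useCuda, useMetalKit, useRocm, useVulkan) is false and `blas.meta.available` holds. A minimal sketch, assuming this file is exposed by the flake as a package named `llama-cpp` (that attribute name is an assumption, not part of the patch):

  # Illustrative only; `llama-cpp` stands for the derivation built from this file.
  llama-cpp.override {
    # Forcing an accelerated backend makes the useBlas default resolve to false,
    # since builtins.all (x: !x) [ ... ] no longer holds over the backend flags.
    useVulkan = true;
  }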