|
|
@@ -0,0 +1,265 @@
|
|
|
{
  lib,
  config,
  stdenv,
  mkShell,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
  cudaPackages,
  darwin,
  rocmPackages,
  clblast,
  # Backend toggles. BLAS is the fallback: it is enabled exactly when no
  # accelerated backend (CUDA / MetalKit / OpenCL / ROCm) is selected.
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useOpenCL
    useRocm
  ],
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:
|
|
|
+
|
|
|
let
  # NOTE: `versionOlder` was previously inherited here but is unused anywhere
  # in this file, so it has been dropped.
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    ;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # Shadow the `stdenv` argument with a throw so any accidental use inside this
  # file fails loudly; the original is still reachable as `inputs.stdenv`.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
|
|
|
+ suffices =
|
|
|
+ lib.optionals useBlas [ "BLAS" ]
|
|
|
+ ++ lib.optionals useCuda [ "CUDA" ]
|
|
|
+ ++ lib.optionals useMetalKit [ "MetalKit" ]
|
|
|
+ ++ lib.optionals useMpi [ "MPI" ]
|
|
|
+ ++ lib.optionals useOpenCL [ "OpenCL" ]
|
|
|
+ ++ lib.optionals useRocm [ "ROCm" ];
|
|
|
+
|
|
|
+ pnameSuffix =
|
|
|
+ strings.optionalString (suffices != [ ])
|
|
|
+ "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
|
|
|
+ descriptionSuffix =
|
|
|
+ strings.optionalString (suffices != [ ])
|
|
|
+ ", accelerated with ${strings.concatStringsSep ", " suffices}";
|
|
|
+
|
|
|
+ # TODO: package the Python in this repository in a Nix-like way.
|
|
|
+ # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
|
|
|
+ # is PEP 517-compatible, and ensure the correct .dist-info is generated.
|
|
|
+ # https://peps.python.org/pep-0517/
|
|
|
+ llama-python = python3.withPackages (
|
|
|
+ ps: [
|
|
|
+ ps.numpy
|
|
|
+ ps.sentencepiece
|
|
|
+ ]
|
|
|
+ );
|
|
|
+
|
|
|
+ # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
|
|
|
+ llama-python-extra = python3.withPackages (
|
|
|
+ ps: [
|
|
|
+ ps.numpy
|
|
|
+ ps.sentencepiece
|
|
|
+ ps.torchWithoutCuda
|
|
|
+ ps.transformers
|
|
|
+ ]
|
|
|
+ );
|
|
|
+
|
|
|
+ # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
|
|
|
+ # separately
|
|
|
+ darwinBuildInputs =
|
|
|
+ with darwin.apple_sdk.frameworks;
|
|
|
+ [
|
|
|
+ Accelerate
|
|
|
+ CoreVideo
|
|
|
+ CoreGraphics
|
|
|
+ ]
|
|
|
+ ++ optionals useMetalKit [ MetalKit ];
|
|
|
+
|
|
|
+ cudaBuildInputs = with cudaPackages; [
|
|
|
+ cuda_cccl.dev # <nv/target>
|
|
|
+
|
|
|
+ # A temporary hack for reducing the closure size, remove once cudaPackages
|
|
|
+ # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
|
|
|
+ cuda_cudart.dev
|
|
|
+ cuda_cudart.lib
|
|
|
+ cuda_cudart.static
|
|
|
+ libcublas.dev
|
|
|
+ libcublas.lib
|
|
|
+ libcublas.static
|
|
|
+ ];
|
|
|
+
|
|
|
+ rocmBuildInputs = with rocmPackages; [
|
|
|
+ clr
|
|
|
+ hipblas
|
|
|
+ rocblas
|
|
|
+ ];
|
|
|
+in
|
|
|
+
|
|
|
effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    src = lib.cleanSourceWith {
      # `name` is the file's *absolute* path, so basename-style checks must
      # compare against `baseNameOf name` — the previous code compared the full
      # path, which meant the README.md and hidden-file filters never matched.
      filter =
        name: type:
        let
          baseName = baseNameOf name;
        in
        !(builtins.any (_: _) [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (baseName == "README.md") # Ignore README.md changes when computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
        ]);
      src = lib.cleanSource ../../.;
    };
|
|
|
+
|
|
|
    # Patch sources before the build: point the Metal shader lookup at the
    # installed copy under $out/bin, and rewrite the Python scripts' env-python
    # references to the pinned llama-python interpreter.
    # NOTE(review): the ggml-metal.m substitution runs unconditionally, even
    # when useMetalKit is false — presumably harmless since the file is simply
    # not compiled then; confirm upstream.
    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

      # TODO: Package up each Python script or service appropriately.
      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
      # we could make those *.py into setuptools' entrypoints
      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
    '';
|
|
|
+
|
|
|
+ nativeBuildInputs =
|
|
|
+ [
|
|
|
+ cmake
|
|
|
+ ninja
|
|
|
+ pkg-config
|
|
|
+ git
|
|
|
+ ]
|
|
|
+ ++ optionals useCuda [
|
|
|
+ cudaPackages.cuda_nvcc
|
|
|
+
|
|
|
+ # TODO: Replace with autoAddDriverRunpath
|
|
|
+ # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
|
|
|
+ cudaPackages.autoAddOpenGLRunpathHook
|
|
|
+ ];
|
|
|
+
|
|
|
+ buildInputs =
|
|
|
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
|
|
|
+ ++ optionals useCuda cudaBuildInputs
|
|
|
+ ++ optionals useMpi [ mpi ]
|
|
|
+ ++ optionals useOpenCL [ clblast ]
|
|
|
+ ++ optionals useRocm rocmBuildInputs;
|
|
|
+
|
|
|
+ cmakeFlags =
|
|
|
+ [
|
|
|
+ (cmakeBool "LLAMA_NATIVE" true)
|
|
|
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
|
|
|
+ (cmakeBool "BUILD_SHARED_LIBS" true)
|
|
|
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
|
|
|
+ (cmakeBool "LLAMA_BLAS" useBlas)
|
|
|
+ (cmakeBool "LLAMA_CLBLAST" useOpenCL)
|
|
|
+ (cmakeBool "LLAMA_CUBLAS" useCuda)
|
|
|
+ (cmakeBool "LLAMA_HIPBLAS" useRocm)
|
|
|
+ (cmakeBool "LLAMA_METAL" useMetalKit)
|
|
|
+ (cmakeBool "LLAMA_MPI" useMpi)
|
|
|
+ ]
|
|
|
+ ++ optionals useCuda [
|
|
|
+ (
|
|
|
+ with cudaPackages.flags;
|
|
|
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
|
|
|
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
|
|
|
+ )
|
|
|
+ )
|
|
|
+ ]
|
|
|
+ ++ optionals useRocm [
|
|
|
+ (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
|
|
|
+ (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
|
|
|
+
|
|
|
+ # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
|
|
|
+ # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
|
|
|
+ # and select the line that matches the current nixpkgs version of rocBLAS.
|
|
|
+ # Should likely use `rocmPackages.clr.gpuTargets`.
|
|
|
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
|
|
|
+ ]
|
|
|
+ ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
|
|
|
+ ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
|
|
|
+
|
|
|
    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    # Rename the generic `main`/`server` binaries to llama-specific names (see
    # meta.mainProgram below) and install the public header.
    postInstall = ''
      mv $out/bin/main $out/bin/llama
      mv $out/bin/server $out/bin/llama-server
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';
|
|
|
+
|
|
|
+ # Define the shells here, but don't add in the inputsFrom to avoid recursion.
|
|
|
+ passthru = {
|
|
|
+ inherit
|
|
|
+ useBlas
|
|
|
+ useCuda
|
|
|
+ useMetalKit
|
|
|
+ useMpi
|
|
|
+ useOpenCL
|
|
|
+ useRocm
|
|
|
+ ;
|
|
|
+
|
|
|
+ shell = mkShell {
|
|
|
+ name = "shell-${finalAttrs.finalPackage.name}";
|
|
|
+ description = "contains numpy and sentencepiece";
|
|
|
+ buildInputs = [ llama-python ];
|
|
|
+ inputsFrom = [ finalAttrs.finalPackage ];
|
|
|
+ };
|
|
|
+
|
|
|
+ shell-extra = mkShell {
|
|
|
+ name = "shell-extra-${finalAttrs.finalPackage.name}";
|
|
|
+ description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
|
|
|
+ buildInputs = [ llama-python-extra ];
|
|
|
+ inputsFrom = [ finalAttrs.finalPackage ];
|
|
|
+ };
|
|
|
+ };
|
|
|
+
|
|
|
+ meta = {
|
|
|
+ # Configurations we don't want even the CI to evaluate. Results in the
|
|
|
+ # "unsupported platform" messages. This is mostly a no-op, because
|
|
|
+ # cudaPackages would've refused to evaluate anyway.
|
|
|
+ badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
|
|
|
+
|
|
|
+ # Configurations that are known to result in build failures. Can be
|
|
|
+ # overridden by importing Nixpkgs with `allowBroken = true`.
|
|
|
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
|
|
|
+
|
|
|
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
|
|
|
+ homepage = "https://github.com/ggerganov/llama.cpp/";
|
|
|
+ license = lib.licenses.mit;
|
|
|
+
|
|
|
+ # Accommodates `nix run` and `lib.getExe`
|
|
|
+ mainProgram = "llama";
|
|
|
+
|
|
|
+ # These people might respond, on the best effort basis, if you ping them
|
|
|
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
|
|
+ # Consider adding yourself to this list if you want to ensure this flake
|
|
|
+ # stays maintained and you're willing to invest your time. Do not add
|
|
|
+ # other people without their consent. Consider removing people after
|
|
|
+ # they've been unreachable for long periods of time.
|
|
|
+
|
|
|
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
|
|
|
+ # an attrset following the same format as in
|
|
|
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
|
|
|
+ maintainers = with lib.maintainers; [
|
|
|
+ philiptaron
|
|
|
+ SomeoneSerge
|
|
|
+ ];
|
|
|
+
|
|
|
+ # Extend `badPlatforms` instead
|
|
|
+ platforms = lib.platforms.all;
|
|
|
+ };
|
|
|
+ }
|
|
|
+)
|