# Makefile for the Makarna inference engine.
#
# Builds the Go binaries (CPU-only or CUDA-enabled), compiles the CUDA kernels
# into a static (and, for legacy users, shared) library, and provides test,
# benchmark, conversion and cleanup helpers. Run `make help` for a summary.

# Every command-style target is declared phony so that a stray file with the
# same name (e.g. `clean` or `help`) can never shadow it.
.PHONY: all test test-gen test-cpu test-cuda test-quant bench-quant \
	clean clean-cuda build build-cuda cuda-lib cuda-static-lib run-cuda \
	quantize quantize-q4k quantize-q6k quantize-q8k convert-f32 help

# Remove a target whose recipe failed so a truncated object or archive is
# never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# CUDA Configuration
CUDA_HOME ?= /usr/local/cuda
NVCC ?= $(CUDA_HOME)/bin/nvcc
CUDA_LIB_PATH ?= $(CUDA_HOME)/lib64

GO_BUILD_FLAGS ?= -trimpath -ldflags="-s -w"

# Build directories
CUDA_SRC_DIR = pkg/backend/cuda
CUDA_BUILD_DIR = build/cuda
CUDA_OBJ = \
	$(CUDA_BUILD_DIR)/cuda_memory.o \
	$(CUDA_BUILD_DIR)/cuda_elementwise.o \
	$(CUDA_BUILD_DIR)/cuda_dequant_q8k.o \
	$(CUDA_BUILD_DIR)/cuda_dequant_q4k.o \
	$(CUDA_BUILD_DIR)/cuda_dequant_q5k.o \
	$(CUDA_BUILD_DIR)/cuda_dequant_other.o \
	$(CUDA_BUILD_DIR)/cuda_matmul.o \
	$(CUDA_BUILD_DIR)/cuda_nn.o
CUDA_STATIC_LIB = $(CUDA_BUILD_DIR)/libmakarna_cuda.a
CUDA_SHARED_LIB = $(CUDA_BUILD_DIR)/libmakarna_cuda.so

# Default values for run-cuda (override on the command line).
# PROMPT is stored without quotes: the run-cuda recipe quotes it, so a value
# containing spaces is passed as a single argument.
MODEL ?= /home/ai/llama/quants/qwen3-q8.mak
PROMPT ?= Hello
STEPS ?= 10
GPU_LAYERS ?= 28

PYTHON ?= python3

all: build

# Build CPU-only binaries
build:
	go build $(GO_BUILD_FLAGS) -o bin/makarna ./cmd/run-model
	go build $(GO_BUILD_FLAGS) -o bin/quantize ./cmd/quantize
	go build $(GO_BUILD_FLAGS) -o bin/convert ./cmd/convert

# Build CUDA-enabled binaries with static linking of our code.
# Our kernels are linked statically (-Bstatic); the CUDA runtime and the C++
# runtime stay dynamic (-Bdynamic).
build-cuda: cuda-static-lib
	CGO_LDFLAGS="-L$(CURDIR)/$(CUDA_BUILD_DIR) -L$(CUDA_LIB_PATH) -Wl,-Bstatic -lmakarna_cuda -Wl,-Bdynamic -lcudart -lstdc++" \
	CGO_CFLAGS="-I$(CURDIR)/$(CUDA_SRC_DIR)" \
	go build $(GO_BUILD_FLAGS) -tags cuda -o bin/makarna-cuda ./cmd/run-model
	go build $(GO_BUILD_FLAGS) -o bin/quantize ./cmd/quantize
	go build $(GO_BUILD_FLAGS) -o bin/convert ./cmd/convert
	@echo "CUDA build complete. Run with: ./bin/makarna-cuda"

# Compile CUDA kernels into static library
cuda-static-lib: $(CUDA_STATIC_LIB)

# The archive is removed first so objects that were dropped from CUDA_OBJ do
# not linger as stale members of an existing archive.
$(CUDA_STATIC_LIB): $(CUDA_OBJ)
	@echo "Creating static library..."
	$(RM) $@
	$(AR) rcs $@ $^
	@echo "Static library built: $@"

$(CUDA_BUILD_DIR):
	mkdir -p $@

# One object per .cu file. Every object also depends on the two shared
# headers; the build directory is an order-only prerequisite so its changing
# mtime never forces a recompile.
$(CUDA_BUILD_DIR)/%.o: $(CUDA_SRC_DIR)/%.cu $(CUDA_SRC_DIR)/kernels.h $(CUDA_SRC_DIR)/cuda_common.cuh | $(CUDA_BUILD_DIR)
	@echo "Compiling CUDA kernels..."
	$(NVCC) -c -Xcompiler -fPIC -Xcompiler -O3 -Xcompiler -DNDEBUG \
		-O3 \
		--use_fast_math \
		--expt-relaxed-constexpr \
		-std=c++17 \
		-arch=sm_75 \
		-gencode=arch=compute_75,code=sm_75 \
		-gencode=arch=compute_80,code=sm_80 \
		-gencode=arch=compute_86,code=sm_86 \
		-gencode=arch=compute_89,code=sm_89 \
		-o $@ $<

# Legacy: shared library (kept for compatibility).
# `cuda-lib` is a phony alias; the real file rule below relinks only when an
# object is newer than the shared library.
cuda-lib: $(CUDA_SHARED_LIB)

$(CUDA_SHARED_LIB): $(CUDA_OBJ)
	@echo "Building CUDA shared library..."
	$(NVCC) -shared -Xcompiler -fPIC \
		-O3 \
		--use_fast_math \
		-arch=sm_75 \
		-o $@ $^

# Build the CUDA binary and run it with MODEL, PROMPT, STEPS and GPU_LAYERS.
run-cuda: build-cuda
	LD_LIBRARY_PATH=$(CURDIR)/$(CUDA_BUILD_DIR):$(CUDA_LIB_PATH):$$LD_LIBRARY_PATH \
	./bin/makarna-cuda -model $(MODEL) -prompt "$(PROMPT)" -chat -steps $(STEPS) -n-gpu-layers $(GPU_LAYERS)

# Regenerate the golden test data, then run the Go tests against it.
test-gen:
	@echo "Generating golden test data..."
	PYTHONPATH=. $(PYTHON) scripts/gen_test_data.py
	@echo "Running tests..."
	go test -v ./tests/... ./pkg/...

test-cpu:
	@echo "Running CPU tests..."
	go test -v ./pkg/...

# The link line uses -Bstatic -lmakarna_cuda, so the static archive (not the
# legacy shared library) must be built first. -lstdc++ mirrors build-cuda.
test-cuda: cuda-static-lib
	@echo "Running CUDA tests..."
	CGO_LDFLAGS="-L$(CURDIR)/$(CUDA_BUILD_DIR) -L$(CUDA_LIB_PATH) -Wl,-Bstatic -lmakarna_cuda -Wl,-Bdynamic -lcudart -lstdc++ -Wl,-rpath,$(CURDIR)/$(CUDA_BUILD_DIR) -Wl,-rpath,$(CUDA_LIB_PATH)" \
	LD_LIBRARY_PATH=$(CURDIR)/$(CUDA_BUILD_DIR):$(CUDA_LIB_PATH):$$LD_LIBRARY_PATH \
	go test -tags cuda -v ./pkg/backend/cuda/...

test-quant:
	@echo "Testing quantization functions..."
	go test -v ./pkg/quant/...

bench-quant:
	@echo "Benchmarking quantization..."
	go test -bench=. ./pkg/quant/

# Remove all build artifacts. The libraries are listed explicitly as well so
# they are removed even when CUDA_BUILD_DIR is overridden to live outside build/.
clean:
	rm -rf bin/
	rm -rf build/
	rm -f tests/data/*.bin
	$(RM) $(CUDA_STATIC_LIB) $(CUDA_SHARED_LIB)

# Remove only the CUDA libraries; object files are kept for a fast relink.
clean-cuda:
	$(RM) $(CUDA_STATIC_LIB) $(CUDA_SHARED_LIB)

# Convenience targets for model conversion.
# These expect INPUT / OUTPUT (and MODEL for convert-f32) on the command line;
# the quantize-* targets run ./bin/quantize, which `make build` produces.
convert-f32:
	PYTHONPATH=scripts $(PYTHON) scripts/convert_fast.py $(MODEL) $(OUTPUT)

quantize-q4k:
	./bin/quantize $(INPUT) $(OUTPUT) q4_k

quantize-q6k:
	./bin/quantize $(INPUT) $(OUTPUT) q6_k

quantize-q8k:
	./bin/quantize $(INPUT) $(OUTPUT) q8_k

# Help
help:
	@echo "Makarna - Inference Engine"
	@echo ""
	@echo "Build targets:"
	@echo " make build - Build CPU-only binaries"
	@echo " make build-cuda - Build CUDA-enabled binaries"
	@echo " make cuda-lib - Build CUDA kernel library only"
	@echo ""
	@echo "Run targets:"
	@echo " make run-cuda MODEL=path PROMPT='text' STEPS=n GPU_LAYERS=n"
	@echo ""
	@echo "Test targets:"
	@echo " make test-cpu - Run CPU tests"
	@echo " make test-cuda - Run CUDA tests"
	@echo " make test-quant - Run quantization tests"
	@echo ""
	@echo "Clean targets:"
	@echo " make clean - Remove all build artifacts"
	@echo " make clean-cuda - Remove CUDA library only"