Makefile 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  # Targets that name commands rather than files.  Declaring them phony keeps
  # a stray file called e.g. "clean" or "help" from silently disabling the
  # target.  `test` and `quantize` are retained from the original list even
  # though no rule with those names is defined in this file.
.PHONY: all test test-gen clean build build-cuda cuda-lib quantize \
        cuda-static-lib run-cuda test-cpu test-cuda test-quant bench-quant \
        clean-cuda convert-f32 quantize-q4k quantize-q6k quantize-q8k help
  # Toolchain configuration.  Every knob is assigned with ?= so a value coming
  # from the environment or the make command line takes precedence.
CUDA_HOME      ?= /usr/local/cuda
NVCC           ?= $(CUDA_HOME)/bin/nvcc
CUDA_LIB_PATH  ?= $(CUDA_HOME)/lib64

# Flags passed to every `go build` invocation in this file.
GO_BUILD_FLAGS ?= -trimpath -ldflags="-s -w"
  # Build directories: CUDA sources live in CUDA_SRC_DIR, every generated
  # object and library goes under CUDA_BUILD_DIR.
CUDA_SRC_DIR = pkg/backend/cuda
CUDA_BUILD_DIR = build/cuda

# Base names of the CUDA translation units.  The order is kept because it is
# also the order in which the objects are handed to the archiver and linker.
cuda_kernel_units = \
    cuda_memory \
    cuda_elementwise \
    cuda_dequant_q8k \
    cuda_dequant_q4k \
    cuda_dequant_q5k \
    cuda_dequant_other \
    cuda_matmul \
    cuda_nn

# Object files: $(CUDA_BUILD_DIR)/<unit>.o for every unit listed above.
CUDA_OBJ = $(addprefix $(CUDA_BUILD_DIR)/,$(addsuffix .o,$(cuda_kernel_units)))

# Library artifacts produced from CUDA_OBJ.
CUDA_STATIC_LIB = $(CUDA_BUILD_DIR)/libmakarna_cuda.a
CUDA_SHARED_LIB = $(CUDA_BUILD_DIR)/libmakarna_cuda.so
  # Default goal: a plain `make` builds the CPU-only binaries.
all: build

# Shared prefix of every CPU `go build` command below.
go_build_cmd = go build $(GO_BUILD_FLAGS)

# Build CPU-only binaries (no `cuda` build tag) into bin/.
build:
	$(go_build_cmd) -o bin/makarna ./cmd/run-model
	$(go_build_cmd) -o bin/quantize ./cmd/quantize
	$(go_build_cmd) -o bin/convert ./cmd/convert
  # Build CUDA-enabled binaries with static linking of our code.
# cgo link flags: libmakarna_cuda is requested statically (-Wl,-Bstatic), then
# the linker is switched back to dynamic for cudart and libstdc++.
build_cuda_ldflags = -L$(CURDIR)/$(CUDA_BUILD_DIR) -L$(CUDA_LIB_PATH) -Wl,-Bstatic -lmakarna_cuda -Wl,-Bdynamic -lcudart -lstdc++
# cgo compile flags: add the CUDA source directory to the include path.
build_cuda_cflags = -I$(CURDIR)/$(CUDA_SRC_DIR)

# Only the run-model binary is built with the `cuda` tag; quantize and convert
# are rebuilt exactly as in the CPU build.
build-cuda: cuda-static-lib
	CGO_LDFLAGS="$(build_cuda_ldflags)" \
	CGO_CFLAGS="$(build_cuda_cflags)" \
	go build $(GO_BUILD_FLAGS) -tags cuda -o bin/makarna-cuda ./cmd/run-model
	go build $(GO_BUILD_FLAGS) -o bin/quantize ./cmd/quantize
	go build $(GO_BUILD_FLAGS) -o bin/convert ./cmd/convert
	@echo "CUDA build complete. Run with: ./bin/makarna-cuda"
  # Compile CUDA kernels into a static library.
# `cuda-static-lib` is a convenience alias for the archive file itself.
cuda-static-lib: $(CUDA_STATIC_LIB)

# Archive every kernel object into libmakarna_cuda.a.
# The old archive is removed first: `ar rcs` on an existing archive only adds
# or replaces members, so an object dropped from CUDA_OBJ would otherwise
# linger in the library.  $(AR) is make's predefined archiver variable
# (default `ar`), which lets callers substitute a cross toolchain.
$(CUDA_STATIC_LIB): $(CUDA_OBJ)
	@echo "Creating static library..."
	rm -f $@
	$(AR) rcs $@ $^
	@echo "Static library built: $@"
  # Create the CUDA output directory.  The object rule lists it as an
  # order-only prerequisite, so it is created on demand without its timestamp
  # forcing recompiles.
$(CUDA_BUILD_DIR):
	mkdir -p $(CUDA_BUILD_DIR)
  # nvcc options used for every kernel object: position-independent, optimized
  # host code with NDEBUG defined, fast-math device code, C++17, and device
  # code generated for sm_75, sm_80, sm_86 and sm_89.
nvcc_object_flags = \
    -Xcompiler -fPIC -Xcompiler -O3 -Xcompiler -DNDEBUG \
    -O3 \
    --use_fast_math \
    --expt-relaxed-constexpr \
    -std=c++17 \
    -arch=sm_75 \
    -gencode=arch=compute_75,code=sm_75 \
    -gencode=arch=compute_80,code=sm_80 \
    -gencode=arch=compute_86,code=sm_86 \
    -gencode=arch=compute_89,code=sm_89

# Compile one .cu file into the matching object under CUDA_BUILD_DIR.
# Both shared headers are prerequisites of every object, so editing either one
# recompiles all kernels.  The build directory is order-only (after `|`).
$(CUDA_BUILD_DIR)/%.o: $(CUDA_SRC_DIR)/%.cu $(CUDA_SRC_DIR)/kernels.h $(CUDA_SRC_DIR)/cuda_common.cuh | $(CUDA_BUILD_DIR)
	@echo "Compiling CUDA kernels..."
	$(NVCC) -c $(nvcc_object_flags) -o $@ $<
  # Legacy: shared library (kept for compatibility).
# `cuda-lib` is only an alias for the library file.  The link recipe belongs
# to the file it actually produces, so make can skip it when no object has
# changed instead of relinking on every invocation.
cuda-lib: $(CUDA_SHARED_LIB)

# Link all kernel objects into libmakarna_cuda.so.
$(CUDA_SHARED_LIB): $(CUDA_OBJ)
	@echo "Building CUDA shared library..."
	$(NVCC) -shared -Xcompiler -fPIC \
		-O3 \
		--use_fast_math \
		-arch=sm_75 \
		-o $@ $^
  # Build the CUDA binary, then run it in chat mode.
  # MODEL, PROMPT, STEPS and GPU_LAYERS are ordinary make variables and can be
  # set on the command line.  $$LD_LIBRARY_PATH is escaped so the shell, not
  # make, expands the caller's existing value.
run-cuda: build-cuda
	LD_LIBRARY_PATH=$(CURDIR)/$(CUDA_BUILD_DIR):$(CUDA_LIB_PATH):$$LD_LIBRARY_PATH \
	./bin/makarna-cuda \
		-model $(MODEL) \
		-prompt "$(PROMPT)" \
		-chat \
		-steps $(STEPS) \
		-n-gpu-layers $(GPU_LAYERS)
  # Default values for run-cuda; override on the command line, e.g.
  #   make run-cuda MODEL=path PROMPT='text' STEPS=n GPU_LAYERS=n
# NOTE(review): the MODEL default is an absolute path that looks specific to
# one machine -- confirm it is meant to be committed.
MODEL ?= /home/ai/llama/quants/qwen3-q8.mak
# No quotes in the value: the run-cuda recipe already wraps $(PROMPT) in double
# quotes, and quotes stored here would pair up with those and leave the text
# unquoted (harmless for one word, word-splitting for anything longer).
PROMPT ?= Hello
STEPS ?= 10
GPU_LAYERS ?= 28

# Python interpreter used by the script-driven targets.
PYTHON ?= python3
  # Regenerate the golden test data with scripts/gen_test_data.py (run with the
  # repository root on PYTHONPATH), then run the Go tests under ./tests and ./pkg.
test-gen:
	@echo "Generating golden test data..."
	PYTHONPATH=. $(PYTHON) scripts/gen_test_data.py
	@echo "Running tests..."
	go test -v ./tests/... ./pkg/...
  # Run the Go tests under ./pkg without the `cuda` build tag.
test-cpu:
	@echo "Running CPU tests..."
	go test -v ./pkg/...
  # Run the CUDA backend tests (built with the `cuda` tag).
# The link line requests libmakarna_cuda statically (-Wl,-Bstatic), so the
# prerequisite has to be the static archive.  The shared library produced by
# `cuda-lib` cannot satisfy a -Bstatic lookup, which made this target fail on a
# clean tree.  -lstdc++ is added to match the build-cuda link line.
test-cuda: cuda-static-lib
	@echo "Running CUDA tests..."
	CGO_LDFLAGS="-L$(CURDIR)/$(CUDA_BUILD_DIR) -L$(CUDA_LIB_PATH) -Wl,-Bstatic -lmakarna_cuda -Wl,-Bdynamic -lcudart -lstdc++ -Wl,-rpath,$(CURDIR)/$(CUDA_BUILD_DIR) -Wl,-rpath,$(CUDA_LIB_PATH)" \
	LD_LIBRARY_PATH=$(CURDIR)/$(CUDA_BUILD_DIR):$(CUDA_LIB_PATH):$$LD_LIBRARY_PATH \
	go test -tags cuda -v ./pkg/backend/cuda/...
  # Quantization package checks: unit tests for everything under ./pkg/quant.
test-quant:
	@echo "Testing quantization functions..."
	go test -v ./pkg/quant/...

# Run every benchmark in ./pkg/quant/ (-bench=. matches all benchmark names).
bench-quant:
	@echo "Benchmarking quantization..."
	go test -bench=. ./pkg/quant/
  # Remove all build artifacts: binaries, the build tree, generated test data.
# The last line names the CUDA outputs explicitly so they are removed even
# when CUDA_BUILD_DIR has been pointed outside build/.  The original used
# $(CUDA_LIB), which is never defined and expanded to nothing.
clean:
	rm -rf bin/
	rm -rf build/
	rm -f tests/data/*.bin
	rm -f $(CUDA_OBJ) $(CUDA_STATIC_LIB) $(CUDA_SHARED_LIB)
  # Remove the CUDA libraries only (static archive and legacy shared library).
# The original referenced $(CUDA_LIB), which is never defined, so the target
# deleted nothing.
clean-cuda:
	rm -f $(CUDA_STATIC_LIB) $(CUDA_SHARED_LIB)
  # Convenience targets for model conversion.
# convert-f32 runs scripts/convert_fast.py with scripts/ on PYTHONPATH,
# passing $(MODEL) and $(OUTPUT) as its two positional arguments.
convert-f32:
	PYTHONPATH=scripts $(PYTHON) scripts/convert_fast.py $(MODEL) $(OUTPUT)
  # Quantization shortcuts: run ./bin/quantize on $(INPUT), writing $(OUTPUT).
  # The targets have no prerequisites, so the binary must already be built.
# Each target only selects the format name passed as the third argument.
quantize-q4k: quant_type = q4_k
quantize-q6k: quant_type = q6_k
quantize-q8k: quant_type = q8_k

quantize-q4k quantize-q6k quantize-q8k:
	./bin/quantize $(INPUT) $(OUTPUT) $(quant_type)
  # Help: print a summary of the main targets.
# A single printf emits one line per argument; the empty arguments produce the
# blank separator lines.
help:
	@printf '%s\n' \
		"Makarna - Inference Engine" \
		"" \
		"Build targets:" \
		" make build - Build CPU-only binaries" \
		" make build-cuda - Build CUDA-enabled binaries" \
		" make cuda-lib - Build CUDA kernel library only" \
		"" \
		"Run targets:" \
		" make run-cuda MODEL=path PROMPT='text' STEPS=n GPU_LAYERS=n" \
		"" \
		"Test targets:" \
		" make test-cpu - Run CPU tests" \
		" make test-cuda - Run CUDA tests" \
		" make test-quant - Run quantization tests" \
		"" \
		"Clean targets:" \
		" make clean - Remove all build artifacts" \
		" make clean-cuda - Remove CUDA library only"