llama-server-rocm.Dockerfile

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the ROCm development image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102
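
# A slimmer single-architecture image can be built by overriding the list
# above at build time; a sketch (the image tag and the gfx1030 target are
# illustrative, pick the arch matching your GPU):
#   docker build --build-arg ROCM_DOCKER_ARCH=gfx1030 \
#     -f llama-server-rocm.Dockerfile -t llama-server-rocm .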
# Python dependencies (used by the repo's model-conversion scripts)
COPY requirements.txt requirements.txt
COPY requirements requirements
RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app
COPY . .
# Set the ROCm GPU architectures to build for
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm (hipBLAS) acceleration
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++
# Enable cURL support (lets llama-server fetch models over HTTP)
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

RUN make -j$(nproc) llama-server
# llama-server listens on port 8080 by default
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
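
# A minimal sketch of running the resulting image. It assumes the host
# exposes the standard ROCm device nodes (/dev/kfd, /dev/dri) and that a
# GGUF model exists at /models/model.gguf (hypothetical path); arguments
# after the image name are passed through to the /app/llama-server ENTRYPOINT:
#   docker run --device /dev/kfd --device /dev/dri \
#     -v /models:/models -p 8080:8080 llama-server-rocm \
#     -m /models/model.gguf -ngl 99 --host 0.0.0.0 --port 8080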