diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml
index 750b91e1f..e190b2b20 100644
--- a/.github/workflows/build-docker.yaml
+++ b/.github/workflows/build-docker.yaml
@@ -34,13 +34,13 @@ jobs:
         uses: docker/build-push-action@v4
         with:
           context: .
-          file: "docker/simple/Dockerfile"
+          file: "docker/cuda_simple/Dockerfile"
           push: ${{ startsWith(github.ref, 'refs/tags/') }}
           pull: true
           platforms: linux/amd64,linux/arm64
           tags: |
-            ghcr.io/abetlen/llama-cpp-python:latest
-            ghcr.io/abetlen/llama-cpp-python:${{ github.ref_name }}
+            ghcr.io/thiner/llama-cpp-python:latest-cuda
+            ghcr.io/thiner/llama-cpp-python:${{ github.ref_name }}-cuda
           build-args: |
             BUILDKIT_INLINE_CACHE=1
diff --git a/docker/cuda_simple/Dockerfile b/docker/cuda_simple/Dockerfile
index a9e51cdc1..8e0f2f9b0 100644
--- a/docker/cuda_simple/Dockerfile
+++ b/docker/cuda_simple/Dockerfile
@@ -21,7 +21,7 @@ ENV LLAMA_CUBLAS=1
 RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
 
 # Install llama-cpp-python (build with cuda)
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install ".[server]"
 
 # Run the server
 CMD python3 -m llama_cpp.server