diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml
index 750b91e1f..e190b2b20 100644
--- a/.github/workflows/build-docker.yaml
+++ b/.github/workflows/build-docker.yaml
@@ -34,13 +34,13 @@ jobs:
         uses: docker/build-push-action@v4
         with:
           context: .
-          file: "docker/simple/Dockerfile"
+          file: "docker/cuda_simple/Dockerfile"
           push: ${{ startsWith(github.ref, 'refs/tags/') }}
           pull: true
           platforms: linux/amd64,linux/arm64
           tags: |
-            ghcr.io/abetlen/llama-cpp-python:latest
-            ghcr.io/abetlen/llama-cpp-python:${{ github.ref_name }}
+            ghcr.io/thiner/llama-cpp-python:latest-cuda
+            ghcr.io/thiner/llama-cpp-python:${{ github.ref_name }}-cuda
           build-args: |
             BUILDKIT_INLINE_CACHE=1
diff --git a/docker/cuda_simple/Dockerfile b/docker/cuda_simple/Dockerfile
index a9e51cdc1..8e0f2f9b0 100644
--- a/docker/cuda_simple/Dockerfile
+++ b/docker/cuda_simple/Dockerfile
@@ -21,7 +21,7 @@ ENV LLAMA_CUBLAS=1
 RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
 
 # Install llama-cpp-python (build with cuda)
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install ".[server]"
 
 # Run the server
 CMD python3 -m llama_cpp.server