diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 9214a6e0b7ab..e8d456b0d4a6 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -92,7 +92,7 @@ jobs: cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-diffusers' + tag-suffix: '-nvidia-l4t-diffusers' runs-on: 'ubuntu-24.04-arm' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" skip-drivers: 'true' @@ -245,7 +245,6 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - # CUDA 12 additional backends - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -306,6 +305,247 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-exllama2' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "exllama2" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-neutts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "neutts" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + # cuda 13 + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-rerankers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "rerankers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "llama-cpp" + dockerfile: "./backend/Dockerfile.llama-cpp" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'true' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "llama-cpp" + dockerfile: "./backend/Dockerfile.llama-cpp" + context: "./" + - 
build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-transformers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "transformers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-diffusers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "diffusers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'l4t' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-diffusers' + runs-on: 'ubuntu-24.04-arm' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + skip-drivers: 'true' + backend: "diffusers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-kokoro' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "kokoro" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-bark' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "bark" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-chatterbox' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "chatterbox" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'true' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + 
cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'true' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisper' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" # hipblas builds - build-type: 'hipblas' cuda-major-version: "" @@ -494,7 +734,7 @@ jobs: cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-kokoro' + tag-suffix: '-nvidia-l4t-kokoro' runs-on: 'ubuntu-24.04-arm' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" skip-drivers: 'true' @@ -625,18 +865,6 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -710,18 +938,6 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -846,18 +1062,6 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -907,18 +1111,6 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -961,7 +1153,7 @@ jobs: platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-arm64-chatterbox' + tag-suffix: '-nvidia-l4t-arm64-chatterbox' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" @@ -1006,18 +1198,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 
'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-neutts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "neutts" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" + - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml index ecc0e460677c..aa17f162c6a4 100644 --- a/.github/workflows/deploy-explorer.yaml +++ b/.github/workflows/deploy-explorer.yaml @@ -33,7 +33,7 @@ jobs: run: | CGO_ENABLED=0 make build - name: rm - uses: appleboy/ssh-action@v1.2.3 + uses: appleboy/ssh-action@v1.2.4 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} @@ -53,7 +53,7 @@ jobs: rm: true target: ./local-ai - name: restarting - uses: appleboy/ssh-action@v1.2.3 + uses: appleboy/ssh-action@v1.2.4 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index a2410b22827e..ff4bb560fc35 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -43,6 +43,15 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=3 --output-sync=target" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-nvidia-cuda-13' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -66,3 +75,13 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'false' + tag-suffix: '-nvidia-l4t-arm64-cuda-13' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 3864930d03ed..0c58ba4acee0 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -100,6 +100,17 @@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + makeflags: "--jobs=4 --output-sync=target" + aio: "-aio-gpu-nvidia-cuda-13" - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'auto' @@ -152,3 +163,13 @@ jobs: runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'true' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-arm64-cuda-13' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' diff --git a/Dockerfile b/Dockerfile index 7a705a9fc514..05a0c2779641 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,7 +46,7 @@ EOT # CuBLAS requirements RUN < /run/localai/capability + echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability fi EOT RUN < /run/localai/capability + echo 
"nvidia-l4t-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability fi EOT @@ -84,7 +98,11 @@ RUN < #define GGML_MAX_NAME 128 @@ -23,8 +24,8 @@ // Names of the sampler method, same order as enum sample_method in stable-diffusion.h const char* sample_method_str[] = { - "default", "euler", + "euler_a", "heun", "dpm2", "dpm++2s_a", @@ -35,29 +36,29 @@ const char* sample_method_str[] = { "lcm", "ddim_trailing", "tcd", - "euler_a", }; static_assert(std::size(sample_method_str) == SAMPLE_METHOD_COUNT, "sample method mismatch"); // Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h const char* schedulers[] = { - "default", "discrete", "karras", "exponential", "ays", "gits", + "sgm_uniform", + "simple", "smoothstep", + "lcm", }; -static_assert(std::size(schedulers) == SCHEDULE_COUNT, "schedulers mismatch"); +static_assert(std::size(schedulers) == SCHEDULER_COUNT, "schedulers mismatch"); sd_ctx_t* sd_c; // Moved from the context (load time) to generation time params -scheduler_t scheduler = scheduler_t::DEFAULT; - -sample_method_t sample_method; +scheduler_t scheduler = SCHEDULER_COUNT; +sample_method_t sample_method = SAMPLE_METHOD_COUNT; // Copied from the upstream CLI static void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { @@ -159,26 +160,6 @@ int load_model(const char *model, char *model_path, char* options[], int threads fprintf(stderr, "parsed options\n"); - int sample_method_found = -1; - for (int m = 0; m < SAMPLE_METHOD_COUNT; m++) { - if (!strcmp(sampler, sample_method_str[m])) { - sample_method_found = m; - fprintf(stderr, "Found sampler: %s\n", sampler); - } - } - if (sample_method_found == -1) { - fprintf(stderr, "Invalid sample method, default to EULER_A!\n"); - sample_method_found = sample_method_t::SAMPLE_METHOD_DEFAULT; - } - sample_method = (sample_method_t)sample_method_found; - - for (int d = 0; d < SCHEDULE_COUNT; d++) { - if (!strcmp(scheduler_str, schedulers[d])) { - scheduler = (scheduler_t)d; - fprintf (stderr, "Found scheduler: %s\n", scheduler_str); - } - } - fprintf (stderr, "Creating context\n"); sd_ctx_params_t ctx_params; sd_ctx_params_init(&ctx_params); @@ -208,6 +189,30 @@ int load_model(const char *model, char *model_path, char* options[], int threads } fprintf (stderr, "Created context: OK\n"); + int sample_method_found = -1; + for (int m = 0; m < SAMPLE_METHOD_COUNT; m++) { + if (!strcmp(sampler, sample_method_str[m])) { + sample_method_found = m; + fprintf(stderr, "Found sampler: %s\n", sampler); + } + } + if (sample_method_found == -1) { + sample_method_found = sd_get_default_sample_method(sd_ctx); + fprintf(stderr, "Invalid sample method, using default: %s\n", sample_method_str[sample_method_found]); + } + sample_method = (sample_method_t)sample_method_found; + + for (int d = 0; d < SCHEDULER_COUNT; d++) { + if (!strcmp(scheduler_str, schedulers[d])) { + scheduler = (scheduler_t)d; + fprintf (stderr, "Found scheduler: %s\n", scheduler_str); + } + } + if (scheduler == SCHEDULER_COUNT) { + scheduler = sd_get_default_scheduler(sd_ctx); + fprintf(stderr, "Invalid scheduler, using default: %s\n", schedulers[scheduler]); + } + sd_c = sd_ctx; // Clean up allocated memory diff --git a/backend/index.yaml b/backend/index.yaml index da142b0998a8..9b89716809b9 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -26,6 +26,10 @@ vulkan: "vulkan-llama-cpp" nvidia-l4t: "nvidia-l4t-arm64-llama-cpp" darwin-x86: "darwin-x86-llama-cpp" + nvidia-cuda-13: "cuda13-llama-cpp" + nvidia-cuda-12: "cuda12-llama-cpp" + 
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp" - &whispercpp name: "whisper" alias: "whisper" @@ -49,6 +53,10 @@ amd: "rocm-whisper" vulkan: "vulkan-whisper" nvidia-l4t: "nvidia-l4t-arm64-whisper" + nvidia-cuda-13: "cuda13-whisper" + nvidia-cuda-12: "cuda12-whisper" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisper" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-whisper" - &stablediffusionggml name: "stablediffusion-ggml" alias: "stablediffusion-ggml" @@ -73,6 +81,10 @@ vulkan: "vulkan-stablediffusion-ggml" nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml" metal: "metal-stablediffusion-ggml" + nvidia-cuda-13: "cuda13-stablediffusion-ggml" + nvidia-cuda-12: "cuda12-stablediffusion-ggml" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-stablediffusion-ggml" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml" # darwin-x86: "darwin-x86-stablediffusion-ggml" - &rfdetr name: "rfdetr" @@ -96,6 +108,9 @@ #amd: "rocm-rfdetr" nvidia-l4t: "nvidia-l4t-arm64-rfdetr" default: "cpu-rfdetr" + nvidia-cuda-13: "cuda13-rfdetr" + nvidia-cuda-12: "cuda12-rfdetr" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-rfdetr" - &vllm name: "vllm" license: apache-2.0 @@ -128,6 +143,7 @@ nvidia: "cuda12-vllm" amd: "rocm-vllm" intel: "intel-vllm" + nvidia-cuda-12: "cuda12-vllm" - &mlx name: "mlx" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx" @@ -201,6 +217,8 @@ nvidia: "cuda12-transformers" intel: "intel-transformers" amd: "rocm-transformers" + nvidia-cuda-13: "cuda13-transformers" + nvidia-cuda-12: "cuda12-transformers" - &diffusers name: "diffusers" icon: https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg @@ -221,6 +239,10 @@ nvidia-l4t: "nvidia-l4t-diffusers" metal: "metal-diffusers" default: "cpu-diffusers" + nvidia-cuda-13: "cuda13-diffusers" + nvidia-cuda-12: "cuda12-diffusers" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-diffusers" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-diffusers" - &exllama2 name: "exllama2" urls: @@ -236,6 +258,7 @@ capabilities: nvidia: "cuda12-exllama2" intel: "intel-exllama2" + nvidia-cuda-12: "cuda12-exllama2" - &faster-whisper icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4 description: | @@ -252,6 +275,8 @@ nvidia: "cuda12-faster-whisper" intel: "intel-faster-whisper" amd: "rocm-faster-whisper" + nvidia-cuda-13: "cuda13-faster-whisper" + nvidia-cuda-12: "cuda12-faster-whisper" - &kokoro icon: https://avatars.githubusercontent.com/u/166769057?v=4 description: | @@ -271,6 +296,9 @@ intel: "intel-kokoro" amd: "rocm-kokoro" nvidia-l4t: "nvidia-l4t-kokoro" + nvidia-cuda-13: "cuda13-kokoro" + nvidia-cuda-12: "cuda12-kokoro" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-kokoro" - &coqui urls: - https://github.com/idiap/coqui-ai-TTS @@ -292,6 +320,8 @@ nvidia: "cuda12-coqui" intel: "intel-coqui" amd: "rocm-coqui" + nvidia-cuda-13: "cuda13-coqui" + nvidia-cuda-12: "cuda12-coqui" icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4 - &bark urls: @@ -308,6 +338,8 @@ cuda: "cuda12-bark" intel: "intel-bark" rocm: "rocm-bark" + nvidia-cuda-13: "cuda13-bark" + nvidia-cuda-12: "cuda12-bark" icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4 - &barkcpp urls: @@ -354,6 +386,10 @@ metal: "metal-chatterbox" default: "cpu-chatterbox" nvidia-l4t: "nvidia-l4t-arm64-chatterbox" + nvidia-cuda-13: "cuda13-chatterbox" + nvidia-cuda-12: "cuda12-chatterbox" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-chatterbox" + nvidia-l4t-cuda-13: 
"cuda13-nvidia-l4t-arm64-chatterbox" - &piper name: "piper" uri: "quay.io/go-skynet/local-ai-backends:latest-piper" @@ -442,6 +478,8 @@ nvidia: "cuda12-neutts" amd: "rocm-neutts" nvidia-l4t: "nvidia-l4t-neutts" + nvidia-cuda-12: "cuda12-neutts" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts" - !!merge <<: *neutts name: "neutts-development" capabilities: @@ -449,6 +487,22 @@ nvidia: "cuda12-neutts-development" amd: "rocm-neutts-development" nvidia-l4t: "nvidia-l4t-neutts-development" + nvidia-cuda-12: "cuda12-neutts-development" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts-development" +- !!merge <<: *llamacpp + name: "llama-cpp-development" + capabilities: + default: "cpu-llama-cpp-development" + nvidia: "cuda12-llama-cpp-development" + intel: "intel-sycl-f16-llama-cpp-development" + amd: "rocm-llama-cpp-development" + metal: "metal-llama-cpp-development" + vulkan: "vulkan-llama-cpp-development" + nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-development" + nvidia-cuda-13: "cuda13-llama-cpp-development" + nvidia-cuda-12: "cuda12-llama-cpp-development" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-development" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-development" - !!merge <<: *neutts name: "cpu-neutts" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-neutts" @@ -465,7 +519,7 @@ mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-neutts - !!merge <<: *neutts - name: "nvidia-l4t-neutts" + name: "nvidia-l4t-arm64-neutts" uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-neutts" mirrors: - localai/localai-backends:latest-nvidia-l4t-arm64-neutts @@ -485,7 +539,7 @@ mirrors: - localai/localai-backends:master-gpu-rocm-hipblas-neutts - !!merge <<: *neutts - name: "nvidia-l4t-neutts-development" + name: "nvidia-l4t-arm64-neutts-development" uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-neutts" mirrors: - localai/localai-backends:master-nvidia-l4t-arm64-neutts @@ -550,6 +604,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp" mirrors: - localai/localai-backends:master-nvidia-l4t-arm64-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-nvidia-l4t-arm64-llama-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-nvidia-l4t-arm64-llama-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-llama-cpp" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-llama-cpp - !!merge <<: *llamacpp name: "cpu-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp" @@ -630,6 +694,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp" mirrors: - localai/localai-backends:master-gpu-intel-sycl-f16-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-llama-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-llama-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-llama-cpp" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-llama-cpp ## whisper - !!merge <<: *whispercpp name: "nvidia-l4t-arm64-whisper" @@ -641,6 +715,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper" mirrors: - localai/localai-backends:master-nvidia-l4t-arm64-whisper +- 
!!merge <<: *whispercpp + name: "cuda13-nvidia-l4t-arm64-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-whisper" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-whisper +- !!merge <<: *whispercpp + name: "cuda13-nvidia-l4t-arm64-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-whisper" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-whisper - !!merge <<: *whispercpp name: "cpu-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper" @@ -731,6 +815,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper" mirrors: - localai/localai-backends:master-gpu-intel-sycl-f16-whisper +- !!merge <<: *whispercpp + name: "cuda13-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-whisper" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-whisper +- !!merge <<: *whispercpp + name: "cuda13-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisper" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-whisper ## stablediffusion-ggml - !!merge <<: *stablediffusionggml name: "cpu-stablediffusion-ggml" @@ -810,6 +904,26 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml" mirrors: - localai/localai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-stablediffusion-ggml" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-stablediffusion-ggml" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-stablediffusion-ggml-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-stablediffusion-ggml" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-stablediffusion-ggml # vllm - !!merge <<: *vllm name: "vllm-development" @@ -856,6 +970,7 @@ #amd: "rocm-rfdetr-development" nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development" default: "cpu-rfdetr-development" + nvidia-cuda-13: "cuda13-rfdetr-development" - !!merge <<: *rfdetr name: "cuda12-rfdetr" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr" @@ -876,6 +991,11 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-rfdetr" mirrors: - localai/localai-backends:latest-nvidia-l4t-arm64-rfdetr +- !!merge <<: *rfdetr + name: "nvidia-l4t-arm64-rfdetr-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-rfdetr" + mirrors: + - localai/localai-backends:master-nvidia-l4t-arm64-rfdetr - !!merge <<: *rfdetr name: "cpu-rfdetr" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr" @@ -906,6 +1026,16 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-rfdetr" mirrors: - localai/localai-backends:latest-gpu-intel-rfdetr +- !!merge <<: 
*rfdetr + name: "cuda13-rfdetr" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-rfdetr" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-rfdetr +- !!merge <<: *rfdetr + name: "cuda13-rfdetr-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rfdetr" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-rfdetr ## Rerankers - !!merge <<: *rerankers name: "rerankers-development" @@ -913,6 +1043,7 @@ nvidia: "cuda12-rerankers-development" intel: "intel-rerankers-development" amd: "rocm-rerankers-development" + nvidia-cuda-13: "cuda13-rerankers-development" - !!merge <<: *rerankers name: "cuda11-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers" @@ -953,6 +1084,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-rerankers" mirrors: - localai/localai-backends:master-gpu-intel-rerankers +- !!merge <<: *rerankers + name: "cuda13-rerankers" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-rerankers" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-rerankers +- !!merge <<: *rerankers + name: "cuda13-rerankers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rerankers" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-rerankers ## Transformers - !!merge <<: *transformers name: "transformers-development" @@ -960,6 +1101,7 @@ nvidia: "cuda12-transformers-development" intel: "intel-transformers-development" amd: "rocm-transformers-development" + nvidia-cuda-13: "cuda13-transformers-development" - !!merge <<: *transformers name: "cuda12-transformers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers" @@ -1000,6 +1142,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-transformers" mirrors: - localai/localai-backends:master-gpu-intel-transformers +- !!merge <<: *transformers + name: "cuda13-transformers" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-transformers" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-transformers +- !!merge <<: *transformers + name: "cuda13-transformers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-transformers" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-transformers ## Diffusers - !!merge <<: *diffusers name: "diffusers-development" @@ -1010,6 +1162,7 @@ nvidia-l4t: "nvidia-l4t-diffusers-development" metal: "metal-diffusers-development" default: "cpu-diffusers-development" + nvidia-cuda-13: "cuda13-diffusers-development" - !!merge <<: *diffusers name: "cpu-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-diffusers" @@ -1022,14 +1175,24 @@ - localai/localai-backends:master-cpu-diffusers - !!merge <<: *diffusers name: "nvidia-l4t-diffusers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-diffusers" mirrors: - - localai/localai-backends:latest-gpu-nvidia-l4t-diffusers + - localai/localai-backends:latest-nvidia-l4t-diffusers - !!merge <<: *diffusers name: "nvidia-l4t-diffusers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-diffusers" mirrors: - - localai/localai-backends:master-gpu-nvidia-l4t-diffusers + - localai/localai-backends:master-nvidia-l4t-diffusers +- !!merge <<: *diffusers + name: 
"cuda13-nvidia-l4t-arm64-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-diffusers" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-diffusers +- !!merge <<: *diffusers + name: "cuda13-nvidia-l4t-arm64-diffusers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-diffusers" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-diffusers - !!merge <<: *diffusers name: "cuda12-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers" @@ -1070,6 +1233,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-diffusers" mirrors: - localai/localai-backends:master-gpu-intel-diffusers +- !!merge <<: *diffusers + name: "cuda13-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-diffusers" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-diffusers +- !!merge <<: *diffusers + name: "cuda13-diffusers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-diffusers" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-diffusers - !!merge <<: *diffusers name: "metal-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-diffusers" @@ -1141,14 +1314,14 @@ - localai/localai-backends:master-gpu-intel-kokoro - !!merge <<: *kokoro name: "nvidia-l4t-kokoro" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-kokoro" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-kokoro" mirrors: - - localai/localai-backends:latest-gpu-nvidia-l4t-kokoro + - localai/localai-backends:latest-nvidia-l4t-kokoro - !!merge <<: *kokoro name: "nvidia-l4t-kokoro-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-kokoro" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-kokoro" mirrors: - - localai/localai-backends:master-gpu-nvidia-l4t-kokoro + - localai/localai-backends:master-nvidia-l4t-kokoro - !!merge <<: *kokoro name: "cuda11-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro" @@ -1164,6 +1337,16 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro" mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-kokoro +- !!merge <<: *kokoro + name: "cuda13-kokoro" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-kokoro" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-kokoro +- !!merge <<: *kokoro + name: "cuda13-kokoro-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-kokoro" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-kokoro ## faster-whisper - !!merge <<: *faster-whisper name: "faster-whisper-development" @@ -1171,6 +1354,7 @@ nvidia: "cuda12-faster-whisper-development" intel: "intel-faster-whisper-development" amd: "rocm-faster-whisper-development" + nvidia-cuda-13: "cuda13-faster-whisper-development" - !!merge <<: *faster-whisper name: "cuda11-faster-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper" @@ -1196,6 +1380,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-faster-whisper" mirrors: - localai/localai-backends:master-gpu-intel-faster-whisper +- !!merge <<: *faster-whisper + name: "cuda13-faster-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-faster-whisper" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-faster-whisper +- !!merge <<: 
*faster-whisper + name: "cuda13-faster-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-whisper" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-faster-whisper ## coqui - !!merge <<: *coqui @@ -1303,6 +1497,10 @@ metal: "metal-chatterbox-development" default: "cpu-chatterbox-development" nvidia-l4t: "nvidia-l4t-arm64-chatterbox" + nvidia-cuda-13: "cuda13-chatterbox-development" + nvidia-cuda-12: "cuda12-chatterbox-development" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-chatterbox" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-chatterbox" - !!merge <<: *chatterbox name: "cpu-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox" @@ -1315,14 +1513,14 @@ - localai/localai-backends:master-cpu-chatterbox - !!merge <<: *chatterbox name: "nvidia-l4t-arm64-chatterbox" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-chatterbox" mirrors: - - localai/localai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox + - localai/localai-backends:latest-nvidia-l4t-arm64-chatterbox - !!merge <<: *chatterbox name: "nvidia-l4t-arm64-chatterbox-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-arm64-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-chatterbox" mirrors: - - localai/localai-backends:master-gpu-nvidia-l4t-arm64-chatterbox + - localai/localai-backends:master-nvidia-l4t-arm64-chatterbox - !!merge <<: *chatterbox name: "metal-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox" @@ -1353,3 +1551,23 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox" mirrors: - localai/localai-backends:latest-gpu-nvidia-cuda-12-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-chatterbox" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-chatterbox-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-chatterbox" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-nvidia-l4t-arm64-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-chatterbox" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-nvidia-l4t-arm64-chatterbox-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-chatterbox" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-chatterbox diff --git a/backend/python/chatterbox/requirements-cublas13.txt b/backend/python/chatterbox/requirements-cublas13.txt new file mode 100644 index 000000000000..4ac324c9db73 --- /dev/null +++ b/backend/python/chatterbox/requirements-cublas13.txt @@ -0,0 +1,8 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch +torchaudio +transformers +numpy>=1.24.0,<1.26.0 +# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 +chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster +accelerate diff --git a/backend/python/chatterbox/requirements-l4t.txt b/backend/python/chatterbox/requirements-l4t12.txt similarity index 100% rename from backend/python/chatterbox/requirements-l4t.txt rename to 
backend/python/chatterbox/requirements-l4t12.txt diff --git a/backend/python/chatterbox/requirements-l4t13.txt b/backend/python/chatterbox/requirements-l4t13.txt new file mode 100644 index 000000000000..7610e1af5133 --- /dev/null +++ b/backend/python/chatterbox/requirements-l4t13.txt @@ -0,0 +1,7 @@ +--extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130 +torch +torchaudio +transformers +numpy>=1.24.0,<1.26.0 +chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster +accelerate diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index dc2ac44b9741..b4bcf578fc73 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -211,7 +211,7 @@ function init() { # - hipblas # - intel function getBuildProfile() { - if [ x"${BUILD_TYPE:-}" == "xcublas" ]; then + if [ x"${BUILD_TYPE:-}" == "xcublas" ] || [ x"${BUILD_TYPE:-}" == "xl4t" ]; then if [ ! -z "${CUDA_MAJOR_VERSION:-}" ]; then echo ${BUILD_TYPE}${CUDA_MAJOR_VERSION} else diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 32befa8e6c03..09a5efea7694 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -16,4 +16,11 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" fi +# Use python 3.12 for l4t +if [ "x${BUILD_PROFILE}" == "xl4t12" ] || [ "x${BUILD_PROFILE}" == "xl4t13" ]; then + PYTHON_VERSION="3.12" + PYTHON_PATCH="12" + PY_STANDALONE_TAG="20251120" +fi + installRequirements diff --git a/backend/python/diffusers/requirements-cublas13.txt b/backend/python/diffusers/requirements-cublas13.txt new file mode 100644 index 000000000000..4867a85cd405 --- /dev/null +++ b/backend/python/diffusers/requirements-cublas13.txt @@ -0,0 +1,12 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +git+https://github.com/huggingface/diffusers +opencv-python +transformers +torchvision +accelerate +compel +peft +sentencepiece +torch +ftfy +optimum-quanto diff --git a/backend/python/diffusers/requirements-l4t.txt b/backend/python/diffusers/requirements-l4t12.txt similarity index 100% rename from backend/python/diffusers/requirements-l4t.txt rename to backend/python/diffusers/requirements-l4t12.txt diff --git a/backend/python/diffusers/requirements-l4t13.txt b/backend/python/diffusers/requirements-l4t13.txt new file mode 100644 index 000000000000..b608e460d342 --- /dev/null +++ b/backend/python/diffusers/requirements-l4t13.txt @@ -0,0 +1,12 @@ +--extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130 +torch +git+https://github.com/huggingface/diffusers +transformers +accelerate +compel +peft +optimum-quanto +numpy<2 +sentencepiece +torchvision +ftfy diff --git a/backend/python/faster-whisper/requirements-cublas13.txt b/backend/python/faster-whisper/requirements-cublas13.txt new file mode 100644 index 000000000000..3c797fce3a06 --- /dev/null +++ b/backend/python/faster-whisper/requirements-cublas13.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.1 +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas13.txt b/backend/python/kokoro/requirements-cublas13.txt new file mode 100644 index 000000000000..0835ac729bb8 --- /dev/null +++ b/backend/python/kokoro/requirements-cublas13.txt @@ -0,0 +1,7 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.1 
+torchaudio==2.9.1 +transformers +accelerate +kokoro +soundfile \ No newline at end of file diff --git a/backend/python/kokoro/requirements-l4t.txt b/backend/python/kokoro/requirements-l4t12.txt similarity index 100% rename from backend/python/kokoro/requirements-l4t.txt rename to backend/python/kokoro/requirements-l4t12.txt diff --git a/backend/python/neutts/requirements-l4t.txt b/backend/python/neutts/requirements-l4t12.txt similarity index 100% rename from backend/python/neutts/requirements-l4t.txt rename to backend/python/neutts/requirements-l4t12.txt diff --git a/backend/python/rerankers/requirements-cublas13.txt b/backend/python/rerankers/requirements-cublas13.txt new file mode 100644 index 000000000000..b565a9cc154a --- /dev/null +++ b/backend/python/rerankers/requirements-cublas13.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +transformers +accelerate +torch==2.9.1 +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-cublas13.txt b/backend/python/rfdetr/requirements-cublas13.txt new file mode 100644 index 000000000000..d75a2013c24d --- /dev/null +++ b/backend/python/rfdetr/requirements-cublas13.txt @@ -0,0 +1,8 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.1 +rfdetr +opencv-python +accelerate +inference +peft +optimum-quanto \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas13.txt b/backend/python/transformers/requirements-cublas13.txt new file mode 100644 index 000000000000..8f72998e4f46 --- /dev/null +++ b/backend/python/transformers/requirements-cublas13.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.0 +llvmlite==0.43.0 +numba==0.60.0 +transformers +bitsandbytes +outetts +sentence-transformers==5.1.0 +protobuf==6.33.1 \ No newline at end of file diff --git a/pkg/system/capabilities.go b/pkg/system/capabilities.go index 0ea60dfc81fe..c031bd61d129 100644 --- a/pkg/system/capabilities.go +++ b/pkg/system/capabilities.go @@ -4,6 +4,7 @@ package system import ( "os" + "path/filepath" "runtime" "strings" @@ -17,14 +18,32 @@ const ( darwinX86 = "darwin-x86" metal = "metal" nvidia = "nvidia" - amd = "amd" - intel = "intel" + + amd = "amd" + intel = "intel" + + nvidiaCuda13 = "nvidia-cuda-13" + nvidiaCuda12 = "nvidia-cuda-12" + nvidiaL4TCuda12 = "nvidia-l4t-cuda-12" + nvidiaL4TCuda13 = "nvidia-l4t-cuda-13" capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY" capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE" defaultRunFile = "/run/localai/capability" ) +var ( + cuda13DirExists bool + cuda12DirExists bool +) + +func init() { + _, err := os.Stat(filepath.Join("usr", "local", "cuda-13")) + cuda13DirExists = err == nil + _, err = os.Stat(filepath.Join("usr", "local", "cuda-12")) + cuda12DirExists = err == nil +} + func (s *SystemState) Capability(capMap map[string]string) string { reportedCapability := s.getSystemCapabilities() @@ -77,12 +96,26 @@ func (s *SystemState) getSystemCapabilities() string { // If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" { - if s.GPUVendor == "nvidia" { + if s.GPUVendor == nvidia { log.Info().Msgf("Using nvidia-l4t capability (arm64 on linux), set %s to override", capabilityEnv) + if cuda13DirExists { + return nvidiaL4TCuda13 + } + if cuda12DirExists { + return nvidiaL4TCuda12 + } return nvidiaL4T } } + if cuda13DirExists { + return nvidiaCuda13 + } + + if cuda12DirExists 
{ + return nvidiaCuda12 + } + if s.GPUVendor == "" { log.Info().Msgf("Default capability (no GPU detected), set %s to override", capabilityEnv) return defaultCapability @@ -103,13 +136,13 @@ func detectGPUVendor(gpus []*gpu.GraphicsCard) (string, error) { if gpu.DeviceInfo != nil { if gpu.DeviceInfo.Vendor != nil { gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name) - if strings.Contains(gpuVendorName, "NVIDIA") { + if strings.Contains(gpuVendorName, strings.ToUpper(nvidia)) { return nvidia, nil } - if strings.Contains(gpuVendorName, "AMD") { + if strings.Contains(gpuVendorName, strings.ToUpper(amd)) { return amd, nil } - if strings.Contains(gpuVendorName, "INTEL") { + if strings.Contains(gpuVendorName, strings.ToUpper(intel)) { return intel, nil } } @@ -131,7 +164,7 @@ func (s *SystemState) BackendPreferenceTokens() []string { case strings.HasPrefix(capStr, amd): return []string{"rocm", "hip", "vulkan", "cpu"} case strings.HasPrefix(capStr, intel): - return []string{"sycl", "intel", "cpu"} + return []string{"sycl", intel, "cpu"} case strings.HasPrefix(capStr, metal): return []string{"metal", "cpu"} case strings.HasPrefix(capStr, darwinX86):
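
For context on the naming used throughout this patch: the `tag-suffix` values in the CI matrices combine with a `latest-` (release) or `master-` (development) prefix to form the image references that `backend/index.yaml` points at. A minimal Go sketch of that convention, based only on the URIs visible in this diff; `imageRefs` is a hypothetical helper, not code from the repository:

```go
package main

import "fmt"

// imageRefs builds the two registries' references for a CI matrix entry,
// following the pattern visible in backend/index.yaml
// ("quay.io/go-skynet/local-ai-backends:<prefix><tag-suffix>" plus the
// "localai/localai-backends" mirror). Hypothetical helper, for illustration.
func imageRefs(prefix, tagSuffix string) []string {
	return []string{
		"quay.io/go-skynet/local-ai-backends:" + prefix + tagSuffix,
		"localai/localai-backends:" + prefix + tagSuffix,
	}
}

func main() {
	// The '-gpu-nvidia-cuda-13-whisper' matrix entry, release ("latest") build.
	for _, ref := range imageRefs("latest", "-gpu-nvidia-cuda-13-whisper") {
		fmt.Println(ref)
	}
	// Output:
	// quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-whisper
	// localai/localai-backends:latest-gpu-nvidia-cuda-13-whisper
}
```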
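
The Dockerfile change writes a versioned capability string such as `nvidia-cuda-13` or `nvidia-l4t-cuda-13` to `/run/localai/capability`, and `pkg/system/capabilities.go` gains matching constants alongside the existing `LOCALAI_FORCE_META_BACKEND_CAPABILITY` overrides. A rough sketch of the precedence those names suggest (env override, then the run file baked into the image, then runtime detection); this ordering is an assumption for illustration, since the parts of `Capability` and `getSystemCapabilities` that read these values are not shown in this diff:

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// resolveCapability is a simplified illustration of the lookup order implied
// by the constants in pkg/system/capabilities.go: an explicit override env
// var wins, then the capability file written by the Dockerfile, then whatever
// was detected at runtime. It is not the function LocalAI ships.
func resolveCapability(detected string) string {
	if v := os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY"); v != "" {
		return v
	}
	runFile := os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE")
	if runFile == "" {
		runFile = "/run/localai/capability" // written by the Dockerfile in this patch
	}
	if b, err := os.ReadFile(runFile); err == nil {
		if c := strings.TrimSpace(string(b)); c != "" {
			return c
		}
	}
	return detected
}

func main() {
	fmt.Println(resolveCapability("nvidia-cuda-13"))
}
```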
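
Each backend's `capabilities` block in `backend/index.yaml` acts as the `capMap map[string]string` handed to `SystemState.Capability`; the new `nvidia-cuda-12`, `nvidia-cuda-13`, `nvidia-l4t-cuda-12` and `nvidia-l4t-cuda-13` keys let the versioned capability strings above select a matching image. A hedged sketch using the whisper entries from this diff; the fallback order shown (exact key, then the unversioned vendor key, then `default`) is an assumption about `Capability`'s behaviour rather than something this patch spells out:

```go
package main

import (
	"fmt"
	"strings"
)

// capMap mirrors the whisper "capabilities" block in backend/index.yaml,
// restricted to keys visible in this diff; "cpu-whisper" under "default" is
// assumed here only to make the example complete.
var capMap = map[string]string{
	"nvidia-l4t":         "nvidia-l4t-arm64-whisper",
	"nvidia-cuda-13":     "cuda13-whisper",
	"nvidia-cuda-12":     "cuda12-whisper",
	"nvidia-l4t-cuda-12": "nvidia-l4t-arm64-whisper",
	"nvidia-l4t-cuda-13": "cuda13-nvidia-l4t-arm64-whisper",
	"default":            "cpu-whisper",
}

// pickBackend is an illustrative lookup, not LocalAI's implementation:
// exact capability first, then the capability with its "-cuda-NN" suffix
// stripped, then "default".
func pickBackend(capability string) string {
	if b, ok := capMap[capability]; ok {
		return b
	}
	if i := strings.Index(capability, "-cuda-"); i > 0 {
		if b, ok := capMap[capability[:i]]; ok {
			return b
		}
	}
	return capMap["default"]
}

func main() {
	fmt.Println(pickBackend("nvidia-l4t-cuda-13")) // cuda13-nvidia-l4t-arm64-whisper
	fmt.Println(pickBackend("nvidia-cuda-12"))     // cuda12-whisper
}
```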
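
On the Python side, `getBuildProfile` in `libbackend.sh` now returns `${BUILD_TYPE}${CUDA_MAJOR_VERSION}` for `l4t` builds as well as `cublas` ones, which is what selects the new `requirements-cublas13.txt`, `requirements-l4t12.txt` and `requirements-l4t13.txt` files. A small Go illustration of that mapping; treating `requirements-<profile>.txt` as the selected file is inferred from the filenames added by this patch rather than from the full `installRequirements` implementation:

```go
package main

import "fmt"

// buildProfile mirrors the shell logic of getBuildProfile for the CUDA build
// types only: "cublas" or "l4t" plus the major CUDA version, giving
// "cublas13", "l4t12", "l4t13", and so on. Returning the bare build type when
// no CUDA version is set is a simplification of the script's else branch,
// which is not shown in this diff.
func buildProfile(buildType, cudaMajor string) string {
	if (buildType == "cublas" || buildType == "l4t") && cudaMajor != "" {
		return buildType + cudaMajor
	}
	return buildType
}

func main() {
	for _, c := range [][2]string{{"cublas", "13"}, {"l4t", "12"}, {"l4t", "13"}} {
		p := buildProfile(c[0], c[1])
		// Assumed file naming, matching the requirements files added by this patch.
		fmt.Printf("profile %s -> requirements-%s.txt\n", p, p)
	}
}
```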