diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 9214a6e0b7ab..e8d456b0d4a6 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -92,7 +92,7 @@ jobs: cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-diffusers' + tag-suffix: '-nvidia-l4t-diffusers' runs-on: 'ubuntu-24.04-arm' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" skip-drivers: 'true' @@ -245,7 +245,6 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - # CUDA 12 additional backends - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -306,6 +305,247 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-exllama2' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "exllama2" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-neutts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "neutts" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + # cuda 13 + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-rerankers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "rerankers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "llama-cpp" + dockerfile: "./backend/Dockerfile.llama-cpp" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'true' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "llama-cpp" + dockerfile: "./backend/Dockerfile.llama-cpp" + context: "./" + - 
build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-transformers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "transformers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-diffusers' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "diffusers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'l4t' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-diffusers' + runs-on: 'ubuntu-24.04-arm' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + skip-drivers: 'true' + backend: "diffusers" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-kokoro' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "kokoro" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-bark' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "bark" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-chatterbox' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "chatterbox" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'true' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + 
cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'true' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisper' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" # hipblas builds - build-type: 'hipblas' cuda-major-version: "" @@ -494,7 +734,7 @@ jobs: cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-kokoro' + tag-suffix: '-nvidia-l4t-kokoro' runs-on: 'ubuntu-24.04-arm' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" skip-drivers: 'true' @@ -625,18 +865,6 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -710,18 +938,6 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -846,18 +1062,6 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -907,18 +1111,6 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -961,7 +1153,7 @@ jobs: platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-arm64-chatterbox' + tag-suffix: '-nvidia-l4t-arm64-chatterbox' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" @@ -1006,18 +1198,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 
'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-neutts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "neutts" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" + - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml index ecc0e460677c..aa17f162c6a4 100644 --- a/.github/workflows/deploy-explorer.yaml +++ b/.github/workflows/deploy-explorer.yaml @@ -33,7 +33,7 @@ jobs: run: | CGO_ENABLED=0 make build - name: rm - uses: appleboy/ssh-action@v1.2.3 + uses: appleboy/ssh-action@v1.2.4 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} @@ -53,7 +53,7 @@ jobs: rm: true target: ./local-ai - name: restarting - uses: appleboy/ssh-action@v1.2.3 + uses: appleboy/ssh-action@v1.2.4 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index a2410b22827e..ff4bb560fc35 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -43,6 +43,15 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=3 --output-sync=target" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-gpu-nvidia-cuda-13' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -66,3 +75,13 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'false' + tag-suffix: '-nvidia-l4t-arm64-cuda-13' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 3864930d03ed..0c58ba4acee0 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -100,6 +100,17 @@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + makeflags: "--jobs=4 --output-sync=target" + aio: "-aio-gpu-nvidia-cuda-13" - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'auto' @@ -152,3 +163,13 @@ jobs: runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'true' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-arm64-cuda-13' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' diff --git a/Dockerfile b/Dockerfile index 7a705a9fc514..05a0c2779641 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,7 +46,7 @@ EOT # CuBLAS requirements RUN < /run/localai/capability + echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability fi EOT RUN < /run/localai/capability + echo 
"nvidia-l4t-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability fi EOT @@ -84,7 +98,11 @@ RUN < #define GGML_MAX_NAME 128 @@ -23,8 +24,8 @@ // Names of the sampler method, same order as enum sample_method in stable-diffusion.h const char* sample_method_str[] = { - "default", "euler", + "euler_a", "heun", "dpm2", "dpm++2s_a", @@ -35,29 +36,29 @@ const char* sample_method_str[] = { "lcm", "ddim_trailing", "tcd", - "euler_a", }; static_assert(std::size(sample_method_str) == SAMPLE_METHOD_COUNT, "sample method mismatch"); // Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h const char* schedulers[] = { - "default", "discrete", "karras", "exponential", "ays", "gits", + "sgm_uniform", + "simple", "smoothstep", + "lcm", }; -static_assert(std::size(schedulers) == SCHEDULE_COUNT, "schedulers mismatch"); +static_assert(std::size(schedulers) == SCHEDULER_COUNT, "schedulers mismatch"); sd_ctx_t* sd_c; // Moved from the context (load time) to generation time params -scheduler_t scheduler = scheduler_t::DEFAULT; - -sample_method_t sample_method; +scheduler_t scheduler = SCHEDULER_COUNT; +sample_method_t sample_method = SAMPLE_METHOD_COUNT; // Copied from the upstream CLI static void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { @@ -159,26 +160,6 @@ int load_model(const char *model, char *model_path, char* options[], int threads fprintf(stderr, "parsed options\n"); - int sample_method_found = -1; - for (int m = 0; m < SAMPLE_METHOD_COUNT; m++) { - if (!strcmp(sampler, sample_method_str[m])) { - sample_method_found = m; - fprintf(stderr, "Found sampler: %s\n", sampler); - } - } - if (sample_method_found == -1) { - fprintf(stderr, "Invalid sample method, default to EULER_A!\n"); - sample_method_found = sample_method_t::SAMPLE_METHOD_DEFAULT; - } - sample_method = (sample_method_t)sample_method_found; - - for (int d = 0; d < SCHEDULE_COUNT; d++) { - if (!strcmp(scheduler_str, schedulers[d])) { - scheduler = (scheduler_t)d; - fprintf (stderr, "Found scheduler: %s\n", scheduler_str); - } - } - fprintf (stderr, "Creating context\n"); sd_ctx_params_t ctx_params; sd_ctx_params_init(&ctx_params); @@ -208,6 +189,30 @@ int load_model(const char *model, char *model_path, char* options[], int threads } fprintf (stderr, "Created context: OK\n"); + int sample_method_found = -1; + for (int m = 0; m < SAMPLE_METHOD_COUNT; m++) { + if (!strcmp(sampler, sample_method_str[m])) { + sample_method_found = m; + fprintf(stderr, "Found sampler: %s\n", sampler); + } + } + if (sample_method_found == -1) { + sample_method_found = sd_get_default_sample_method(sd_ctx); + fprintf(stderr, "Invalid sample method, using default: %s\n", sample_method_str[sample_method_found]); + } + sample_method = (sample_method_t)sample_method_found; + + for (int d = 0; d < SCHEDULER_COUNT; d++) { + if (!strcmp(scheduler_str, schedulers[d])) { + scheduler = (scheduler_t)d; + fprintf (stderr, "Found scheduler: %s\n", scheduler_str); + } + } + if (scheduler == SCHEDULER_COUNT) { + scheduler = sd_get_default_scheduler(sd_ctx); + fprintf(stderr, "Invalid scheduler, using default: %s\n", schedulers[scheduler]); + } + sd_c = sd_ctx; // Clean up allocated memory diff --git a/backend/index.yaml b/backend/index.yaml index da142b0998a8..9b89716809b9 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -26,6 +26,10 @@ vulkan: "vulkan-llama-cpp" nvidia-l4t: "nvidia-l4t-arm64-llama-cpp" darwin-x86: "darwin-x86-llama-cpp" + nvidia-cuda-13: "cuda13-llama-cpp" + nvidia-cuda-12: "cuda12-llama-cpp" + 
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp" - &whispercpp name: "whisper" alias: "whisper" @@ -49,6 +53,10 @@ amd: "rocm-whisper" vulkan: "vulkan-whisper" nvidia-l4t: "nvidia-l4t-arm64-whisper" + nvidia-cuda-13: "cuda13-whisper" + nvidia-cuda-12: "cuda12-whisper" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisper" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-whisper" - &stablediffusionggml name: "stablediffusion-ggml" alias: "stablediffusion-ggml" @@ -73,6 +81,10 @@ vulkan: "vulkan-stablediffusion-ggml" nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml" metal: "metal-stablediffusion-ggml" + nvidia-cuda-13: "cuda13-stablediffusion-ggml" + nvidia-cuda-12: "cuda12-stablediffusion-ggml" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-stablediffusion-ggml" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml" # darwin-x86: "darwin-x86-stablediffusion-ggml" - &rfdetr name: "rfdetr" @@ -96,6 +108,9 @@ #amd: "rocm-rfdetr" nvidia-l4t: "nvidia-l4t-arm64-rfdetr" default: "cpu-rfdetr" + nvidia-cuda-13: "cuda13-rfdetr" + nvidia-cuda-12: "cuda12-rfdetr" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-rfdetr" - &vllm name: "vllm" license: apache-2.0 @@ -128,6 +143,7 @@ nvidia: "cuda12-vllm" amd: "rocm-vllm" intel: "intel-vllm" + nvidia-cuda-12: "cuda12-vllm" - &mlx name: "mlx" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx" @@ -201,6 +217,8 @@ nvidia: "cuda12-transformers" intel: "intel-transformers" amd: "rocm-transformers" + nvidia-cuda-13: "cuda13-transformers" + nvidia-cuda-12: "cuda12-transformers" - &diffusers name: "diffusers" icon: https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg @@ -221,6 +239,10 @@ nvidia-l4t: "nvidia-l4t-diffusers" metal: "metal-diffusers" default: "cpu-diffusers" + nvidia-cuda-13: "cuda13-diffusers" + nvidia-cuda-12: "cuda12-diffusers" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-diffusers" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-diffusers" - &exllama2 name: "exllama2" urls: @@ -236,6 +258,7 @@ capabilities: nvidia: "cuda12-exllama2" intel: "intel-exllama2" + nvidia-cuda-12: "cuda12-exllama2" - &faster-whisper icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4 description: | @@ -252,6 +275,8 @@ nvidia: "cuda12-faster-whisper" intel: "intel-faster-whisper" amd: "rocm-faster-whisper" + nvidia-cuda-13: "cuda13-faster-whisper" + nvidia-cuda-12: "cuda12-faster-whisper" - &kokoro icon: https://avatars.githubusercontent.com/u/166769057?v=4 description: | @@ -271,6 +296,9 @@ intel: "intel-kokoro" amd: "rocm-kokoro" nvidia-l4t: "nvidia-l4t-kokoro" + nvidia-cuda-13: "cuda13-kokoro" + nvidia-cuda-12: "cuda12-kokoro" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-kokoro" - &coqui urls: - https://github.com/idiap/coqui-ai-TTS @@ -292,6 +320,8 @@ nvidia: "cuda12-coqui" intel: "intel-coqui" amd: "rocm-coqui" + nvidia-cuda-13: "cuda13-coqui" + nvidia-cuda-12: "cuda12-coqui" icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4 - &bark urls: @@ -308,6 +338,8 @@ cuda: "cuda12-bark" intel: "intel-bark" rocm: "rocm-bark" + nvidia-cuda-13: "cuda13-bark" + nvidia-cuda-12: "cuda12-bark" icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4 - &barkcpp urls: @@ -354,6 +386,10 @@ metal: "metal-chatterbox" default: "cpu-chatterbox" nvidia-l4t: "nvidia-l4t-arm64-chatterbox" + nvidia-cuda-13: "cuda13-chatterbox" + nvidia-cuda-12: "cuda12-chatterbox" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-chatterbox" + nvidia-l4t-cuda-13: 
"cuda13-nvidia-l4t-arm64-chatterbox" - &piper name: "piper" uri: "quay.io/go-skynet/local-ai-backends:latest-piper" @@ -442,6 +478,8 @@ nvidia: "cuda12-neutts" amd: "rocm-neutts" nvidia-l4t: "nvidia-l4t-neutts" + nvidia-cuda-12: "cuda12-neutts" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts" - !!merge <<: *neutts name: "neutts-development" capabilities: @@ -449,6 +487,22 @@ nvidia: "cuda12-neutts-development" amd: "rocm-neutts-development" nvidia-l4t: "nvidia-l4t-neutts-development" + nvidia-cuda-12: "cuda12-neutts-development" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts-development" +- !!merge <<: *llamacpp + name: "llama-cpp-development" + capabilities: + default: "cpu-llama-cpp-development" + nvidia: "cuda12-llama-cpp-development" + intel: "intel-sycl-f16-llama-cpp-development" + amd: "rocm-llama-cpp-development" + metal: "metal-llama-cpp-development" + vulkan: "vulkan-llama-cpp-development" + nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-development" + nvidia-cuda-13: "cuda13-llama-cpp-development" + nvidia-cuda-12: "cuda12-llama-cpp-development" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-development" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-development" - !!merge <<: *neutts name: "cpu-neutts" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-neutts" @@ -465,7 +519,7 @@ mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-neutts - !!merge <<: *neutts - name: "nvidia-l4t-neutts" + name: "nvidia-l4t-arm64-neutts" uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-neutts" mirrors: - localai/localai-backends:latest-nvidia-l4t-arm64-neutts @@ -485,7 +539,7 @@ mirrors: - localai/localai-backends:master-gpu-rocm-hipblas-neutts - !!merge <<: *neutts - name: "nvidia-l4t-neutts-development" + name: "nvidia-l4t-arm64-neutts-development" uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-neutts" mirrors: - localai/localai-backends:master-nvidia-l4t-arm64-neutts @@ -550,6 +604,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp" mirrors: - localai/localai-backends:master-nvidia-l4t-arm64-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-nvidia-l4t-arm64-llama-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-nvidia-l4t-arm64-llama-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-llama-cpp" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-llama-cpp - !!merge <<: *llamacpp name: "cpu-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp" @@ -630,6 +694,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp" mirrors: - localai/localai-backends:master-gpu-intel-sycl-f16-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-llama-cpp" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-llama-cpp +- !!merge <<: *llamacpp + name: "cuda13-llama-cpp-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-llama-cpp" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-llama-cpp ## whisper - !!merge <<: *whispercpp name: "nvidia-l4t-arm64-whisper" @@ -641,6 +715,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper" mirrors: - localai/localai-backends:master-nvidia-l4t-arm64-whisper +- 
!!merge <<: *whispercpp + name: "cuda13-nvidia-l4t-arm64-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-whisper" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-whisper +- !!merge <<: *whispercpp + name: "cuda13-nvidia-l4t-arm64-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-whisper" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-whisper - !!merge <<: *whispercpp name: "cpu-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper" @@ -731,6 +815,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper" mirrors: - localai/localai-backends:master-gpu-intel-sycl-f16-whisper +- !!merge <<: *whispercpp + name: "cuda13-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-whisper" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-whisper +- !!merge <<: *whispercpp + name: "cuda13-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisper" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-whisper ## stablediffusion-ggml - !!merge <<: *stablediffusionggml name: "cpu-stablediffusion-ggml" @@ -810,6 +904,26 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml" mirrors: - localai/localai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-nvidia-l4t-arm64-stablediffusion-ggml-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-stablediffusion-ggml" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-stablediffusion-ggml" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-stablediffusion-ggml +- !!merge <<: *stablediffusionggml + name: "cuda13-stablediffusion-ggml-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-stablediffusion-ggml" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-stablediffusion-ggml # vllm - !!merge <<: *vllm name: "vllm-development" @@ -856,6 +970,7 @@ #amd: "rocm-rfdetr-development" nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development" default: "cpu-rfdetr-development" + nvidia-cuda-13: "cuda13-rfdetr-development" - !!merge <<: *rfdetr name: "cuda12-rfdetr" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr" @@ -876,6 +991,11 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-rfdetr" mirrors: - localai/localai-backends:latest-nvidia-l4t-arm64-rfdetr +- !!merge <<: *rfdetr + name: "nvidia-l4t-arm64-rfdetr-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-rfdetr" + mirrors: + - localai/localai-backends:master-nvidia-l4t-arm64-rfdetr - !!merge <<: *rfdetr name: "cpu-rfdetr" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr" @@ -906,6 +1026,16 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-rfdetr" mirrors: - localai/localai-backends:latest-gpu-intel-rfdetr +- !!merge <<: 
*rfdetr + name: "cuda13-rfdetr" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-rfdetr" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-rfdetr +- !!merge <<: *rfdetr + name: "cuda13-rfdetr-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rfdetr" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-rfdetr ## Rerankers - !!merge <<: *rerankers name: "rerankers-development" @@ -913,6 +1043,7 @@ nvidia: "cuda12-rerankers-development" intel: "intel-rerankers-development" amd: "rocm-rerankers-development" + nvidia-cuda-13: "cuda13-rerankers-development" - !!merge <<: *rerankers name: "cuda11-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers" @@ -953,6 +1084,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-rerankers" mirrors: - localai/localai-backends:master-gpu-intel-rerankers +- !!merge <<: *rerankers + name: "cuda13-rerankers" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-rerankers" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-rerankers +- !!merge <<: *rerankers + name: "cuda13-rerankers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rerankers" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-rerankers ## Transformers - !!merge <<: *transformers name: "transformers-development" @@ -960,6 +1101,7 @@ nvidia: "cuda12-transformers-development" intel: "intel-transformers-development" amd: "rocm-transformers-development" + nvidia-cuda-13: "cuda13-transformers-development" - !!merge <<: *transformers name: "cuda12-transformers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers" @@ -1000,6 +1142,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-transformers" mirrors: - localai/localai-backends:master-gpu-intel-transformers +- !!merge <<: *transformers + name: "cuda13-transformers" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-transformers" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-transformers +- !!merge <<: *transformers + name: "cuda13-transformers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-transformers" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-transformers ## Diffusers - !!merge <<: *diffusers name: "diffusers-development" @@ -1010,6 +1162,7 @@ nvidia-l4t: "nvidia-l4t-diffusers-development" metal: "metal-diffusers-development" default: "cpu-diffusers-development" + nvidia-cuda-13: "cuda13-diffusers-development" - !!merge <<: *diffusers name: "cpu-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-diffusers" @@ -1022,14 +1175,24 @@ - localai/localai-backends:master-cpu-diffusers - !!merge <<: *diffusers name: "nvidia-l4t-diffusers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-diffusers" mirrors: - - localai/localai-backends:latest-gpu-nvidia-l4t-diffusers + - localai/localai-backends:latest-nvidia-l4t-diffusers - !!merge <<: *diffusers name: "nvidia-l4t-diffusers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-diffusers" mirrors: - - localai/localai-backends:master-gpu-nvidia-l4t-diffusers + - localai/localai-backends:master-nvidia-l4t-diffusers +- !!merge <<: *diffusers + name: 
"cuda13-nvidia-l4t-arm64-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-diffusers" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-diffusers +- !!merge <<: *diffusers + name: "cuda13-nvidia-l4t-arm64-diffusers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-diffusers" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-diffusers - !!merge <<: *diffusers name: "cuda12-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers" @@ -1070,6 +1233,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-diffusers" mirrors: - localai/localai-backends:master-gpu-intel-diffusers +- !!merge <<: *diffusers + name: "cuda13-diffusers" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-diffusers" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-diffusers +- !!merge <<: *diffusers + name: "cuda13-diffusers-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-diffusers" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-diffusers - !!merge <<: *diffusers name: "metal-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-diffusers" @@ -1141,14 +1314,14 @@ - localai/localai-backends:master-gpu-intel-kokoro - !!merge <<: *kokoro name: "nvidia-l4t-kokoro" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-kokoro" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-kokoro" mirrors: - - localai/localai-backends:latest-gpu-nvidia-l4t-kokoro + - localai/localai-backends:latest-nvidia-l4t-kokoro - !!merge <<: *kokoro name: "nvidia-l4t-kokoro-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-kokoro" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-kokoro" mirrors: - - localai/localai-backends:master-gpu-nvidia-l4t-kokoro + - localai/localai-backends:master-nvidia-l4t-kokoro - !!merge <<: *kokoro name: "cuda11-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro" @@ -1164,6 +1337,16 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro" mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-kokoro +- !!merge <<: *kokoro + name: "cuda13-kokoro" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-kokoro" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-kokoro +- !!merge <<: *kokoro + name: "cuda13-kokoro-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-kokoro" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-kokoro ## faster-whisper - !!merge <<: *faster-whisper name: "faster-whisper-development" @@ -1171,6 +1354,7 @@ nvidia: "cuda12-faster-whisper-development" intel: "intel-faster-whisper-development" amd: "rocm-faster-whisper-development" + nvidia-cuda-13: "cuda13-faster-whisper-development" - !!merge <<: *faster-whisper name: "cuda11-faster-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper" @@ -1196,6 +1380,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-faster-whisper" mirrors: - localai/localai-backends:master-gpu-intel-faster-whisper +- !!merge <<: *faster-whisper + name: "cuda13-faster-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-faster-whisper" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-faster-whisper +- !!merge <<: 
*faster-whisper + name: "cuda13-faster-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-whisper" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-faster-whisper ## coqui - !!merge <<: *coqui @@ -1303,6 +1497,10 @@ metal: "metal-chatterbox-development" default: "cpu-chatterbox-development" nvidia-l4t: "nvidia-l4t-arm64-chatterbox" + nvidia-cuda-13: "cuda13-chatterbox-development" + nvidia-cuda-12: "cuda12-chatterbox-development" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-chatterbox" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-chatterbox" - !!merge <<: *chatterbox name: "cpu-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox" @@ -1315,14 +1513,14 @@ - localai/localai-backends:master-cpu-chatterbox - !!merge <<: *chatterbox name: "nvidia-l4t-arm64-chatterbox" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-chatterbox" mirrors: - - localai/localai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox + - localai/localai-backends:latest-nvidia-l4t-arm64-chatterbox - !!merge <<: *chatterbox name: "nvidia-l4t-arm64-chatterbox-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-arm64-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-chatterbox" mirrors: - - localai/localai-backends:master-gpu-nvidia-l4t-arm64-chatterbox + - localai/localai-backends:master-nvidia-l4t-arm64-chatterbox - !!merge <<: *chatterbox name: "metal-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox" @@ -1353,3 +1551,23 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox" mirrors: - localai/localai-backends:latest-gpu-nvidia-cuda-12-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-chatterbox" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-chatterbox-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-chatterbox" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-nvidia-l4t-arm64-chatterbox" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-chatterbox" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-chatterbox +- !!merge <<: *chatterbox + name: "cuda13-nvidia-l4t-arm64-chatterbox-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-chatterbox" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-chatterbox diff --git a/backend/python/chatterbox/requirements-cublas13.txt b/backend/python/chatterbox/requirements-cublas13.txt new file mode 100644 index 000000000000..4ac324c9db73 --- /dev/null +++ b/backend/python/chatterbox/requirements-cublas13.txt @@ -0,0 +1,8 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch +torchaudio +transformers +numpy>=1.24.0,<1.26.0 +# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 +chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster +accelerate diff --git a/backend/python/chatterbox/requirements-l4t.txt b/backend/python/chatterbox/requirements-l4t12.txt similarity index 100% rename from backend/python/chatterbox/requirements-l4t.txt rename to 
backend/python/chatterbox/requirements-l4t12.txt diff --git a/backend/python/chatterbox/requirements-l4t13.txt b/backend/python/chatterbox/requirements-l4t13.txt new file mode 100644 index 000000000000..7610e1af5133 --- /dev/null +++ b/backend/python/chatterbox/requirements-l4t13.txt @@ -0,0 +1,7 @@ +--extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130 +torch +torchaudio +transformers +numpy>=1.24.0,<1.26.0 +chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster +accelerate diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index dc2ac44b9741..b4bcf578fc73 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -211,7 +211,7 @@ function init() { # - hipblas # - intel function getBuildProfile() { - if [ x"${BUILD_TYPE:-}" == "xcublas" ]; then + if [ x"${BUILD_TYPE:-}" == "xcublas" ] || [ x"${BUILD_TYPE:-}" == "xl4t" ]; then if [ ! -z "${CUDA_MAJOR_VERSION:-}" ]; then echo ${BUILD_TYPE}${CUDA_MAJOR_VERSION} else diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 32befa8e6c03..09a5efea7694 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -16,4 +16,11 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" fi +# Use python 3.12 for l4t +if [ "x${BUILD_PROFILE}" == "xl4t12" ] || [ "x${BUILD_PROFILE}" == "xl4t13" ]; then + PYTHON_VERSION="3.12" + PYTHON_PATCH="12" + PY_STANDALONE_TAG="20251120" +fi + installRequirements diff --git a/backend/python/diffusers/requirements-cublas13.txt b/backend/python/diffusers/requirements-cublas13.txt new file mode 100644 index 000000000000..4867a85cd405 --- /dev/null +++ b/backend/python/diffusers/requirements-cublas13.txt @@ -0,0 +1,12 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +git+https://github.com/huggingface/diffusers +opencv-python +transformers +torchvision +accelerate +compel +peft +sentencepiece +torch +ftfy +optimum-quanto diff --git a/backend/python/diffusers/requirements-l4t.txt b/backend/python/diffusers/requirements-l4t12.txt similarity index 100% rename from backend/python/diffusers/requirements-l4t.txt rename to backend/python/diffusers/requirements-l4t12.txt diff --git a/backend/python/diffusers/requirements-l4t13.txt b/backend/python/diffusers/requirements-l4t13.txt new file mode 100644 index 000000000000..b608e460d342 --- /dev/null +++ b/backend/python/diffusers/requirements-l4t13.txt @@ -0,0 +1,12 @@ +--extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130 +torch +git+https://github.com/huggingface/diffusers +transformers +accelerate +compel +peft +optimum-quanto +numpy<2 +sentencepiece +torchvision +ftfy diff --git a/backend/python/faster-whisper/requirements-cublas13.txt b/backend/python/faster-whisper/requirements-cublas13.txt new file mode 100644 index 000000000000..3c797fce3a06 --- /dev/null +++ b/backend/python/faster-whisper/requirements-cublas13.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.1 +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas13.txt b/backend/python/kokoro/requirements-cublas13.txt new file mode 100644 index 000000000000..0835ac729bb8 --- /dev/null +++ b/backend/python/kokoro/requirements-cublas13.txt @@ -0,0 +1,7 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.1 
+torchaudio==2.9.1 +transformers +accelerate +kokoro +soundfile \ No newline at end of file diff --git a/backend/python/kokoro/requirements-l4t.txt b/backend/python/kokoro/requirements-l4t12.txt similarity index 100% rename from backend/python/kokoro/requirements-l4t.txt rename to backend/python/kokoro/requirements-l4t12.txt diff --git a/backend/python/neutts/requirements-l4t.txt b/backend/python/neutts/requirements-l4t12.txt similarity index 100% rename from backend/python/neutts/requirements-l4t.txt rename to backend/python/neutts/requirements-l4t12.txt diff --git a/backend/python/rerankers/requirements-cublas13.txt b/backend/python/rerankers/requirements-cublas13.txt new file mode 100644 index 000000000000..b565a9cc154a --- /dev/null +++ b/backend/python/rerankers/requirements-cublas13.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +transformers +accelerate +torch==2.9.1 +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-cublas13.txt b/backend/python/rfdetr/requirements-cublas13.txt new file mode 100644 index 000000000000..d75a2013c24d --- /dev/null +++ b/backend/python/rfdetr/requirements-cublas13.txt @@ -0,0 +1,8 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.1 +rfdetr +opencv-python +accelerate +inference +peft +optimum-quanto \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas13.txt b/backend/python/transformers/requirements-cublas13.txt new file mode 100644 index 000000000000..8f72998e4f46 --- /dev/null +++ b/backend/python/transformers/requirements-cublas13.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.0 +llvmlite==0.43.0 +numba==0.60.0 +transformers +bitsandbytes +outetts +sentence-transformers==5.1.0 +protobuf==6.33.1 \ No newline at end of file diff --git a/pkg/system/capabilities.go b/pkg/system/capabilities.go index 0ea60dfc81fe..c031bd61d129 100644 --- a/pkg/system/capabilities.go +++ b/pkg/system/capabilities.go @@ -4,6 +4,7 @@ package system import ( "os" + "path/filepath" "runtime" "strings" @@ -17,14 +18,32 @@ const ( darwinX86 = "darwin-x86" metal = "metal" nvidia = "nvidia" - amd = "amd" - intel = "intel" + + amd = "amd" + intel = "intel" + + nvidiaCuda13 = "nvidia-cuda-13" + nvidiaCuda12 = "nvidia-cuda-12" + nvidiaL4TCuda12 = "nvidia-l4t-cuda-12" + nvidiaL4TCuda13 = "nvidia-l4t-cuda-13" capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY" capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE" defaultRunFile = "/run/localai/capability" ) +var ( + cuda13DirExists bool + cuda12DirExists bool +) + +func init() { + _, err := os.Stat(filepath.Join("usr", "local", "cuda-13")) + cuda13DirExists = err == nil + _, err = os.Stat(filepath.Join("usr", "local", "cuda-12")) + cuda12DirExists = err == nil +} + func (s *SystemState) Capability(capMap map[string]string) string { reportedCapability := s.getSystemCapabilities() @@ -77,12 +96,26 @@ func (s *SystemState) getSystemCapabilities() string { // If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" { - if s.GPUVendor == "nvidia" { + if s.GPUVendor == nvidia { log.Info().Msgf("Using nvidia-l4t capability (arm64 on linux), set %s to override", capabilityEnv) + if cuda13DirExists { + return nvidiaL4TCuda13 + } + if cuda12DirExists { + return nvidiaL4TCuda12 + } return nvidiaL4T } } + if cuda13DirExists { + return nvidiaCuda13 + } + + if cuda12DirExists 
{ + return nvidiaCuda12 + } + if s.GPUVendor == "" { log.Info().Msgf("Default capability (no GPU detected), set %s to override", capabilityEnv) return defaultCapability @@ -103,13 +136,13 @@ func detectGPUVendor(gpus []*gpu.GraphicsCard) (string, error) { if gpu.DeviceInfo != nil { if gpu.DeviceInfo.Vendor != nil { gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name) - if strings.Contains(gpuVendorName, "NVIDIA") { + if strings.Contains(gpuVendorName, strings.ToUpper(nvidia)) { return nvidia, nil } - if strings.Contains(gpuVendorName, "AMD") { + if strings.Contains(gpuVendorName, strings.ToUpper(amd)) { return amd, nil } - if strings.Contains(gpuVendorName, "INTEL") { + if strings.Contains(gpuVendorName, strings.ToUpper(intel)) { return intel, nil } } @@ -131,7 +164,7 @@ func (s *SystemState) BackendPreferenceTokens() []string { case strings.HasPrefix(capStr, amd): return []string{"rocm", "hip", "vulkan", "cpu"} case strings.HasPrefix(capStr, intel): - return []string{"sycl", "intel", "cpu"} + return []string{"sycl", intel, "cpu"} case strings.HasPrefix(capStr, metal): return []string{"metal", "cpu"} case strings.HasPrefix(capStr, darwinX86):
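
For context on the naming used throughout this patch: the `tag-suffix` values in the CI matrices combine with a `latest-` (release) or `master-` (development) prefix to form the image references that `backend/index.yaml` points at. A minimal Go sketch of that convention, based only on the URIs visible in this diff; `imageRefs` is a hypothetical helper, not code from the repository:

```go
package main

import "fmt"

// imageRefs builds the two registries' references for a CI matrix entry,
// following the pattern visible in backend/index.yaml
// ("quay.io/go-skynet/local-ai-backends:<prefix><tag-suffix>" plus the
// "localai/localai-backends" mirror). Hypothetical helper, for illustration.
func imageRefs(prefix, tagSuffix string) []string {
	return []string{
		"quay.io/go-skynet/local-ai-backends:" + prefix + tagSuffix,
		"localai/localai-backends:" + prefix + tagSuffix,
	}
}

func main() {
	// The '-gpu-nvidia-cuda-13-whisper' matrix entry, release ("latest") build.
	for _, ref := range imageRefs("latest", "-gpu-nvidia-cuda-13-whisper") {
		fmt.Println(ref)
	}
	// Output:
	// quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-whisper
	// localai/localai-backends:latest-gpu-nvidia-cuda-13-whisper
}
```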
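
The Dockerfile change writes a versioned capability string such as `nvidia-cuda-13` or `nvidia-l4t-cuda-13` to `/run/localai/capability`, and `pkg/system/capabilities.go` gains matching constants alongside the existing `LOCALAI_FORCE_META_BACKEND_CAPABILITY` overrides. A rough sketch of the precedence those names suggest (env override, then the run file baked into the image, then runtime detection); this ordering is an assumption for illustration, since the parts of `Capability` and `getSystemCapabilities` that read these values are not shown in this diff:

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// resolveCapability is a simplified illustration of the lookup order implied
// by the constants in pkg/system/capabilities.go: an explicit override env
// var wins, then the capability file written by the Dockerfile, then whatever
// was detected at runtime. It is not the function LocalAI ships.
func resolveCapability(detected string) string {
	if v := os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY"); v != "" {
		return v
	}
	runFile := os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE")
	if runFile == "" {
		runFile = "/run/localai/capability" // written by the Dockerfile in this patch
	}
	if b, err := os.ReadFile(runFile); err == nil {
		if c := strings.TrimSpace(string(b)); c != "" {
			return c
		}
	}
	return detected
}

func main() {
	fmt.Println(resolveCapability("nvidia-cuda-13"))
}
```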
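
Each backend's `capabilities` block in `backend/index.yaml` acts as the `capMap map[string]string` handed to `SystemState.Capability`; the new `nvidia-cuda-12`, `nvidia-cuda-13`, `nvidia-l4t-cuda-12` and `nvidia-l4t-cuda-13` keys let the versioned capability strings above select a matching image. A hedged sketch using the whisper entries from this diff; the fallback order shown (exact key, then the unversioned vendor key, then `default`) is an assumption about `Capability`'s behaviour rather than something this patch spells out:

```go
package main

import (
	"fmt"
	"strings"
)

// capMap mirrors the whisper "capabilities" block in backend/index.yaml,
// restricted to keys visible in this diff; "cpu-whisper" under "default" is
// assumed here only to make the example complete.
var capMap = map[string]string{
	"nvidia-l4t":         "nvidia-l4t-arm64-whisper",
	"nvidia-cuda-13":     "cuda13-whisper",
	"nvidia-cuda-12":     "cuda12-whisper",
	"nvidia-l4t-cuda-12": "nvidia-l4t-arm64-whisper",
	"nvidia-l4t-cuda-13": "cuda13-nvidia-l4t-arm64-whisper",
	"default":            "cpu-whisper",
}

// pickBackend is an illustrative lookup, not LocalAI's implementation:
// exact capability first, then the capability with its "-cuda-NN" suffix
// stripped, then "default".
func pickBackend(capability string) string {
	if b, ok := capMap[capability]; ok {
		return b
	}
	if i := strings.Index(capability, "-cuda-"); i > 0 {
		if b, ok := capMap[capability[:i]]; ok {
			return b
		}
	}
	return capMap["default"]
}

func main() {
	fmt.Println(pickBackend("nvidia-l4t-cuda-13")) // cuda13-nvidia-l4t-arm64-whisper
	fmt.Println(pickBackend("nvidia-cuda-12"))     // cuda12-whisper
}
```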
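
On the Python side, `getBuildProfile` in `libbackend.sh` now returns `${BUILD_TYPE}${CUDA_MAJOR_VERSION}` for `l4t` builds as well as `cublas` ones, which is what selects the new `requirements-cublas13.txt`, `requirements-l4t12.txt` and `requirements-l4t13.txt` files. A small Go illustration of that mapping; treating `requirements-<profile>.txt` as the selected file is inferred from the filenames added by this patch rather than from the full `installRequirements` implementation:

```go
package main

import "fmt"

// buildProfile mirrors the shell logic of getBuildProfile for the CUDA build
// types only: "cublas" or "l4t" plus the major CUDA version, giving
// "cublas13", "l4t12", "l4t13", and so on. Returning the bare build type when
// no CUDA version is set is a simplification of the script's else branch,
// which is not shown in this diff.
func buildProfile(buildType, cudaMajor string) string {
	if (buildType == "cublas" || buildType == "l4t") && cudaMajor != "" {
		return buildType + cudaMajor
	}
	return buildType
}

func main() {
	for _, c := range [][2]string{{"cublas", "13"}, {"l4t", "12"}, {"l4t", "13"}} {
		p := buildProfile(c[0], c[1])
		// Assumed file naming, matching the requirements files added by this patch.
		fmt.Printf("profile %s -> requirements-%s.txt\n", p, p)
	}
}
```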