diff --git a/.gitignore b/.gitignore
index 560ab5cbe2..368183a9f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,10 @@ CODE_CHANGES.md
 README_COMPACT_HERE.md
 artifacts/
 tests/e2e/tmp/
+runs/
+
+# Python
+__pycache__
+
+tmpfork
+.cmux-agent-cli
diff --git a/Makefile b/Makefile
index 1b9227b57d..b7ec50ff06 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,7 @@ include fmt.mk
 .PHONY: test test-unit test-integration test-watch test-coverage test-e2e
 .PHONY: dist dist-mac dist-win dist-linux
 .PHONY: docs docs-build docs-watch
+.PHONY: benchmark-terminal
 .PHONY: ensure-deps
 
 TS_SOURCES := $(shell find src -type f \( -name '*.ts' -o -name '*.tsx' \))
@@ -174,6 +175,19 @@ docs-build: ## Build documentation
 docs-watch: ## Watch and rebuild documentation
 	@cd docs && mdbook watch
 
+## Benchmarks
+benchmark-terminal: ## Run Terminal-Bench with the cmux agent (use TB_DATASET/TB_ARGS to customize)
+	@TB_DATASET=$${TB_DATASET:-terminal-bench-core==0.1.1}; \
+	CONCURRENCY_FLAG=$${TB_CONCURRENCY:+--n-concurrent $$TB_CONCURRENCY}; \
+	LIVESTREAM_FLAG=$${TB_LIVESTREAM:+--livestream}; \
+	echo "Running Terminal-Bench with dataset $$TB_DATASET"; \
+	uvx terminal-bench run \
+		--dataset "$$TB_DATASET" \
+		--agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent \
+		$$CONCURRENCY_FLAG \
+		$$LIVESTREAM_FLAG \
+		$${TB_ARGS}
+
 ## Clean
 clean: ## Clean build artifacts
 	@echo "Cleaning build artifacts..."
@@ -182,5 +196,3 @@ clean: ## Clean build artifacts
 
 # Parallel build optimization - these can run concurrently
 .NOTPARALLEL: build-main # TypeScript can handle its own parallelism
-
-
diff --git a/benchmarks/terminal_bench/__init__.py b/benchmarks/terminal_bench/__init__.py
new file mode 100644
index 0000000000..228448c524
--- /dev/null
+++ b/benchmarks/terminal_bench/__init__.py
@@ -0,0 +1,3 @@
+from .cmux_agent import CmuxAgent
+
+__all__ = ["CmuxAgent"]
diff --git a/benchmarks/terminal_bench/cmux-run.sh b/benchmarks/terminal_bench/cmux-run.sh
new file mode 100644
index 0000000000..4aaa698956
--- /dev/null
+++ b/benchmarks/terminal_bench/cmux-run.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+log() {
+  printf '[cmux-run] %s\n' "$1"
+}
+
+fatal() {
+  printf '[cmux-run] ERROR: %s\n' "$1" >&2
+  exit 1
+}
+
+instruction=${1:-}
+if [[ -z "${instruction}" ]]; then
+  fatal "instruction argument is required"
+fi
+
+export BUN_INSTALL="${BUN_INSTALL:-/root/.bun}"
+export PATH="${BUN_INSTALL}/bin:${PATH}"
+
+CMUX_APP_ROOT="${CMUX_APP_ROOT:-/opt/cmux-app}"
+CMUX_CONFIG_ROOT="${CMUX_CONFIG_ROOT:-/root/.cmux}"
+CMUX_PROJECT_PATH="${CMUX_PROJECT_PATH:-}"
+CMUX_PROJECT_CANDIDATES="${CMUX_PROJECT_CANDIDATES:-/workspace:/app:/workspaces:/root/project}"
+CMUX_MODEL="${CMUX_MODEL:-anthropic:claude-sonnet-4-5}"
+CMUX_TIMEOUT_MS="${CMUX_TIMEOUT_MS:-}"
+CMUX_TRUNK="${CMUX_TRUNK:-main}"
+CMUX_WORKSPACE_ID="${CMUX_WORKSPACE_ID:-cmux-bench}"
+CMUX_THINKING_LEVEL="${CMUX_THINKING_LEVEL:-high}"
+CMUX_MODE="${CMUX_MODE:-exec}"
+
+ensure_bun() {
+  if !
command -v bun >/dev/null 2>&1; then + fatal "bun must be installed before running the cmux agent" + fi +} + +resolve_project_path() { + if [[ -n "${CMUX_PROJECT_PATH}" ]]; then + if [[ -d "${CMUX_PROJECT_PATH}" ]]; then + printf '%s\n' "${CMUX_PROJECT_PATH}" + return 0 + fi + fatal "CMUX_PROJECT_PATH=${CMUX_PROJECT_PATH} not found" + fi + + IFS=":" read -r -a candidates <<<"${CMUX_PROJECT_CANDIDATES}" + for candidate in "${candidates[@]}"; do + if [[ -d "${candidate}" ]]; then + printf '%s\n' "${candidate}" + return 0 + fi + done + + fatal "no project path located (searched ${CMUX_PROJECT_CANDIDATES})" +} + +ensure_git_repo() { + local project_path=$1 + + if command -v git >/dev/null 2>&1; then + if git -C "${project_path}" rev-parse --is-inside-work-tree >/dev/null 2>&1; then + # Ensure trunk branch exists even on pre-existing repos. + if ! git -C "${project_path}" rev-parse --verify "${CMUX_TRUNK}" >/dev/null 2>&1; then + git -C "${project_path}" checkout -b "${CMUX_TRUNK}" >/dev/null 2>&1 || true + else + git -C "${project_path}" checkout "${CMUX_TRUNK}" >/dev/null 2>&1 || true + fi + return 0 + fi + + log "initialising git repository at ${project_path}" + if git -C "${project_path}" init --initial-branch="${CMUX_TRUNK}" >/dev/null 2>&1; then + : + else + git -C "${project_path}" init >/dev/null + git -C "${project_path}" checkout -B "${CMUX_TRUNK}" >/dev/null + fi + git -C "${project_path}" config user.name "cmux-bench" + git -C "${project_path}" config user.email "bench@cmux.local" + git -C "${project_path}" add -A >/dev/null + git -C "${project_path}" commit -m "chore: initial snapshot" --allow-empty >/dev/null + git -C "${project_path}" branch -M "${CMUX_TRUNK}" >/dev/null + else + log "git not available; skipping repository initialisation" + fi +} + +ensure_bun +project_path=$(resolve_project_path) +ensure_git_repo "${project_path}" + +bun --version >/dev/null 2>&1 || fatal "bun not available after ensure_bun" + +log "starting cmux agent session for ${project_path}" +cd "${CMUX_APP_ROOT}" + +cmd=(bun src/debug/agentSessionCli.ts + --config-root "${CMUX_CONFIG_ROOT}" + --project-path "${project_path}" + --workspace-path "${project_path}" + --workspace-id "${CMUX_WORKSPACE_ID}" + --model "${CMUX_MODEL}" + --mode "${CMUX_MODE}") + +if [[ -n "${CMUX_TIMEOUT_MS}" ]]; then + cmd+=(--timeout "${CMUX_TIMEOUT_MS}") +fi + +if [[ -n "${CMUX_THINKING_LEVEL}" ]]; then + cmd+=(--thinking-level "${CMUX_THINKING_LEVEL}") +fi + +if ! printf '%s' "${instruction}" | "${cmd[@]}"; then + fatal "cmux agent session failed" +fi diff --git a/benchmarks/terminal_bench/cmux_agent.py b/benchmarks/terminal_bench/cmux_agent.py new file mode 100644 index 0000000000..2653ec5c97 --- /dev/null +++ b/benchmarks/terminal_bench/cmux_agent.py @@ -0,0 +1,221 @@ +from __future__ import annotations + +import io +import os +import shlex +import tarfile +import tempfile +from pathlib import Path +from typing import Any + +from terminal_bench.agents.base_agent import AgentResult +from terminal_bench.agents.installed_agents.abstract_installed_agent import ( + AbstractInstalledAgent, +) +from terminal_bench.terminal.models import TerminalCommand +from terminal_bench.terminal.tmux_session import TmuxSession + + +class CmuxAgent(AbstractInstalledAgent): + """ + Minimal Terminal-Bench adapter that installs cmux into the task container and + forwards the benchmark instruction to the cmux headless runner. 
+ """ + + _ARCHIVE_NAME = "cmux-app.tar.gz" + _RUNNER_NAME = "cmux-run.sh" + _DEFAULT_TRUNK = "main" + _DEFAULT_MODEL = "anthropic/claude-sonnet-4-5" + + def __init__( + self, mode: str | None = None, thinking_level: str | None = None, **kwargs: Any + ) -> None: + super().__init__(**kwargs) + repo_root_env = os.environ.get("CMUX_AGENT_REPO_ROOT") + repo_root = ( + Path(repo_root_env).resolve() + if repo_root_env + else Path(__file__).resolve().parents[2] + ) + if not repo_root.exists(): + raise RuntimeError(f"cmux repo root {repo_root} does not exist") + + self._repo_root = repo_root + self._archive_bytes: bytes | None = None + self._prepared_container_id: str | None = None + self._mode = mode.lower() if mode else None + self._thinking_level = thinking_level.lower() if thinking_level else None + + @staticmethod + def name() -> str: + return "cmux" + + @property + def _env(self) -> dict[str, str]: + keys = [ + "ANTHROPIC_API_KEY", + "ANTHROPIC_BASE_URL", + "OPENAI_API_KEY", + "OPENAI_BASE_URL", + "OPENAI_API_BASE", + "OPENAI_ORG_ID", + "AZURE_OPENAI_API_KEY", + "AZURE_OPENAI_ENDPOINT", + "AZURE_OPENAI_DEPLOYMENT", + "AZURE_OPENAI_API_VERSION", + "MISTRAL_API_KEY", + "GOOGLE_API_KEY", + "OPENROUTER_API_KEY", + "CMUX_AGENT_GIT_URL", + "CMUX_BUN_INSTALL_URL", + "CMUX_PROJECT_PATH", + "CMUX_PROJECT_CANDIDATES", + "CMUX_TRUNK", + "CMUX_MODEL", + "CMUX_TIMEOUT_MS", + "CMUX_THINKING_LEVEL", + "CMUX_CONFIG_ROOT", + "CMUX_APP_ROOT", + "CMUX_WORKSPACE_ID", + ] + + env: dict[str, str] = {} + for key in keys: + value = os.environ.get(key) + if value: + env[key] = value + + env.setdefault("CMUX_TRUNK", self._DEFAULT_TRUNK) + env.setdefault("CMUX_MODEL", self._DEFAULT_MODEL) + env.setdefault("CMUX_CONFIG_ROOT", "/root/.cmux") + env.setdefault("CMUX_APP_ROOT", "/opt/cmux-app") + env.setdefault("CMUX_WORKSPACE_ID", "cmux-bench") + env.setdefault("CMUX_THINKING_LEVEL", "high") + env.setdefault("CMUX_MODE", "exec") + + model_value = env.get("CMUX_MODEL") + if model_value and "/" in model_value and ":" not in model_value: + provider, model_name = model_value.split("/", 1) + env["CMUX_MODEL"] = f"{provider}:{model_name}" + + thinking_value = self._thinking_level or env.get("CMUX_THINKING_LEVEL") + if thinking_value: + normalized = thinking_value.strip().lower() + if normalized not in {"off", "low", "medium", "high"}: + raise ValueError( + "CMUX_THINKING_LEVEL must be one of off, low, medium, high" + ) + env["CMUX_THINKING_LEVEL"] = normalized + + mode_value = self._mode or env.get("CMUX_MODE") + if mode_value: + normalized_mode = mode_value.strip().lower() + if normalized_mode in {"exec", "execute"}: + env["CMUX_MODE"] = "exec" + elif normalized_mode == "plan": + env["CMUX_MODE"] = "plan" + else: + raise ValueError("CMUX_MODE must be one of plan, exec, or execute") + + return env + + @property + def _install_agent_script_path(self) -> Path: + return self._get_templated_script_path("cmux_setup.sh.j2") + + def perform_task( + self, + instruction: str, + session: TmuxSession, + logging_dir=None, + ) -> AgentResult: + if not instruction or not instruction.strip(): + raise ValueError("instruction must be a non-empty string") + + self._prepare_payloads(session) + return super().perform_task( + instruction=instruction, session=session, logging_dir=logging_dir + ) + + def _prepare_payloads(self, session: TmuxSession) -> None: + container_id = getattr(session.container, "id", None) + if container_id and container_id == self._prepared_container_id: + return + + archive = self._build_archive() + temp_path: Path | None = 
None + try: + with tempfile.NamedTemporaryFile( + suffix=".tar.gz", delete=False + ) as temp_file: + temp_file.write(archive) + temp_path = Path(temp_file.name) + except Exception as error: + raise RuntimeError( + f"failed to materialize cmux archive: {error}" + ) from error + + try: + assert temp_path is not None, "temporary archive path missing" + session.copy_to_container( + paths=temp_path, + container_dir="/installed-agent", + container_filename=self._ARCHIVE_NAME, + ) + finally: + if temp_path is not None: + temp_path.unlink(missing_ok=True) + + runner_path = Path(__file__).with_name(self._RUNNER_NAME) + if not runner_path.exists(): + raise RuntimeError(f"cmux runner script missing at {runner_path}") + + session.copy_to_container( + paths=runner_path, + container_dir="/installed-agent", + container_filename=self._RUNNER_NAME, + ) + + if container_id: + self._prepared_container_id = container_id + + def _build_archive(self) -> bytes: + if self._archive_bytes is not None: + return self._archive_bytes + + include_paths = [ + "package.json", + "bun.lock", + "bunfig.toml", + "tsconfig.json", + "tsconfig.main.json", + "src", + ] + + buffer = io.BytesIO() + with tarfile.open(fileobj=buffer, mode="w:gz") as tar: + for relative in include_paths: + source_path = self._repo_root / relative + if not source_path.exists(): + raise FileNotFoundError(f"Required file {source_path} not found") + tar.add( + source_path, + arcname=relative, + recursive=True, + ) + buffer.seek(0) + self._archive_bytes = buffer.getvalue() + return self._archive_bytes + + def _run_agent_commands(self, instruction: str) -> list[TerminalCommand]: + escaped = shlex.quote(instruction) + command = f"bash /installed-agent/{self._RUNNER_NAME} {escaped}" + return [ + TerminalCommand( + command=command, + min_timeout_sec=0.0, + max_timeout_sec=float("inf"), + block=True, + append_enter=True, + ) + ] diff --git a/benchmarks/terminal_bench/cmux_setup.sh.j2 b/benchmarks/terminal_bench/cmux_setup.sh.j2 new file mode 100644 index 0000000000..e5b1542f0f --- /dev/null +++ b/benchmarks/terminal_bench/cmux_setup.sh.j2 @@ -0,0 +1,68 @@ +#!/usr/bin/env bash + +set -euo pipefail + +log() { + printf '[cmux-setup] %s\n' "$1" +} + +ensure_tool() { + if command -v "$1" >/dev/null 2>&1; then + return 0 + fi + + if ! command -v apt-get >/dev/null 2>&1; then + printf 'Required tool "%s" missing and apt-get unavailable\n' "$1" >&2 + return 1 + fi + + log "installing missing dependency: $1" + export DEBIAN_FRONTEND=noninteractive + apt-get update + apt-get install -y "$1" +} + +ensure_tool curl +ensure_tool git +ensure_tool unzip +export BUN_INSTALL="${BUN_INSTALL:-/root/.bun}" +export PATH="${BUN_INSTALL}/bin:${PATH}" + +if ! command -v bun >/dev/null 2>&1; then + log "installing bun" + curl -fsSL "${CMUX_BUN_INSTALL_URL:-https://bun.sh/install}" | bash +fi + +CMUX_APP_ROOT="${CMUX_APP_ROOT:-/opt/cmux-app}" +CMUX_CONFIG_ROOT="${CMUX_CONFIG_ROOT:-/root/.cmux}" +CMUX_AGENT_VERSION="{{ version if version is not none else '' }}" + +mkdir -p "$CMUX_APP_ROOT" + +if [[ -n "${CMUX_AGENT_VERSION}" ]]; then + : "${CMUX_AGENT_GIT_URL:?CMUX_AGENT_GIT_URL must be set when version is provided}" + log "cloning cmux from ${CMUX_AGENT_GIT_URL} @ ${CMUX_AGENT_VERSION}" + rm -rf "${CMUX_APP_ROOT}" + git clone --depth 1 --branch "${CMUX_AGENT_VERSION}" "${CMUX_AGENT_GIT_URL}" "${CMUX_APP_ROOT}" +else + ARCHIVE_PATH="/installed-agent/cmux-app.tar.gz" + if [[ ! 
-s "${ARCHIVE_PATH}" ]]; then + printf 'Expected cmux archive at %s\n' "${ARCHIVE_PATH}" >&2 + exit 1 + fi + log "extracting cmux archive" + rm -rf "${CMUX_APP_ROOT}" + mkdir -p "${CMUX_APP_ROOT}" + tar -xzf "${ARCHIVE_PATH}" -C "${CMUX_APP_ROOT}" +fi + +cd "${CMUX_APP_ROOT}" + +log "installing cmux dependencies via bun" +bun install --frozen-lockfile + +mkdir -p "${CMUX_CONFIG_ROOT}" + +chmod +x /installed-agent/cmux-run.sh + +log "setup complete" diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 23e3de18f4..bbb4bfa3f2 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -10,5 +10,6 @@ - [System Prompt](./system-prompt.md) - [Instruction Files](./instruction-files.md) - [Project Secrets](./project-secrets.md) - - [Agentic Git Identity](./agentic-git-identity.md) +- [Agentic Git Identity](./agentic-git-identity.md) +- [Terminal Benchmarking](./benchmarking.md) - [AGENTS](./AGENTS.md) diff --git a/docs/benchmarking.md b/docs/benchmarking.md new file mode 100644 index 0000000000..12a49ade1a --- /dev/null +++ b/docs/benchmarking.md @@ -0,0 +1,76 @@ +# Terminal Benchmarking + +cmux ships with a headless adapter for [Terminal-Bench](https://www.tbench.ai/). The adapter runs the Electron backend without opening a window and exercises it through the same IPC paths we use in integration tests. This page documents how to launch benchmarks from the repository tree. + +## Prerequisites + +- Docker must be installed and running. Terminal-Bench executes each task inside a dedicated Docker container. +- `uv` is available in the nix `devShell` (provided via `flake.nix`), or install it manually from . +- Standard provider API keys (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`) should be exported so cmux can stream responses. + +Optional environment overrides: + +| Variable | Purpose | Default | +| ---------------------- | --------------------------------------------------------- | -------------------------------------- | +| `CMUX_AGENT_REPO_ROOT` | Path copied into each task container | repo root inferred from the agent file | +| `CMUX_TRUNK` | Branch checked out when preparing the project | `main` | +| `CMUX_WORKSPACE_ID` | Workspace identifier used inside cmux | `cmux-bench` | +| `CMUX_MODEL` | Preferred model (supports `provider/model` syntax) | `anthropic/claude-sonnet-4-5` | +| `CMUX_THINKING_LEVEL` | Optional reasoning level (`off`, `low`, `medium`, `high`) | `high` | +| `CMUX_MODE` | Starting mode (`plan` or `exec`) | `exec` | +| `CMUX_TIMEOUT_MS` | Optional stream timeout in milliseconds | no timeout | +| `CMUX_CONFIG_ROOT` | Location for cmux session data inside the container | `/root/.cmux` | +| `CMUX_APP_ROOT` | Path where the cmux sources are staged | `/opt/cmux-app` | +| `CMUX_PROJECT_PATH` | Explicit project directory inside the task container | auto-detected from common paths | + +## Running Terminal-Bench + +All commands below should be run from the repository root. + +### Quick smoke test (single task) + +```bash +uvx terminal-bench run \ + --dataset terminal-bench-core==0.1.1 \ + --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent \ + --n-tasks 1 +``` + +This downloads the Terminal-Bench runner, copies the cmux sources into the container, and validates the adapter against the first task only. Use this before attempting a full sweep. 
+
+### Full dataset
+
+```bash
+uvx terminal-bench run \
+  --dataset terminal-bench-core==0.1.1 \
+  --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent
+```
+
+Results (pass/fail, token usage, wall-clock) are printed at the end of the run. Terminal-Bench also writes per-task logs under the current working directory; review them when diagnosing failures.
+
+You can also use `make`:
+
+```bash
+TB_CONCURRENCY=6 TB_LIVESTREAM=1 \
+make benchmark-terminal TB_ARGS="--n-tasks 3 --model anthropic/claude-sonnet-4-20250514 --agent-kwarg mode=plan --agent-kwarg thinking_level=medium"
+```
+
+`TB_DATASET` defaults to `terminal-bench-core==0.1.1`, but can be overridden (e.g. `make benchmark-terminal TB_DATASET=terminal-bench-core==head`).
+Use `--agent-kwarg mode=plan` to exercise the plan/execute workflow—the CLI will gather a plan first, then automatically approve it and switch to execution. Leaving the flag off (or setting `mode=exec`) skips the planning phase.
+Use `TB_CONCURRENCY=<n>` to control `--n-concurrent` (number of concurrently running tasks) and `TB_LIVESTREAM=1` to stream log output live instead of waiting for the run to finish. These map to Terminal-Bench’s `--n-concurrent` and `--livestream` flags.
+
+## How the Adapter Works
+
+The adapter lives in `benchmarks/terminal_bench/cmux_agent.py`. For each task it:
+
+1. Copies the cmux repository (package manifests + `src/`) into `CMUX_APP_ROOT` (default `/opt/cmux-app`) inside the container.
+2. Ensures Bun exists, then runs `bun install --frozen-lockfile`.
+3. Launches `src/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `CMUX_CONFIG_ROOT` (default `/root/.cmux`).
+
+`CMUX_MODEL` accepts either the cmux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide.
+
+## Troubleshooting
+
+- **`command not found: bun`** – ensure the container can reach Bun’s install script, or pre-install Bun in your base image. The adapter aborts if the install step fails.
+- **Workspace creation errors** – set `CMUX_PROJECT_PATH` to the project directory inside the task container if auto-discovery misses it.
+- **Streaming timeouts** – pass `--n-tasks 1` while iterating on fixes, or set `CMUX_TIMEOUT_MS=180000` to reinstate a timeout if needed.
diff --git a/flake.nix b/flake.nix
index d3e7785bd7..52784c734c 100644
--- a/flake.nix
+++ b/flake.nix
@@ -129,6 +129,10 @@
           git
           bash
           nixfmt-rfc-style
+
+          # Terminal bench
+          uv
+          asciinema
         ];
       };
     }
diff --git a/src/bench/headlessEnvironment.ts b/src/bench/headlessEnvironment.ts
new file mode 100644
index 0000000000..c72e50a2f5
--- /dev/null
+++ b/src/bench/headlessEnvironment.ts
@@ -0,0 +1,125 @@
+import * as os from "os";
+import * as path from "path";
+import * as fs from "fs/promises";
+import createIPCMock from "electron-mock-ipc";
+import type { BrowserWindow, IpcMain as ElectronIpcMain, WebContents } from "electron";
+import { Config } from "@/config";
+import { IpcMain } from "@/services/ipcMain";
+
+type MockedElectron = ReturnType<typeof createIPCMock>;
+
+interface CreateHeadlessEnvironmentOptions {
+  /**
+   * Override the root directory for Config. If omitted, a temporary directory is created.
+ */ + rootDir?: string; +} + +export interface HeadlessEnvironment { + config: Config; + ipcMain: IpcMain; + mockIpcMain: ElectronIpcMain; + mockIpcRenderer: Electron.IpcRenderer; + mockWindow: BrowserWindow; + sentEvents: Array<{ channel: string; data: unknown }>; + rootDir: string; + dispose: () => Promise; +} + +function createMockBrowserWindow(): { + window: BrowserWindow; + sentEvents: Array<{ channel: string; data: unknown }>; +} { + const sentEvents: Array<{ channel: string; data: unknown }> = []; + + const mockWindow = { + webContents: { + send: (channel: string, data: unknown) => { + sentEvents.push({ channel, data }); + }, + openDevTools: () => { + throw new Error("openDevTools is not supported in headless mode"); + }, + } as unknown as WebContents, + isMinimized: () => false, + restore: () => undefined, + focus: () => undefined, + loadURL: () => { + throw new Error("loadURL should not be called in headless mode"); + }, + on: () => undefined, + setTitle: () => undefined, + } as unknown as BrowserWindow; + + return { window: mockWindow, sentEvents }; +} + +async function establishRootDir(providedRootDir?: string): Promise<{ + rootDir: string; + dispose: () => Promise; +}> { + if (providedRootDir) { + return { + rootDir: providedRootDir, + dispose: async () => { + // Caller owns the directory; nothing to clean up. + }, + }; + } + + const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "cmux-headless-")); + return { + rootDir: tempRoot, + dispose: async () => { + await fs.rm(tempRoot, { recursive: true, force: true }); + }, + }; +} + +function assertMockedElectron(mocked: MockedElectron): void { + if (!mocked || typeof mocked !== "object") { + throw new Error("Failed to initialize electron-mock-ipc"); + } + + if (!("ipcMain" in mocked) || !mocked.ipcMain) { + throw new Error("electron-mock-ipc returned an invalid ipcMain"); + } + + if (!("ipcRenderer" in mocked) || !mocked.ipcRenderer) { + throw new Error("electron-mock-ipc returned an invalid ipcRenderer"); + } +} + +export async function createHeadlessEnvironment( + options: CreateHeadlessEnvironmentOptions = {} +): Promise { + const { rootDir, dispose: disposeRootDir } = await establishRootDir(options.rootDir); + + const config = new Config(rootDir); + + const { window: mockWindow, sentEvents } = createMockBrowserWindow(); + + const mockedElectron = createIPCMock(); + assertMockedElectron(mockedElectron); + const mockIpcMainModule = mockedElectron.ipcMain; + const mockIpcRendererModule = mockedElectron.ipcRenderer; + + const ipcMain = new IpcMain(config); + ipcMain.register(mockIpcMainModule, mockWindow); + + const dispose = async () => { + sentEvents.length = 0; + await disposeRootDir(); + }; + + return { + config, + ipcMain, + mockIpcMain: mockIpcMainModule, + mockIpcRenderer: mockIpcRendererModule, + mockWindow, + sentEvents, + rootDir, + dispose, + }; +} diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts new file mode 100644 index 0000000000..e8a0f694fb --- /dev/null +++ b/src/debug/agentSessionCli.ts @@ -0,0 +1,508 @@ +#!/usr/bin/env bun + +import assert from "node:assert/strict"; +import * as fs from "fs/promises"; +import * as path from "path"; +import { parseArgs } from "util"; +import { Config } from "@/config"; +import { HistoryService } from "@/services/historyService"; +import { PartialService } from "@/services/partialService"; +import { AIService } from "@/services/aiService"; +import { AgentSession, type AgentSessionChatEvent } from "@/services/agentSession"; +import { + isCaughtUpMessage, 
+ isStreamAbort, + isStreamDelta, + isStreamEnd, + isStreamError, + isStreamStart, + isToolCallDelta, + isToolCallEnd, + isToolCallStart, + type SendMessageOptions, + type WorkspaceChatMessage, +} from "@/types/ipc"; +import { defaultModel } from "@/utils/ai/models"; +import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig"; +import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils"; +import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors"; +import type { ThinkingLevel } from "@/types/thinking"; + +interface CliResult { + success: boolean; + error?: string; + data?: Record; +} + +async function ensureDirectory(pathToCheck: string): Promise { + const stats = await fs.stat(pathToCheck); + if (!stats.isDirectory()) { + throw new Error(`"${pathToCheck}" is not a directory`); + } +} + +async function gatherMessageFromStdin(): Promise { + if (process.stdin.isTTY) { + return ""; + } + + const chunks: Uint8Array[] = []; + for await (const chunk of process.stdin) { + if (Buffer.isBuffer(chunk)) { + chunks.push(chunk); + continue; + } + if (typeof chunk === "string") { + chunks.push(Buffer.from(chunk)); + continue; + } + if (chunk instanceof Uint8Array) { + chunks.push(chunk); + continue; + } + throw new Error(`Unsupported stdin chunk type: ${typeof chunk}`); + } + return Buffer.concat(chunks).toString("utf-8"); +} + +function parseTimeout(timeoutRaw: string | undefined): number | undefined { + if (!timeoutRaw) { + return undefined; + } + + const parsed = Number.parseInt(timeoutRaw, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error(`Invalid timeout value "${timeoutRaw}"`); + } + return parsed; +} + +function parseThinkingLevel(value: string | undefined): ThinkingLevel | undefined { + if (!value) { + return undefined; + } + + const normalized = value.trim().toLowerCase(); + if ( + normalized === "off" || + normalized === "low" || + normalized === "medium" || + normalized === "high" + ) { + return normalized; + } + throw new Error(`Invalid thinking level "${value}". Expected one of: off, low, medium, high.`); +} + +type CLIMode = "plan" | "exec"; + +function parseMode(raw: string | undefined): CLIMode { + if (!raw) { + return "exec"; + } + + const normalized = raw.trim().toLowerCase(); + if (normalized === "plan") { + return "plan"; + } + if (normalized === "exec" || normalized === "execute") { + return "exec"; + } + + throw new Error('Invalid mode "' + raw + '". 
Expected "plan" or "exec" (or "execute").'); +} + +function renderUnknown(value: unknown): string { + if (typeof value === "string") { + return value; + } + try { + return JSON.stringify(value, null, 2); + } catch { + return String(value); + } +} + +function writeJson(result: CliResult): void { + process.stdout.write(`${JSON.stringify(result)}\n`); +} + +async function main(): Promise { + const { values } = parseArgs({ + args: process.argv.slice(2), + options: { + "workspace-path": { type: "string" }, + "workspace-id": { type: "string" }, + "project-path": { type: "string" }, + "config-root": { type: "string" }, + message: { type: "string" }, + model: { type: "string" }, + "thinking-level": { type: "string" }, + mode: { type: "string" }, + timeout: { type: "string" }, + json: { type: "boolean" }, + "json-streaming": { type: "boolean" }, + }, + allowPositionals: false, + }); + + const workspacePathRaw = values["workspace-path"]; + if (typeof workspacePathRaw !== "string" || workspacePathRaw.trim().length === 0) { + throw new Error("--workspace-path is required"); + } + const workspacePath = path.resolve(workspacePathRaw.trim()); + await ensureDirectory(workspacePath); + + const configRootRaw = values["config-root"]; + const configRoot = + configRootRaw && configRootRaw.trim().length > 0 ? configRootRaw.trim() : undefined; + const config = new Config(configRoot); + + const workspaceIdRaw = values["workspace-id"]; + const projectPathRaw = values["project-path"]; + + const workspaceId = + workspaceIdRaw && workspaceIdRaw.trim().length > 0 + ? workspaceIdRaw.trim() + : (() => { + if (typeof projectPathRaw !== "string" || projectPathRaw.trim().length === 0) { + throw new Error("Provide --workspace-id or --project-path to derive workspace ID"); + } + const projectPath = path.resolve(projectPathRaw.trim()); + return config.generateWorkspaceId(projectPath, workspacePath); + })(); + + const projectName = + typeof projectPathRaw === "string" && projectPathRaw.trim().length > 0 + ? path.basename(path.resolve(projectPathRaw.trim())) + : path.basename(path.dirname(workspacePath)) || "unknown"; + + const messageArg = + values.message && values.message.trim().length > 0 ? values.message : undefined; + const messageText = messageArg ?? (await gatherMessageFromStdin()); + if (!messageText || messageText.trim().length === 0) { + throw new Error("Message must be provided via --message or stdin"); + } + + const model = values.model && values.model.trim().length > 0 ? 
values.model.trim() : defaultModel; + const timeoutMs = parseTimeout(values.timeout); + const thinkingLevel = parseThinkingLevel(values["thinking-level"]); + const initialMode = parseMode(values.mode); + const emitFinalJson = values.json === true; + const emitJsonStreaming = values["json-streaming"] === true; + + const suppressHumanOutput = emitJsonStreaming || emitFinalJson; + const humanStream = process.stdout; + const writeHuman = (text: string) => { + if (suppressHumanOutput) { + return; + } + humanStream.write(text); + }; + const writeHumanLine = (text = "") => { + if (suppressHumanOutput) { + return; + } + humanStream.write(`${text}\n`); + }; + const emitJsonLine = (payload: unknown) => { + if (!emitJsonStreaming) { + return; + } + process.stdout.write(`${JSON.stringify(payload)}\n`); + }; + + const historyService = new HistoryService(config); + const partialService = new PartialService(config, historyService); + const aiService = new AIService(config, historyService, partialService); + ensureProvidersConfig(config); + + const session = new AgentSession({ + workspaceId, + config, + historyService, + partialService, + aiService, + }); + + await session.ensureMetadata({ + workspacePath, + projectName, + }); + + const buildSendOptions = (mode: CLIMode): SendMessageOptions => ({ + model, + thinkingLevel, + toolPolicy: modeToToolPolicy(mode), + additionalSystemInstructions: mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined, + }); + + const liveEvents: WorkspaceChatMessage[] = []; + let readyForLive = false; + let streamLineOpen = false; + let activeMessageId: string | null = null; + let planProposed = false; + let streamEnded = false; + + let resolveCompletion: ((value: void) => void) | null = null; + let rejectCompletion: ((reason?: unknown) => void) | null = null; + let completionPromise: Promise = Promise.resolve(); + + const createCompletionPromise = (): Promise => { + streamEnded = false; + return new Promise((resolve, reject) => { + resolveCompletion = resolve; + rejectCompletion = reject; + }); + }; + + const waitForCompletion = async (): Promise => { + if (timeoutMs !== undefined) { + let timeoutHandle: NodeJS.Timeout | null = null; + try { + await Promise.race([ + completionPromise, + new Promise((_, reject) => { + timeoutHandle = setTimeout( + () => reject(new Error(`Timed out after ${timeoutMs}ms`)), + timeoutMs + ); + }), + ]); + } finally { + if (timeoutHandle) { + clearTimeout(timeoutHandle); + } + } + } else { + await completionPromise; + } + + if (!streamEnded) { + throw new Error("Stream completion promise resolved unexpectedly without stream end"); + } + }; + + const sendAndAwait = async (message: string, options: SendMessageOptions): Promise => { + completionPromise = createCompletionPromise(); + const sendResult = await session.sendMessage(message, options); + if (!sendResult.success) { + const errorValue = sendResult.error; + let formattedError = "unknown error"; + if (typeof errorValue === "string") { + formattedError = errorValue; + } else if (errorValue && typeof errorValue === "object") { + const maybeRaw = (errorValue as { raw?: unknown }).raw; + if (typeof maybeRaw === "string" && maybeRaw.trim().length > 0) { + formattedError = maybeRaw; + } else { + formattedError = JSON.stringify(errorValue); + } + } + throw new Error(`Failed to send message: ${formattedError}`); + } + + await waitForCompletion(); + }; + + const handleToolStart = (payload: WorkspaceChatMessage): boolean => { + if (!isToolCallStart(payload)) { + return false; + } + 
writeHumanLine("\n========== TOOL CALL START =========="); + writeHumanLine(`Tool: ${payload.toolName}`); + writeHumanLine(`Call ID: ${payload.toolCallId}`); + writeHumanLine("Arguments:"); + writeHumanLine(renderUnknown(payload.args)); + writeHumanLine("====================================="); + return true; + }; + + const handleToolDelta = (payload: WorkspaceChatMessage): boolean => { + if (!isToolCallDelta(payload)) { + return false; + } + writeHumanLine("\n----------- TOOL OUTPUT -------------"); + writeHumanLine(renderUnknown(payload.delta)); + writeHumanLine("-------------------------------------"); + return true; + }; + + const handleToolEnd = (payload: WorkspaceChatMessage): boolean => { + if (!isToolCallEnd(payload)) { + return false; + } + writeHumanLine("\n=========== TOOL CALL END ==========="); + writeHumanLine(`Tool: ${payload.toolName}`); + writeHumanLine(`Call ID: ${payload.toolCallId}`); + writeHumanLine("Result:"); + writeHumanLine(renderUnknown(payload.result)); + writeHumanLine("====================================="); + if (payload.toolName === "propose_plan") { + planProposed = true; + } + return true; + }; + + const chatListener = (event: AgentSessionChatEvent) => { + const payload = event.message; + + if (!readyForLive) { + if (isCaughtUpMessage(payload)) { + readyForLive = true; + emitJsonLine({ type: "caught-up", workspaceId }); + } + return; + } + + emitJsonLine({ type: "event", workspaceId, payload }); + liveEvents.push(payload); + + if (handleToolStart(payload) || handleToolDelta(payload) || handleToolEnd(payload)) { + return; + } + + if (isStreamStart(payload)) { + if (activeMessageId && activeMessageId !== payload.messageId) { + if (rejectCompletion) { + rejectCompletion( + new Error( + `Received conflicting stream-start message IDs (${activeMessageId} vs ${payload.messageId})` + ) + ); + resolveCompletion = null; + rejectCompletion = null; + } + return; + } + activeMessageId = payload.messageId; + return; + } + + if (isStreamDelta(payload)) { + assert(typeof payload.delta === "string", "stream delta must include text"); + writeHuman(payload.delta); + streamLineOpen = !payload.delta.endsWith("\n"); + return; + } + + if (isStreamError(payload)) { + if (rejectCompletion) { + rejectCompletion(new Error(payload.error)); + resolveCompletion = null; + rejectCompletion = null; + } + return; + } + + if (isStreamAbort(payload)) { + if (rejectCompletion) { + rejectCompletion(new Error("Stream aborted before completion")); + resolveCompletion = null; + rejectCompletion = null; + } + return; + } + + if (isStreamEnd(payload)) { + if (activeMessageId && payload.messageId !== activeMessageId) { + if (rejectCompletion) { + rejectCompletion( + new Error( + `Mismatched stream-end message ID. 
Expected ${activeMessageId}, received ${payload.messageId}` + ) + ); + resolveCompletion = null; + rejectCompletion = null; + } + return; + } + if (streamLineOpen) { + writeHuman("\n"); + streamLineOpen = false; + } + streamEnded = true; + if (resolveCompletion) { + resolveCompletion(); + resolveCompletion = null; + rejectCompletion = null; + } + activeMessageId = null; + } + }; + + const unsubscribe = await session.subscribeChat(chatListener); + + try { + await sendAndAwait(messageText, buildSendOptions(initialMode)); + + const planWasProposed = planProposed; + planProposed = false; + if (initialMode === "plan" && !planWasProposed) { + throw new Error("Plan mode was requested, but the assistant never proposed a plan."); + } + if (planWasProposed) { + writeHumanLine("\n[auto] Plan received. Approving and switching to execute mode...\n"); + await sendAndAwait("Plan approved. Execute it.", buildSendOptions("exec")); + } + + let finalEvent: WorkspaceChatMessage | undefined; + for (let i = liveEvents.length - 1; i >= 0; i -= 1) { + const candidate = liveEvents[i]; + if (isStreamEnd(candidate)) { + finalEvent = candidate; + break; + } + } + + if (!finalEvent || !isStreamEnd(finalEvent)) { + throw new Error("Stream ended without receiving stream-end event"); + } + + const parts = (finalEvent as unknown as { parts?: unknown }).parts ?? []; + const text = extractAssistantText(parts); + const reasoning = extractReasoning(parts); + const toolCalls = extractToolCalls(parts); + + if (emitFinalJson) { + writeJson({ + success: true, + data: { + messageId: finalEvent.messageId, + model: finalEvent.metadata?.model ?? null, + text, + reasoning, + toolCalls, + metadata: finalEvent.metadata ?? null, + parts, + events: liveEvents, + }, + }); + } + } finally { + unsubscribe(); + session.dispose(); + } +} + +void (async () => { + try { + await main(); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + const wantsJsonStreaming = + process.argv.includes("--json-streaming") || process.argv.includes("--json-streaming=true"); + const wantsJson = process.argv.includes("--json") || process.argv.includes("--json=true"); + + if (wantsJsonStreaming) { + process.stdout.write(`${JSON.stringify({ type: "error", error: message })}\n`); + } + + if (wantsJson) { + writeJson({ success: false, error: message }); + } else { + process.stderr.write(`Error: ${message}\n`); + } + process.exitCode = 1; + } +})(); diff --git a/src/debug/chatExtractors.ts b/src/debug/chatExtractors.ts new file mode 100644 index 0000000000..1650cf31d9 --- /dev/null +++ b/src/debug/chatExtractors.ts @@ -0,0 +1,42 @@ +import assert from "node:assert/strict"; +import type { CmuxReasoningPart, CmuxTextPart, CmuxToolPart } from "@/types/message"; + +export function extractAssistantText(parts: unknown): string { + if (!Array.isArray(parts)) { + return ""; + } + + const textParts = (parts as CmuxTextPart[]).filter( + (part): part is CmuxTextPart => part.type === "text" + ); + return textParts + .map((part) => { + assert(typeof part.text === "string", "Text part must include text"); + return part.text; + }) + .join(""); +} + +export function extractReasoning(parts: unknown): string[] { + if (!Array.isArray(parts)) { + return []; + } + + const reasoningParts = (parts as CmuxReasoningPart[]).filter( + (part): part is CmuxReasoningPart => part.type === "reasoning" + ); + return reasoningParts.map((part) => { + assert(typeof part.text === "string", "Reasoning part must include text"); + return part.text; + }); +} + +export function extractToolCalls(parts: unknown): CmuxToolPart[] { + if (!Array.isArray(parts)) { + return []; + } + + return (parts as CmuxToolPart[]).filter( + (part): part is CmuxToolPart => part.type === "dynamic-tool" + ); +} diff --git a/src/services/agentSession.ts b/src/services/agentSession.ts new file mode 100644 index 0000000000..aef1608068 --- /dev/null +++ b/src/services/agentSession.ts @@ -0,0 +1,383 @@ +import assert from "node:assert/strict"; +import { EventEmitter } from "events"; +import * as path from "path"; +import { createCmuxMessage } from "@/types/message"; +import type { Config } from "@/config"; +import type { AIService } from "@/services/aiService"; +import type { HistoryService } from "@/services/historyService"; +import type { PartialService } from "@/services/partialService"; +import type { WorkspaceMetadata } from "@/types/workspace"; +import type { WorkspaceChatMessage, StreamErrorMessage, SendMessageOptions } from "@/types/ipc"; +import type { SendMessageError } from "@/types/errors"; +import { createUnknownSendMessageError } from "@/services/utils/sendMessageError"; +import type { Result } from "@/types/result"; +import { Ok, Err } from "@/types/result"; + +interface ImagePart { + image: string; + mimeType: string; +} + +export interface AgentSessionChatEvent { + workspaceId: string; + message: WorkspaceChatMessage; +} + +export interface AgentSessionMetadataEvent { + workspaceId: string; + metadata: WorkspaceMetadata | null; +} + +interface AgentSessionOptions { + workspaceId: string; + config: Config; + historyService: HistoryService; + partialService: PartialService; + aiService: AIService; +} + +export class AgentSession { + private readonly workspaceId: string; + private readonly config: Config; + private readonly historyService: HistoryService; + private readonly partialService: PartialService; + private readonly aiService: AIService; + private readonly 
emitter = new EventEmitter(); + private readonly aiListeners: Array<{ event: string; handler: (...args: unknown[]) => void }> = + []; + private disposed = false; + + constructor(options: AgentSessionOptions) { + assert(options, "AgentSession requires options"); + const { workspaceId, config, historyService, partialService, aiService } = options; + + assert(typeof workspaceId === "string", "workspaceId must be a string"); + const trimmedWorkspaceId = workspaceId.trim(); + assert(trimmedWorkspaceId.length > 0, "workspaceId must not be empty"); + + this.workspaceId = trimmedWorkspaceId; + this.config = config; + this.historyService = historyService; + this.partialService = partialService; + this.aiService = aiService; + + this.attachAiListeners(); + } + + dispose(): void { + if (this.disposed) { + return; + } + this.disposed = true; + for (const { event, handler } of this.aiListeners) { + this.aiService.off(event, handler as never); + } + this.aiListeners.length = 0; + this.emitter.removeAllListeners(); + } + + onChatEvent(listener: (event: AgentSessionChatEvent) => void): () => void { + assert(typeof listener === "function", "listener must be a function"); + this.emitter.on("chat-event", listener); + return () => { + this.emitter.off("chat-event", listener); + }; + } + + onMetadataEvent(listener: (event: AgentSessionMetadataEvent) => void): () => void { + assert(typeof listener === "function", "listener must be a function"); + this.emitter.on("metadata-event", listener); + return () => { + this.emitter.off("metadata-event", listener); + }; + } + + async subscribeChat(listener: (event: AgentSessionChatEvent) => void): Promise<() => void> { + this.assertNotDisposed("subscribeChat"); + assert(typeof listener === "function", "listener must be a function"); + + const unsubscribe = this.onChatEvent(listener); + await this.emitHistoricalEvents(listener); + + return unsubscribe; + } + + async replayHistory(listener: (event: AgentSessionChatEvent) => void): Promise { + this.assertNotDisposed("replayHistory"); + assert(typeof listener === "function", "listener must be a function"); + await this.emitHistoricalEvents(listener); + } + + emitMetadata(metadata: WorkspaceMetadata | null): void { + this.assertNotDisposed("emitMetadata"); + this.emitter.emit("metadata-event", { + workspaceId: this.workspaceId, + metadata, + } satisfies AgentSessionMetadataEvent); + } + + private async emitHistoricalEvents( + listener: (event: AgentSessionChatEvent) => void + ): Promise { + const historyResult = await this.historyService.getHistory(this.workspaceId); + if (historyResult.success) { + for (const message of historyResult.data) { + listener({ workspaceId: this.workspaceId, message }); + } + } + + const streamInfo = this.aiService.getStreamInfo(this.workspaceId); + const partial = await this.partialService.readPartial(this.workspaceId); + + if (streamInfo) { + this.aiService.replayStream(this.workspaceId); + } else if (partial) { + listener({ workspaceId: this.workspaceId, message: partial }); + } + + listener({ + workspaceId: this.workspaceId, + message: { type: "caught-up" }, + }); + } + + async ensureMetadata(args: { workspacePath: string; projectName?: string }): Promise { + this.assertNotDisposed("ensureMetadata"); + assert(args, "ensureMetadata requires arguments"); + const { workspacePath, projectName } = args; + + assert(typeof workspacePath === "string", "workspacePath must be a string"); + const trimmedWorkspacePath = workspacePath.trim(); + assert(trimmedWorkspacePath.length > 0, "workspacePath must not 
be empty"); + + const normalizedWorkspacePath = path.resolve(trimmedWorkspacePath); + const existing = await this.aiService.getWorkspaceMetadata(this.workspaceId); + + if (existing.success) { + const metadata = existing.data; + assert( + metadata.workspacePath === normalizedWorkspacePath, + `Existing metadata workspacePath mismatch for ${this.workspaceId}` + ); + return; + } + + const derivedProjectName = + projectName && projectName.trim().length > 0 + ? projectName.trim() + : path.basename(path.dirname(normalizedWorkspacePath)) || "unknown"; + + const metadata: WorkspaceMetadata = { + id: this.workspaceId, + projectName: derivedProjectName, + workspacePath: normalizedWorkspacePath, + }; + + const saveResult = await this.aiService.saveWorkspaceMetadata(this.workspaceId, metadata); + if (!saveResult.success) { + const errorDetail = saveResult.error ?? "unknown"; + throw new Error(`Failed to save metadata: ${errorDetail}`); + } + this.emitMetadata(metadata); + } + + async sendMessage( + message: string, + options?: SendMessageOptions & { imageParts?: ImagePart[] } + ): Promise> { + this.assertNotDisposed("sendMessage"); + + assert(typeof message === "string", "sendMessage requires a string message"); + const trimmedMessage = message.trim(); + const imageParts = options?.imageParts; + + if (trimmedMessage.length === 0 && (!imageParts || imageParts.length === 0)) { + return Err( + createUnknownSendMessageError( + "Empty message not allowed. Use interruptStream() to interrupt active streams." + ) + ); + } + + if (options?.editMessageId) { + const truncateResult = await this.historyService.truncateAfterMessage( + this.workspaceId, + options.editMessageId + ); + if (!truncateResult.success) { + return Err(createUnknownSendMessageError(truncateResult.error)); + } + } + + const messageId = `user-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`; + const additionalParts = + imageParts && imageParts.length > 0 + ? imageParts.map((img) => { + assert(typeof img.image === "string", "image part must include base64 string content"); + assert( + typeof img.mimeType === "string" && img.mimeType.trim().length > 0, + "image part must include a mimeType" + ); + return { + type: "image" as const, + image: img.image, + mimeType: img.mimeType, + }; + }) + : undefined; + + const userMessage = createCmuxMessage( + messageId, + "user", + message, + { + timestamp: Date.now(), + toolPolicy: options?.toolPolicy, + }, + additionalParts + ); + + const appendResult = await this.historyService.appendToHistory(this.workspaceId, userMessage); + if (!appendResult.success) { + return Err(createUnknownSendMessageError(appendResult.error)); + } + + this.emitChatEvent(userMessage); + + if (!options?.model || options.model.trim().length === 0) { + return Err( + createUnknownSendMessageError("No model specified. 
Please select a model using /model.") + ); + } + + return this.streamWithHistory(options.model, options); + } + + async resumeStream(options: SendMessageOptions): Promise> { + this.assertNotDisposed("resumeStream"); + + assert(options, "resumeStream requires options"); + const { model } = options; + assert(typeof model === "string" && model.trim().length > 0, "resumeStream requires a model"); + + if (this.aiService.isStreaming(this.workspaceId)) { + return Ok(undefined); + } + + return this.streamWithHistory(model, options); + } + + async interruptStream(): Promise> { + this.assertNotDisposed("interruptStream"); + + if (!this.aiService.isStreaming(this.workspaceId)) { + return Ok(undefined); + } + + const stopResult = await this.aiService.stopStream(this.workspaceId); + if (!stopResult.success) { + return Err(stopResult.error); + } + + return Ok(undefined); + } + + private async streamWithHistory( + modelString: string, + options?: SendMessageOptions + ): Promise> { + const commitResult = await this.partialService.commitToHistory(this.workspaceId); + if (!commitResult.success) { + return Err(createUnknownSendMessageError(commitResult.error)); + } + + const historyResult = await this.historyService.getHistory(this.workspaceId); + if (!historyResult.success) { + return Err(createUnknownSendMessageError(historyResult.error)); + } + + const streamResult = await this.aiService.streamMessage( + historyResult.data, + this.workspaceId, + modelString, + options?.thinkingLevel, + options?.toolPolicy, + undefined, + options?.additionalSystemInstructions, + options?.maxOutputTokens, + options?.providerOptions, + options?.mode + ); + + return streamResult; + } + + private attachAiListeners(): void { + const forward = (event: string, handler: (payload: WorkspaceChatMessage) => void) => { + const wrapped = (...args: unknown[]) => { + const [payload] = args; + if ( + typeof payload === "object" && + payload !== null && + "workspaceId" in payload && + (payload as { workspaceId: unknown }).workspaceId !== this.workspaceId + ) { + return; + } + handler(payload as WorkspaceChatMessage); + }; + this.aiListeners.push({ event, handler: wrapped }); + this.aiService.on(event, wrapped as never); + }; + + forward("stream-start", (payload) => this.emitChatEvent(payload)); + forward("stream-delta", (payload) => this.emitChatEvent(payload)); + forward("stream-end", (payload) => this.emitChatEvent(payload)); + forward("tool-call-start", (payload) => this.emitChatEvent(payload)); + forward("tool-call-delta", (payload) => this.emitChatEvent(payload)); + forward("tool-call-end", (payload) => this.emitChatEvent(payload)); + forward("reasoning-delta", (payload) => this.emitChatEvent(payload)); + forward("reasoning-end", (payload) => this.emitChatEvent(payload)); + forward("stream-abort", (payload) => this.emitChatEvent(payload)); + + const errorHandler = (...args: unknown[]) => { + const [raw] = args; + if ( + typeof raw !== "object" || + raw === null || + !("workspaceId" in raw) || + (raw as { workspaceId: unknown }).workspaceId !== this.workspaceId + ) { + return; + } + const data = raw as { + workspaceId: string; + messageId: string; + error: string; + errorType?: string; + }; + const streamError: StreamErrorMessage = { + type: "stream-error", + messageId: data.messageId, + error: data.error, + errorType: (data.errorType ?? 
"unknown") as StreamErrorMessage["errorType"], + }; + this.emitChatEvent(streamError); + }; + + this.aiListeners.push({ event: "error", handler: errorHandler }); + this.aiService.on("error", errorHandler as never); + } + + private emitChatEvent(message: WorkspaceChatMessage): void { + this.assertNotDisposed("emitChatEvent"); + this.emitter.emit("chat-event", { + workspaceId: this.workspaceId, + message, + } satisfies AgentSessionChatEvent); + } + + private assertNotDisposed(operation: string): void { + assert(!this.disposed, `AgentSession.${operation} called after dispose`); + } +} diff --git a/src/services/aiService.ts b/src/services/aiService.ts index e6e59ed8c9..8cdbf3eb44 100644 --- a/src/services/aiService.ts +++ b/src/services/aiService.ts @@ -509,6 +509,12 @@ export class AIService extends EventEmitter { // Apply tool policy to filter tools (if policy provided) const tools = applyToolPolicy(allTools, toolPolicy); + log.info("AIService.streamMessage: tool configuration", { + workspaceId, + model: modelString, + toolNames: Object.keys(tools), + hasToolPolicy: Boolean(toolPolicy), + }); // Create assistant message placeholder with historySequence from backend const assistantMessageId = `assistant-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`; diff --git a/src/services/ipcMain.ts b/src/services/ipcMain.ts index fa8b561725..63e9cce91e 100644 --- a/src/services/ipcMain.ts +++ b/src/services/ipcMain.ts @@ -1,3 +1,4 @@ +import assert from "node:assert/strict"; import type { BrowserWindow, IpcMain as ElectronIpcMain } from "electron"; import { spawn, spawnSync } from "child_process"; import * as path from "path"; @@ -14,33 +15,18 @@ import { removeWorktreeSafe, removeWorktree, pruneWorktrees } from "@/services/g import { AIService } from "@/services/aiService"; import { HistoryService } from "@/services/historyService"; import { PartialService } from "@/services/partialService"; -import { createCmuxMessage, type CmuxMessage } from "@/types/message"; +import { AgentSession } from "@/services/agentSession"; +import type { CmuxMessage } from "@/types/message"; import { log } from "@/services/log"; -import type { - StreamStartEvent, - StreamDeltaEvent, - StreamEndEvent, - StreamAbortEvent, - ToolCallStartEvent, - ToolCallDeltaEvent, - ToolCallEndEvent, - ErrorEvent, -} from "@/types/stream"; import { IPC_CHANNELS, getChatChannel } from "@/constants/ipc-constants"; import type { SendMessageError } from "@/types/errors"; -import type { StreamErrorMessage, SendMessageOptions, DeleteMessage } from "@/types/ipc"; -import { Ok, Err, type Result } from "@/types/result"; +import type { SendMessageOptions, DeleteMessage } from "@/types/ipc"; +import { Ok, Err } from "@/types/result"; import { validateWorkspaceName } from "@/utils/validation/workspaceValidation"; import { createBashTool } from "@/services/tools/bash"; import type { BashToolResult } from "@/types/tools"; - import { secretsToRecord } from "@/types/secrets"; -const createUnknownSendMessageError = (raw: string): SendMessageError => ({ - type: "unknown", - raw, -}); - /** * IpcMain - Manages all IPC handlers and service coordination * @@ -59,6 +45,11 @@ export class IpcMain { private readonly historyService: HistoryService; private readonly partialService: PartialService; private readonly aiService: AIService; + private readonly sessions = new Map(); + private readonly sessionSubscriptions = new Map< + string, + { chat: () => void; metadata: () => void } + >(); private mainWindow: BrowserWindow | null = null; private registered 
= false; @@ -69,6 +60,68 @@ export class IpcMain { this.aiService = new AIService(config, this.historyService, this.partialService); } + private getOrCreateSession(workspaceId: string): AgentSession { + assert(typeof workspaceId === "string", "workspaceId must be a string"); + const trimmed = workspaceId.trim(); + assert(trimmed.length > 0, "workspaceId must not be empty"); + + let session = this.sessions.get(trimmed); + if (session) { + return session; + } + + session = new AgentSession({ + workspaceId: trimmed, + config: this.config, + historyService: this.historyService, + partialService: this.partialService, + aiService: this.aiService, + }); + + const chatUnsubscribe = session.onChatEvent((event) => { + if (!this.mainWindow) { + return; + } + const channel = getChatChannel(event.workspaceId); + this.mainWindow.webContents.send(channel, event.message); + }); + + const metadataUnsubscribe = session.onMetadataEvent((event) => { + if (!this.mainWindow) { + return; + } + this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, { + workspaceId: event.workspaceId, + metadata: event.metadata, + }); + }); + + this.sessions.set(trimmed, session); + this.sessionSubscriptions.set(trimmed, { + chat: chatUnsubscribe, + metadata: metadataUnsubscribe, + }); + + return session; + } + + private disposeSession(workspaceId: string): void { + const session = this.sessions.get(workspaceId); + if (!session) { + return; + } + + const subscriptions = this.sessionSubscriptions.get(workspaceId); + if (subscriptions) { + subscriptions.chat(); + subscriptions.metadata(); + this.sessionSubscriptions.delete(workspaceId); + } + + session.dispose(); + this.sessions.delete(workspaceId); + } + /** * Register all IPC handlers and setup event forwarding * @param ipcMain - Electron's ipcMain module @@ -90,7 +143,6 @@ export class IpcMain { this.registerProviderHandlers(ipcMain); this.registerProjectHandlers(ipcMain); this.registerSubscriptionHandlers(ipcMain); - this.setupEventForwarding(); this.registered = true; } @@ -121,65 +173,6 @@ export class IpcMain { }); } - /** - * Helper method: Stream AI response with history - * Shared logic between sendMessage and resumeStream handlers - */ - private async streamWithHistory( - workspaceId: string, - modelString: string, - options?: SendMessageOptions - ): Promise> { - const { - thinkingLevel, - toolPolicy, - additionalSystemInstructions, - maxOutputTokens, - providerOptions, - mode, - } = options ?? 
{}; - - // Commit any existing partial to history BEFORE loading - // This ensures interrupted messages are included in the AI's context - await this.partialService.commitToHistory(workspaceId); - - // Get full conversation history - const historyResult = await this.historyService.getHistory(workspaceId); - if (!historyResult.success) { - log.error("Failed to get conversation history:", historyResult.error); - return { - success: false, - error: createUnknownSendMessageError(historyResult.error), - }; - } - - // Stream the AI response - log.debug("Calling aiService.streamMessage", { - workspaceId, - thinkingLevel, - modelString, - toolPolicy, - additionalSystemInstructions, - maxOutputTokens, - providerOptions, - }); - - const streamResult = await this.aiService.streamMessage( - historyResult.data, - workspaceId, - modelString, - thinkingLevel, - toolPolicy, - undefined, - additionalSystemInstructions, - maxOutputTokens, - providerOptions, - mode - ); - log.debug("Stream completed", { workspaceId }); - return streamResult; - } - private registerWorkspaceHandlers(ipcMain: ElectronIpcMain): void { ipcMain.handle( IPC_CHANNELS.WORKSPACE_CREATE, @@ -235,10 +228,8 @@ export class IpcMain { }); // Emit metadata event for new workspace - this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, { - workspaceId, - metadata, - }); + const session = this.getOrCreateSession(workspaceId); + session.emitMetadata(metadata); return { success: true, @@ -380,16 +371,20 @@ export class IpcMain { }); // Emit metadata event for old workspace deletion - this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, { - workspaceId, - metadata: null, - }); + const oldSession = this.sessions.get(workspaceId); + if (oldSession) { + oldSession.emitMetadata(null); + this.disposeSession(workspaceId); + } else if (this.mainWindow) { + this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, { + workspaceId, + metadata: null, + }); + } // Emit metadata event for new workspace - this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, { - workspaceId: newWorkspaceId, - metadata: newMetadata, - }); + const newSession = this.getOrCreateSession(newWorkspaceId); + newSession.emitMetadata(newMetadata); return Ok({ newWorkspaceId }); } catch (error) { @@ -421,113 +416,23 @@ export class IpcMain { message: string, options?: SendMessageOptions & { imageParts?: Array<{ image: string; mimeType: string }> } ) => { - const { - editMessageId, - thinkingLevel, - model, - toolPolicy, - additionalSystemInstructions, - maxOutputTokens, - providerOptions, - imageParts, - mode, // Passed to streamWithHistory for plan file loading - } = options ?? {}; log.debug("sendMessage handler: Received", { workspaceId, messagePreview: message.substring(0, 50), - editMessageId, - thinkingLevel, - model, - toolPolicy, - mode, - additionalSystemInstructions, - maxOutputTokens, - providerOptions, + mode: options?.mode, + options, }); try { - // Reject empty messages - use interruptStream() to interrupt active streams - if (!message.trim() && (!imageParts || imageParts.length === 0)) { - log.debug("sendMessage handler: Rejected empty message (use interruptStream instead)"); - return { - success: false, - error: { - type: "unknown", - raw: "Empty message not allowed. 
Use interruptStream() to interrupt active streams.", - }, - }; - } - - // If editing, truncate history after the message being edited - if (editMessageId) { - const truncateResult = await this.historyService.truncateAfterMessage( + const session = this.getOrCreateSession(workspaceId); + const result = await session.sendMessage(message, options); + if (!result.success) { + log.error("sendMessage handler: session returned error", { workspaceId, - editMessageId - ); - if (!truncateResult.success) { - log.error("Failed to truncate history for edit:", truncateResult.error); - return { - success: false, - error: createUnknownSendMessageError(truncateResult.error), - }; - } - // Note: We don't send a clear event here. The aggregator will handle - // replacement automatically when the new message arrives with the same historySequence - } - - // Create user message with text and optional image parts - const messageId = `user-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`; - const additionalParts = imageParts?.map((img) => ({ - type: "image" as const, - image: img.image, - mimeType: img.mimeType, - })); - if (additionalParts && additionalParts.length > 0) { - log.debug("sendMessage: Creating message with images", { - imageCount: additionalParts.length, - mimeTypes: additionalParts.map((p) => p.mimeType), + error: result.error, }); } - const userMessage = createCmuxMessage( - messageId, - "user", - message, - { - // historySequence will be assigned by historyService.appendToHistory() - timestamp: Date.now(), - toolPolicy, // Store for historical record and compaction detection - }, - additionalParts - ); - - // Append user message to history - const appendResult = await this.historyService.appendToHistory(workspaceId, userMessage); - if (!appendResult.success) { - log.error("Failed to append message to history:", appendResult.error); - return { - success: false, - error: createUnknownSendMessageError(appendResult.error), - }; - } - - // Broadcast the user message immediately to the frontend - if (this.mainWindow) { - this.mainWindow.webContents.send(getChatChannel(workspaceId), userMessage); - } - - // Stream the AI response - if (!model) { - log.error("No model provided by frontend"); - return { - success: false, - error: createUnknownSendMessageError( - "No model specified. Please select a model using /model command." - ), - }; - } - - return await this.streamWithHistory(workspaceId, model, options); + return result; } catch (error) { - // Convert to SendMessageError for typed error handling const errorMessage = error instanceof Error ? 
error.message : String(error); log.error("Unexpected error in sendMessage handler:", error); const sendError: SendMessageError = { @@ -547,15 +452,15 @@ export class IpcMain { options, }); try { - // Idempotent: if stream already active, return success (not error) - // This makes client code simpler and more resilient - if (this.aiService.isStreaming(workspaceId)) { - log.debug("resumeStream handler: Stream already active, returning success"); - return { success: true }; + const session = this.getOrCreateSession(workspaceId); + const result = await session.resumeStream(options); + if (!result.success) { + log.error("resumeStream handler: session returned error", { + workspaceId, + error: result.error, + }); } - - // Stream the AI response with existing history (no new user message) - return await this.streamWithHistory(workspaceId, options.model, options); + return result; } catch (error) { // Convert to SendMessageError for typed error handling const errorMessage = error instanceof Error ? error.message : String(error); @@ -572,13 +477,8 @@ export class IpcMain { ipcMain.handle(IPC_CHANNELS.WORKSPACE_INTERRUPT_STREAM, async (_event, workspaceId: string) => { log.debug("interruptStream handler: Received", { workspaceId }); try { - // Idempotent: if not streaming, return success (not error) - if (!this.aiService.isStreaming(workspaceId)) { - log.debug("interruptStream handler: Not streaming, returning success"); - return { success: true, data: undefined }; - } - - const stopResult = await this.aiService.stopStream(workspaceId); + const session = this.getOrCreateSession(workspaceId); + const stopResult = await session.interruptStream(); if (!stopResult.success) { log.error("Failed to stop stream:", stopResult.error); return { success: false, error: stopResult.error }; @@ -896,10 +796,17 @@ export class IpcMain { } // Emit metadata event for workspace removal (with null metadata to indicate deletion) - this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, { - workspaceId, - metadata: null, // null indicates workspace was deleted - }); + const existingSession = this.sessions.get(workspaceId); + if (existingSession) { + existingSession.emitMetadata(null); + } else if (this.mainWindow) { + this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, { + workspaceId, + metadata: null, + }); + } + + this.disposeSession(workspaceId); return { success: true }; } catch (error) { @@ -1072,31 +979,15 @@ export class IpcMain { // Handle subscription events for chat history ipcMain.on(`workspace:chat:subscribe`, (_event, workspaceId: string) => { void (async () => { + const session = this.getOrCreateSession(workspaceId); const chatChannel = getChatChannel(workspaceId); - const history = await this.historyService.getHistory(workspaceId); - if (history.success) { - for (const msg of history.data) { - this.mainWindow?.webContents.send(chatChannel, msg); - } - - // Check if there's an active stream or a partial message - const streamInfo = this.aiService.getStreamInfo(workspaceId); - const partial = await this.partialService.readPartial(workspaceId); - - if (streamInfo) { - // Stream is actively running - replay events to re-establish streaming context - // Events flow: StreamManager → AIService → IpcMain → renderer - // This ensures frontend receives stream-start and creates activeStream entry - // so that stream-end can properly clean up the streaming indicator - this.aiService.replayStream(workspaceId); - } else if (partial) { - // No active stream but there's a partial - send as regular 
message (shows CONTINUE) - this.mainWindow?.webContents.send(chatChannel, partial); + await session.replayHistory((event) => { + if (!this.mainWindow) { + return; } - } - - this.mainWindow?.webContents.send(chatChannel, { type: "caught-up" }); + this.mainWindow.webContents.send(chatChannel, event.message); + }); })(); }); @@ -1118,89 +1009,6 @@ export class IpcMain { }); } - private setupEventForwarding(): void { - // Set up event listeners for AI service - this.aiService.on("stream-start", (data: StreamStartEvent) => { - if (this.mainWindow) { - // Send the actual stream-start event - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - }); - - this.aiService.on("stream-delta", (data: StreamDeltaEvent) => { - if (this.mainWindow) { - // Send ONLY the delta event - efficient IPC usage - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - }); - - this.aiService.on("stream-end", (data: StreamEndEvent) => { - if (this.mainWindow) { - // Send the stream-end event with final content and metadata - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - }); - - // Forward tool events to renderer - this.aiService.on("tool-call-start", (data: ToolCallStartEvent) => { - if (this.mainWindow) { - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - }); - - this.aiService.on("tool-call-delta", (data: ToolCallDeltaEvent) => { - if (this.mainWindow) { - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - }); - - this.aiService.on("tool-call-end", (data: ToolCallEndEvent) => { - if (this.mainWindow) { - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - }); - - // Forward reasoning events to renderer - this.aiService.on( - "reasoning-delta", - (data: { type: string; workspaceId: string; messageId: string; delta: string }) => { - if (this.mainWindow) { - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - } - ); - - this.aiService.on( - "reasoning-end", - (data: { type: string; workspaceId: string; messageId: string }) => { - if (this.mainWindow) { - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - } - ); - - this.aiService.on("error", (data: ErrorEvent) => { - if (this.mainWindow) { - // Send properly typed StreamErrorMessage - const errorMessage: StreamErrorMessage = { - type: "stream-error", - messageId: data.messageId, - error: data.error, - errorType: data.errorType ?? "unknown", - }; - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), errorMessage); - } - }); - - // Handle stream abort events - this.aiService.on("stream-abort", (data: StreamAbortEvent) => { - if (this.mainWindow) { - // Forward complete abort event including metadata (usage, duration) - this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data); - } - }); - } - /** * Check if a command is available in the system PATH */ diff --git a/src/services/utils/sendMessageError.ts b/src/services/utils/sendMessageError.ts new file mode 100644 index 0000000000..6b34497796 --- /dev/null +++ b/src/services/utils/sendMessageError.ts @@ -0,0 +1,17 @@ +import assert from "node:assert/strict"; +import type { SendMessageError } from "@/types/errors"; + +/** + * Helper to wrap arbitrary errors into SendMessageError structures. + * Enforces that the raw string is non-empty for defensive debugging. 
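+ *
+ * Illustrative usage sketch (the literal below is a placeholder, not taken from the change itself):
+ * @example
+ *   const err = createUnknownSendMessageError("history read failed");
+ *   // err => { type: "unknown", raw: "history read failed" }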
+ */
+export const createUnknownSendMessageError = (raw: string): SendMessageError => {
+  assert(typeof raw === "string", "Expected raw error to be a string");
+  const trimmed = raw.trim();
+  assert(trimmed.length > 0, "createUnknownSendMessageError requires a non-empty message");
+
+  return {
+    type: "unknown",
+    raw: trimmed,
+  };
+};
diff --git a/src/utils/ai/providerOptions.ts b/src/utils/ai/providerOptions.ts
index b4926aa7ce..65497ef46e 100644
--- a/src/utils/ai/providerOptions.ts
+++ b/src/utils/ai/providerOptions.ts
@@ -93,7 +93,7 @@ export function buildProviderOptions(
       }),
     },
   };
-  log.info("buildProviderOptions: Returning Anthropic options", options);
+  log.debug("buildProviderOptions: Returning Anthropic options", options);
   return options;
 }
 
diff --git a/src/utils/providers/ensureProvidersConfig.ts b/src/utils/providers/ensureProvidersConfig.ts
new file mode 100644
index 0000000000..0b205dcfa4
--- /dev/null
+++ b/src/utils/providers/ensureProvidersConfig.ts
@@ -0,0 +1,112 @@
+import type { Config, ProviderConfig, ProvidersConfig } from "@/config";
+
+const trim = (value: unknown): string => (typeof value === "string" ? value.trim() : "");
+
+const hasApiKey = (config: ProviderConfig | undefined): boolean =>
+  Boolean(config && typeof config.apiKey === "string" && config.apiKey.trim().length > 0);
+
+const hasAnyConfiguredProvider = (providers: ProvidersConfig | null | undefined): boolean => {
+  if (!providers) {
+    return false;
+  }
+
+  return Object.values(providers).some((providerConfig) => hasApiKey(providerConfig));
+};
+
+const buildProvidersFromEnv = (env: NodeJS.ProcessEnv): ProvidersConfig => {
+  const providers: ProvidersConfig = {};
+
+  const anthropicKey = trim(env.ANTHROPIC_API_KEY);
+  if (anthropicKey.length > 0) {
+    const entry: ProviderConfig = { apiKey: anthropicKey };
+
+    const baseUrl = trim(env.ANTHROPIC_BASE_URL);
+    if (baseUrl.length > 0) {
+      entry.baseUrl = baseUrl;
+    }
+
+    providers.anthropic = entry;
+  }
+
+  const openAIKey = trim(env.OPENAI_API_KEY);
+  if (openAIKey.length > 0) {
+    const entry: ProviderConfig = { apiKey: openAIKey };
+
+    const baseUrlCandidates = [env.OPENAI_BASE_URL, env.OPENAI_API_BASE];
+    for (const candidate of baseUrlCandidates) {
+      const baseUrl = trim(candidate);
+      if (baseUrl.length > 0) {
+        entry.baseUrl = baseUrl;
+        break;
+      }
+    }
+
+    const organization = trim(env.OPENAI_ORG_ID);
+    if (organization.length > 0) {
+      entry.organization = organization;
+    }
+
+    providers.openai = entry;
+  }
+
+  if (!providers.openai) {
+    const azureKey = trim(env.AZURE_OPENAI_API_KEY);
+    const azureEndpoint = trim(env.AZURE_OPENAI_ENDPOINT);
+
+    if (azureKey.length > 0 && azureEndpoint.length > 0) {
+      const entry: ProviderConfig = {
+        apiKey: azureKey,
+        baseUrl: azureEndpoint,
+      };
+
+      const deployment = trim(env.AZURE_OPENAI_DEPLOYMENT);
+      if (deployment.length > 0) {
+        entry.defaultModel = deployment;
+      }
+
+      const apiVersion = trim(env.AZURE_OPENAI_API_VERSION);
+      if (apiVersion.length > 0) {
+        entry.apiVersion = apiVersion;
+      }
+
+      providers.openai = entry;
+    }
+  }
+
+  return providers;
+};
+
+export const ensureProvidersConfig = (
+  config: Config,
+  env: NodeJS.ProcessEnv = process.env
+): ProvidersConfig => {
+  if (!config) {
+    throw new Error("Config instance is required to ensure providers configuration");
+  }
+
+  const existingProviders = config.loadProvidersConfig();
+  if (hasAnyConfiguredProvider(existingProviders)) {
+    if (!existingProviders) {
+      throw new Error(
+        "Providers config reported configured providers but returned null. Please validate providers.jsonc."
+      );
+    }
+    return existingProviders;
+  }
+
+  const providersFromEnv = buildProvidersFromEnv(env);
+  if (!hasAnyConfiguredProvider(providersFromEnv)) {
+    throw new Error(
+      "No provider credentials found. Configure providers.jsonc or set ANTHROPIC_API_KEY / OPENAI_API_KEY."
+    );
+  }
+
+  config.saveProvidersConfig(providersFromEnv);
+  return providersFromEnv;
+};
+
+export const getProvidersFromEnv = (env: NodeJS.ProcessEnv = process.env): ProvidersConfig =>
+  buildProvidersFromEnv(env);
+
+export const hasAnyProvidersConfigured = (providers: ProvidersConfig | null | undefined): boolean =>
+  hasAnyConfiguredProvider(providers);
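Taken together, the ipcMain.ts hunks above replace direct AIService event forwarding with a per-workspace AgentSession cache. A reduced, self-contained sketch of that caching pattern follows; the Session interface and helper names are illustrative stand-ins, not the real AgentSession API:

// Reduced sketch: one cached session per workspace id, with its forwarding
// subscription torn down before the session itself is disposed.
type Unsubscribe = () => void;

interface Session {
  onEvent(listener: (msg: string) => void): Unsubscribe; // stand-in for onChatEvent/onMetadataEvent
  dispose(): void;
}

const sessions = new Map<string, Session>();
const subscriptions = new Map<string, Unsubscribe>();

function getOrCreate(id: string, create: () => Session): Session {
  const existing = sessions.get(id);
  if (existing) {
    return existing; // repeat lookups reuse the cached instance
  }
  const session = create();
  subscriptions.set(id, session.onEvent((msg) => console.log(`[${id}]`, msg)));
  sessions.set(id, session);
  return session;
}

function dispose(id: string): void {
  subscriptions.get(id)?.(); // unsubscribe first so no events reach a disposed session
  subscriptions.delete(id);
  sessions.get(id)?.dispose();
  sessions.delete(id);
}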
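The sendMessage, resumeStream, and interruptStream handlers now return the session's result object unchanged. A usage sketch for consuming that shape, assuming a session obtained as in the hunks above; the message text and model string are placeholders:

// Illustrative: branch on the { success, error } result the handlers pass through verbatim.
const result = await session.sendMessage("summarize the failing tests", {
  model: "anthropic:claude-sonnet-4-5", // placeholder model string
});

if (!result.success) {
  // result.error follows SendMessageError, e.g. { type: "unknown", raw: "..." }
  console.error("sendMessage failed:", result.error);
}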
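The chat subscribe handler delegates replay to session.replayHistory and forwards only event.message to the renderer. A sketch of the same callback contract with an in-memory consumer; the array is illustrative, and exactly which event kinds are replayed is left to AgentSession:

// Illustrative: gather replayed chat events instead of sending them over IPC.
const replayed: unknown[] = [];
await session.replayHistory((event) => {
  replayed.push(event.message); // each replayed history entry or partial, in order
});
console.log(`replayed ${replayed.length} events`);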
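A minimal usage sketch for the new ensureProvidersConfig helper, assuming a Config instance is available in scope; the API key value is a placeholder:

import { ensureProvidersConfig, hasAnyProvidersConfigured } from "@/utils/providers/ensureProvidersConfig";

// Illustrative: when providers.jsonc has no usable keys, fall back to environment credentials.
process.env.ANTHROPIC_API_KEY ??= "sk-ant-placeholder"; // placeholder, not a real key
const providers = ensureProvidersConfig(config); // persists the env-derived config via saveProvidersConfig
console.log(hasAnyProvidersConfigured(providers)); // true once at least one apiKey is present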