diff --git a/.gitignore b/.gitignore
index 560ab5cbe2..368183a9f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,10 @@ CODE_CHANGES.md
README_COMPACT_HERE.md
artifacts/
tests/e2e/tmp/
+runs/
+
+# Python
+__pycache__
+
+tmpfork
+.cmux-agent-cli
diff --git a/Makefile b/Makefile
index 1b9227b57d..b7ec50ff06 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,7 @@ include fmt.mk
.PHONY: test test-unit test-integration test-watch test-coverage test-e2e
.PHONY: dist dist-mac dist-win dist-linux
.PHONY: docs docs-build docs-watch
+.PHONY: benchmark-terminal
.PHONY: ensure-deps
TS_SOURCES := $(shell find src -type f \( -name '*.ts' -o -name '*.tsx' \))
@@ -174,6 +175,19 @@ docs-build: ## Build documentation
docs-watch: ## Watch and rebuild documentation
@cd docs && mdbook watch
+## Benchmarks
+benchmark-terminal: ## Run Terminal-Bench with the cmux agent (use TB_DATASET/TB_ARGS to customize)
+ @TB_DATASET=$${TB_DATASET:-terminal-bench-core==0.1.1}; \
+ CONCURRENCY_FLAG=$${TB_CONCURRENCY:+--n-concurrent $$TB_CONCURRENCY}; \
+ LIVESTREAM_FLAG=$${TB_LIVESTREAM:+--livestream}; \
+ echo "Running Terminal-Bench with dataset $$TB_DATASET"; \
+ uvx terminal-bench run \
+ --dataset "$$TB_DATASET" \
+ --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent \
+ $$CONCURRENCY_FLAG \
+ $$LIVESTREAM_FLAG \
+ $${TB_ARGS}
+
## Clean
clean: ## Clean build artifacts
@echo "Cleaning build artifacts..."
@@ -182,5 +196,3 @@ clean: ## Clean build artifacts
# Parallel build optimization - these can run concurrently
.NOTPARALLEL: build-main # TypeScript can handle its own parallelism
-
-
diff --git a/benchmarks/terminal_bench/__init__.py b/benchmarks/terminal_bench/__init__.py
new file mode 100644
index 0000000000..228448c524
--- /dev/null
+++ b/benchmarks/terminal_bench/__init__.py
@@ -0,0 +1,3 @@
+from .cmux_agent import CmuxAgent
+
+__all__ = ["CmuxAgent"]
diff --git a/benchmarks/terminal_bench/cmux-run.sh b/benchmarks/terminal_bench/cmux-run.sh
new file mode 100644
index 0000000000..4aaa698956
--- /dev/null
+++ b/benchmarks/terminal_bench/cmux-run.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+log() {
+ printf '[cmux-run] %s\n' "$1"
+}
+
+fatal() {
+ printf '[cmux-run] ERROR: %s\n' "$1" >&2
+ exit 1
+}
+
+instruction=${1:-}
+if [[ -z "${instruction}" ]]; then
+ fatal "instruction argument is required"
+fi
+
+export BUN_INSTALL="${BUN_INSTALL:-/root/.bun}"
+export PATH="${BUN_INSTALL}/bin:${PATH}"
+
+CMUX_APP_ROOT="${CMUX_APP_ROOT:-/opt/cmux-app}"
+CMUX_CONFIG_ROOT="${CMUX_CONFIG_ROOT:-/root/.cmux}"
+CMUX_PROJECT_PATH="${CMUX_PROJECT_PATH:-}"
+CMUX_PROJECT_CANDIDATES="${CMUX_PROJECT_CANDIDATES:-/workspace:/app:/workspaces:/root/project}"
+CMUX_MODEL="${CMUX_MODEL:-anthropic:claude-sonnet-4-5}"
+CMUX_TIMEOUT_MS="${CMUX_TIMEOUT_MS:-}"
+CMUX_TRUNK="${CMUX_TRUNK:-main}"
+CMUX_WORKSPACE_ID="${CMUX_WORKSPACE_ID:-cmux-bench}"
+CMUX_THINKING_LEVEL="${CMUX_THINKING_LEVEL:-high}"
+CMUX_MODE="${CMUX_MODE:-exec}"
+
+ensure_bun() {
+ if ! command -v bun >/dev/null 2>&1; then
+ fatal "bun must be installed before running the cmux agent"
+ fi
+}
+
+resolve_project_path() {
+ if [[ -n "${CMUX_PROJECT_PATH}" ]]; then
+ if [[ -d "${CMUX_PROJECT_PATH}" ]]; then
+ printf '%s\n' "${CMUX_PROJECT_PATH}"
+ return 0
+ fi
+ fatal "CMUX_PROJECT_PATH=${CMUX_PROJECT_PATH} not found"
+ fi
+
+ IFS=":" read -r -a candidates <<<"${CMUX_PROJECT_CANDIDATES}"
+ for candidate in "${candidates[@]}"; do
+ if [[ -d "${candidate}" ]]; then
+ printf '%s\n' "${candidate}"
+ return 0
+ fi
+ done
+
+ fatal "no project path located (searched ${CMUX_PROJECT_CANDIDATES})"
+}
+
+ensure_git_repo() {
+ local project_path=$1
+
+ if command -v git >/dev/null 2>&1; then
+ if git -C "${project_path}" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+ # Ensure trunk branch exists even on pre-existing repos.
+ if ! git -C "${project_path}" rev-parse --verify "${CMUX_TRUNK}" >/dev/null 2>&1; then
+ git -C "${project_path}" checkout -b "${CMUX_TRUNK}" >/dev/null 2>&1 || true
+ else
+ git -C "${project_path}" checkout "${CMUX_TRUNK}" >/dev/null 2>&1 || true
+ fi
+ return 0
+ fi
+
+ log "initialising git repository at ${project_path}"
+ if git -C "${project_path}" init --initial-branch="${CMUX_TRUNK}" >/dev/null 2>&1; then
+ :
+ else
+ git -C "${project_path}" init >/dev/null
+ git -C "${project_path}" checkout -B "${CMUX_TRUNK}" >/dev/null
+ fi
+ git -C "${project_path}" config user.name "cmux-bench"
+ git -C "${project_path}" config user.email "bench@cmux.local"
+ git -C "${project_path}" add -A >/dev/null
+ git -C "${project_path}" commit -m "chore: initial snapshot" --allow-empty >/dev/null
+ git -C "${project_path}" branch -M "${CMUX_TRUNK}" >/dev/null
+ else
+ log "git not available; skipping repository initialisation"
+ fi
+}
+
+ensure_bun
+project_path=$(resolve_project_path)
+ensure_git_repo "${project_path}"
+
+bun --version >/dev/null 2>&1 || fatal "bun not available after ensure_bun"
+
+log "starting cmux agent session for ${project_path}"
+cd "${CMUX_APP_ROOT}"
+
+cmd=(bun src/debug/agentSessionCli.ts
+ --config-root "${CMUX_CONFIG_ROOT}"
+ --project-path "${project_path}"
+ --workspace-path "${project_path}"
+ --workspace-id "${CMUX_WORKSPACE_ID}"
+ --model "${CMUX_MODEL}"
+ --mode "${CMUX_MODE}")
+
+if [[ -n "${CMUX_TIMEOUT_MS}" ]]; then
+ cmd+=(--timeout "${CMUX_TIMEOUT_MS}")
+fi
+
+if [[ -n "${CMUX_THINKING_LEVEL}" ]]; then
+ cmd+=(--thinking-level "${CMUX_THINKING_LEVEL}")
+fi
+
+if ! printf '%s' "${instruction}" | "${cmd[@]}"; then
+ fatal "cmux agent session failed"
+fi
diff --git a/benchmarks/terminal_bench/cmux_agent.py b/benchmarks/terminal_bench/cmux_agent.py
new file mode 100644
index 0000000000..2653ec5c97
--- /dev/null
+++ b/benchmarks/terminal_bench/cmux_agent.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+import io
+import os
+import shlex
+import tarfile
+import tempfile
+from pathlib import Path
+from typing import Any
+
+from terminal_bench.agents.base_agent import AgentResult
+from terminal_bench.agents.installed_agents.abstract_installed_agent import (
+ AbstractInstalledAgent,
+)
+from terminal_bench.terminal.models import TerminalCommand
+from terminal_bench.terminal.tmux_session import TmuxSession
+
+
+class CmuxAgent(AbstractInstalledAgent):
+ """
+ Minimal Terminal-Bench adapter that installs cmux into the task container and
+ forwards the benchmark instruction to the cmux headless runner.
+ """
+
+ _ARCHIVE_NAME = "cmux-app.tar.gz"
+ _RUNNER_NAME = "cmux-run.sh"
+ _DEFAULT_TRUNK = "main"
+ _DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
+
+ def __init__(
+ self, mode: str | None = None, thinking_level: str | None = None, **kwargs: Any
+ ) -> None:
+ super().__init__(**kwargs)
+ repo_root_env = os.environ.get("CMUX_AGENT_REPO_ROOT")
+ repo_root = (
+ Path(repo_root_env).resolve()
+ if repo_root_env
+ else Path(__file__).resolve().parents[2]
+ )
+ if not repo_root.exists():
+ raise RuntimeError(f"cmux repo root {repo_root} does not exist")
+
+ self._repo_root = repo_root
+ self._archive_bytes: bytes | None = None
+ self._prepared_container_id: str | None = None
+ self._mode = mode.lower() if mode else None
+ self._thinking_level = thinking_level.lower() if thinking_level else None
+
+ @staticmethod
+ def name() -> str:
+ return "cmux"
+
+ @property
+ def _env(self) -> dict[str, str]:
+ keys = [
+ "ANTHROPIC_API_KEY",
+ "ANTHROPIC_BASE_URL",
+ "OPENAI_API_KEY",
+ "OPENAI_BASE_URL",
+ "OPENAI_API_BASE",
+ "OPENAI_ORG_ID",
+ "AZURE_OPENAI_API_KEY",
+ "AZURE_OPENAI_ENDPOINT",
+ "AZURE_OPENAI_DEPLOYMENT",
+ "AZURE_OPENAI_API_VERSION",
+ "MISTRAL_API_KEY",
+ "GOOGLE_API_KEY",
+ "OPENROUTER_API_KEY",
+ "CMUX_AGENT_GIT_URL",
+ "CMUX_BUN_INSTALL_URL",
+ "CMUX_PROJECT_PATH",
+ "CMUX_PROJECT_CANDIDATES",
+ "CMUX_TRUNK",
+ "CMUX_MODEL",
+ "CMUX_TIMEOUT_MS",
+ "CMUX_THINKING_LEVEL",
+ "CMUX_CONFIG_ROOT",
+ "CMUX_APP_ROOT",
+ "CMUX_WORKSPACE_ID",
+ ]
+
+ env: dict[str, str] = {}
+ for key in keys:
+ value = os.environ.get(key)
+ if value:
+ env[key] = value
+
+ env.setdefault("CMUX_TRUNK", self._DEFAULT_TRUNK)
+ env.setdefault("CMUX_MODEL", self._DEFAULT_MODEL)
+ env.setdefault("CMUX_CONFIG_ROOT", "/root/.cmux")
+ env.setdefault("CMUX_APP_ROOT", "/opt/cmux-app")
+ env.setdefault("CMUX_WORKSPACE_ID", "cmux-bench")
+ env.setdefault("CMUX_THINKING_LEVEL", "high")
+ env.setdefault("CMUX_MODE", "exec")
+
+ model_value = env.get("CMUX_MODEL")
+ if model_value and "/" in model_value and ":" not in model_value:
+ provider, model_name = model_value.split("/", 1)
+ env["CMUX_MODEL"] = f"{provider}:{model_name}"
+
+ thinking_value = self._thinking_level or env.get("CMUX_THINKING_LEVEL")
+ if thinking_value:
+ normalized = thinking_value.strip().lower()
+ if normalized not in {"off", "low", "medium", "high"}:
+ raise ValueError(
+ "CMUX_THINKING_LEVEL must be one of off, low, medium, high"
+ )
+ env["CMUX_THINKING_LEVEL"] = normalized
+
+ mode_value = self._mode or env.get("CMUX_MODE")
+ if mode_value:
+ normalized_mode = mode_value.strip().lower()
+ if normalized_mode in {"exec", "execute"}:
+ env["CMUX_MODE"] = "exec"
+ elif normalized_mode == "plan":
+ env["CMUX_MODE"] = "plan"
+ else:
+ raise ValueError("CMUX_MODE must be one of plan, exec, or execute")
+
+ return env
+
+ @property
+ def _install_agent_script_path(self) -> Path:
+ return self._get_templated_script_path("cmux_setup.sh.j2")
+
+ def perform_task(
+ self,
+ instruction: str,
+ session: TmuxSession,
+ logging_dir=None,
+ ) -> AgentResult:
+ if not instruction or not instruction.strip():
+ raise ValueError("instruction must be a non-empty string")
+
+ self._prepare_payloads(session)
+ return super().perform_task(
+ instruction=instruction, session=session, logging_dir=logging_dir
+ )
+
+ def _prepare_payloads(self, session: TmuxSession) -> None:
+ container_id = getattr(session.container, "id", None)
+ if container_id and container_id == self._prepared_container_id:
+ return
+
+ archive = self._build_archive()
+ temp_path: Path | None = None
+ try:
+ with tempfile.NamedTemporaryFile(
+ suffix=".tar.gz", delete=False
+ ) as temp_file:
+ temp_file.write(archive)
+ temp_path = Path(temp_file.name)
+ except Exception as error:
+ raise RuntimeError(
+ f"failed to materialize cmux archive: {error}"
+ ) from error
+
+ try:
+ assert temp_path is not None, "temporary archive path missing"
+ session.copy_to_container(
+ paths=temp_path,
+ container_dir="/installed-agent",
+ container_filename=self._ARCHIVE_NAME,
+ )
+ finally:
+ if temp_path is not None:
+ temp_path.unlink(missing_ok=True)
+
+ runner_path = Path(__file__).with_name(self._RUNNER_NAME)
+ if not runner_path.exists():
+ raise RuntimeError(f"cmux runner script missing at {runner_path}")
+
+ session.copy_to_container(
+ paths=runner_path,
+ container_dir="/installed-agent",
+ container_filename=self._RUNNER_NAME,
+ )
+
+ if container_id:
+ self._prepared_container_id = container_id
+
+ def _build_archive(self) -> bytes:
+ if self._archive_bytes is not None:
+ return self._archive_bytes
+
+ include_paths = [
+ "package.json",
+ "bun.lock",
+ "bunfig.toml",
+ "tsconfig.json",
+ "tsconfig.main.json",
+ "src",
+ ]
+
+ buffer = io.BytesIO()
+ with tarfile.open(fileobj=buffer, mode="w:gz") as tar:
+ for relative in include_paths:
+ source_path = self._repo_root / relative
+ if not source_path.exists():
+ raise FileNotFoundError(f"Required file {source_path} not found")
+ tar.add(
+ source_path,
+ arcname=relative,
+ recursive=True,
+ )
+ buffer.seek(0)
+ self._archive_bytes = buffer.getvalue()
+ return self._archive_bytes
+
+ def _run_agent_commands(self, instruction: str) -> list[TerminalCommand]:
+ escaped = shlex.quote(instruction)
+ command = f"bash /installed-agent/{self._RUNNER_NAME} {escaped}"
+ return [
+ TerminalCommand(
+ command=command,
+ min_timeout_sec=0.0,
+ max_timeout_sec=float("inf"),
+ block=True,
+ append_enter=True,
+ )
+ ]
diff --git a/benchmarks/terminal_bench/cmux_setup.sh.j2 b/benchmarks/terminal_bench/cmux_setup.sh.j2
new file mode 100644
index 0000000000..e5b1542f0f
--- /dev/null
+++ b/benchmarks/terminal_bench/cmux_setup.sh.j2
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+log() {
+ printf '[cmux-setup] %s\n' "$1"
+}
+
+ensure_tool() {
+ if command -v "$1" >/dev/null 2>&1; then
+ return 0
+ fi
+
+ if ! command -v apt-get >/dev/null 2>&1; then
+ printf 'Required tool "%s" missing and apt-get unavailable\n' "$1" >&2
+ return 1
+ fi
+
+ log "installing missing dependency: $1"
+ export DEBIAN_FRONTEND=noninteractive
+ apt-get update
+ apt-get install -y "$1"
+}
+
+ensure_tool curl
+ensure_tool git
+ensure_tool unzip
+export BUN_INSTALL="${BUN_INSTALL:-/root/.bun}"
+export PATH="${BUN_INSTALL}/bin:${PATH}"
+
+if ! command -v bun >/dev/null 2>&1; then
+ log "installing bun"
+ curl -fsSL "${CMUX_BUN_INSTALL_URL:-https://bun.sh/install}" | bash
+fi
+
+CMUX_APP_ROOT="${CMUX_APP_ROOT:-/opt/cmux-app}"
+CMUX_CONFIG_ROOT="${CMUX_CONFIG_ROOT:-/root/.cmux}"
+CMUX_AGENT_VERSION="{{ version if version is not none else '' }}"
+
+mkdir -p "$CMUX_APP_ROOT"
+
+if [[ -n "${CMUX_AGENT_VERSION}" ]]; then
+ : "${CMUX_AGENT_GIT_URL:?CMUX_AGENT_GIT_URL must be set when version is provided}"
+ log "cloning cmux from ${CMUX_AGENT_GIT_URL} @ ${CMUX_AGENT_VERSION}"
+ rm -rf "${CMUX_APP_ROOT}"
+ git clone --depth 1 --branch "${CMUX_AGENT_VERSION}" "${CMUX_AGENT_GIT_URL}" "${CMUX_APP_ROOT}"
+else
+ ARCHIVE_PATH="/installed-agent/cmux-app.tar.gz"
+ if [[ ! -s "${ARCHIVE_PATH}" ]]; then
+ printf 'Expected cmux archive at %s\n' "${ARCHIVE_PATH}" >&2
+ exit 1
+ fi
+ log "extracting cmux archive"
+ rm -rf "${CMUX_APP_ROOT}"
+ mkdir -p "${CMUX_APP_ROOT}"
+ tar -xzf "${ARCHIVE_PATH}" -C "${CMUX_APP_ROOT}"
+fi
+
+cd "${CMUX_APP_ROOT}"
+
+log "installing cmux dependencies via bun"
+bun install --frozen-lockfile
+
+mkdir -p "${CMUX_CONFIG_ROOT}"
+
+chmod +x /installed-agent/cmux-run.sh
+
+log "setup complete"
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 23e3de18f4..bbb4bfa3f2 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -10,5 +10,6 @@
- [System Prompt](./system-prompt.md)
- [Instruction Files](./instruction-files.md)
- [Project Secrets](./project-secrets.md)
- - [Agentic Git Identity](./agentic-git-identity.md)
+- [Agentic Git Identity](./agentic-git-identity.md)
+- [Terminal Benchmarking](./benchmarking.md)
- [AGENTS](./AGENTS.md)
diff --git a/docs/benchmarking.md b/docs/benchmarking.md
new file mode 100644
index 0000000000..12a49ade1a
--- /dev/null
+++ b/docs/benchmarking.md
@@ -0,0 +1,76 @@
+# Terminal Benchmarking
+
+cmux ships with a headless adapter for [Terminal-Bench](https://www.tbench.ai/). The adapter runs the Electron backend without opening a window and exercises it through the same IPC paths we use in integration tests. This page documents how to launch benchmarks from the repository tree.
+
+## Prerequisites
+
+- Docker must be installed and running. Terminal-Bench executes each task inside a dedicated Docker container.
+- `uv` is available in the nix `devShell` (provided via `flake.nix`), or can be installed manually by following the upstream uv installation instructions.
+- Standard provider API keys (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`) should be exported so cmux can stream responses.
+
+Optional environment overrides:
+
+| Variable | Purpose | Default |
+| ---------------------- | --------------------------------------------------------- | -------------------------------------- |
+| `CMUX_AGENT_REPO_ROOT` | Path copied into each task container | repo root inferred from the agent file |
+| `CMUX_TRUNK` | Branch checked out when preparing the project | `main` |
+| `CMUX_WORKSPACE_ID` | Workspace identifier used inside cmux | `cmux-bench` |
+| `CMUX_MODEL` | Preferred model (supports `provider/model` syntax) | `anthropic/claude-sonnet-4-5` |
+| `CMUX_THINKING_LEVEL` | Optional reasoning level (`off`, `low`, `medium`, `high`) | `high` |
+| `CMUX_MODE` | Starting mode (`plan` or `exec`) | `exec` |
+| `CMUX_TIMEOUT_MS` | Optional stream timeout in milliseconds | no timeout |
+| `CMUX_CONFIG_ROOT` | Location for cmux session data inside the container | `/root/.cmux` |
+| `CMUX_APP_ROOT` | Path where the cmux sources are staged | `/opt/cmux-app` |
+| `CMUX_PROJECT_PATH` | Explicit project directory inside the task container | auto-detected from common paths |
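+
+These are ordinary environment variables, so they can simply be exported before running any of the commands in the next section. A small illustrative example (the values are placeholders, not recommendations):
+
+```bash
+export CMUX_PROJECT_PATH=/app        # only needed if auto-detection misses the project directory
+export CMUX_TIMEOUT_MS=600000        # abort a stream after 10 minutes instead of waiting indefinitely
+export CMUX_THINKING_LEVEL=medium    # dial reasoning down from the default "high"
+```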
+
+## Running Terminal-Bench
+
+All commands below should be run from the repository root.
+
+### Quick smoke test (single task)
+
+```bash
+uvx terminal-bench run \
+ --dataset terminal-bench-core==0.1.1 \
+ --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent \
+ --n-tasks 1
+```
+
+This downloads the Terminal-Bench runner, copies the cmux sources into the container, and validates the adapter against the first task only. Use this before attempting a full sweep.
+
+### Full dataset
+
+```bash
+uvx terminal-bench run \
+ --dataset terminal-bench-core==0.1.1 \
+ --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent
+```
+
+Results (pass/fail, token usage, wall-clock) are printed at the end of the run. Terminal-Bench also writes per-task logs under the current working directory; review them when diagnosing failures.
+
+You can also use `make`:
+
+```bash
+TB_CONCURRENCY=6 TB_LIVESTREAM=1 \
+make benchmark-terminal TB_ARGS="--n-tasks 3 --model anthropic/claude-sonnet-4-20250514 --agent-kwarg mode=plan --agent-kwarg thinking_level=medium"
+```
+
+`TB_DATASET` defaults to `terminal-bench-core==0.1.1`, but can be overridden (e.g. `make benchmark-terminal TB_DATASET=terminal-bench-core==head`).
+Use `--agent-kwarg mode=plan` to exercise the plan/execute workflow—the CLI will gather a plan first, then automatically approve it and switch to execution. Leaving the flag off (or setting `mode=exec`) skips the planning phase.
+Use `TB_CONCURRENCY=` to control `--n-concurrent` (number of concurrently running tasks) and `TB_LIVESTREAM=1` to stream log output live instead of waiting for the run to finish. These map to Terminal-Bench’s `--n-concurrent` and `--livestream` flags.
+
+## How the Adapter Works
+
+The adapter lives in `benchmarks/terminal_bench/cmux_agent.py`. For each task it:
+
+1. Copies the cmux repository (package manifests + `src/`) into the container and stages it at `/opt/cmux-app` (the default `CMUX_APP_ROOT`).
+2. Ensures Bun exists, then runs `bun install --frozen-lockfile`.
+3. Launches `src/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `CMUX_CONFIG_ROOT` (default `/root/.cmux`).
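+
+Under the hood, the invocation that `cmux-run.sh` assembles looks roughly like the following sketch, with the benchmark instruction piped on stdin (values shown are the defaults from the table above; the actual paths and flags depend on your environment overrides):
+
+```bash
+cd /opt/cmux-app
+printf '%s' "$INSTRUCTION" | bun src/debug/agentSessionCli.ts \
+  --config-root /root/.cmux \
+  --project-path /workspace \
+  --workspace-path /workspace \
+  --workspace-id cmux-bench \
+  --model anthropic:claude-sonnet-4-5 \
+  --mode exec \
+  --thinking-level high
+```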
+
+`CMUX_MODEL` accepts either the cmux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide.
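+
+For example, either of these should select the same model (the variable is inherited through `make` and `uvx`):
+
+```bash
+CMUX_MODEL=anthropic/claude-sonnet-4-5 make benchmark-terminal   # Terminal-Bench style
+CMUX_MODEL=anthropic:claude-sonnet-4-5 make benchmark-terminal   # cmux style
+```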
+
+## Troubleshooting
+
+- **`command not found: bun`** – ensure the container can reach Bun’s install script, or pre-install Bun in your base image. The adapter aborts if the install step fails.
+- **Workspace creation errors** – set `CMUX_PROJECT_PATH` to the project directory inside the task container if auto-discovery misses it.
+- **Streaming timeouts** – pass `--n-tasks 1` while iterating on fixes, or set `CMUX_TIMEOUT_MS=180000` to reinstate a timeout if needed.
diff --git a/flake.nix b/flake.nix
index d3e7785bd7..52784c734c 100644
--- a/flake.nix
+++ b/flake.nix
@@ -129,6 +129,10 @@
git
bash
nixfmt-rfc-style
+
+ # Terminal bench
+ uv
+ asciinema
];
};
}
diff --git a/src/bench/headlessEnvironment.ts b/src/bench/headlessEnvironment.ts
new file mode 100644
index 0000000000..c72e50a2f5
--- /dev/null
+++ b/src/bench/headlessEnvironment.ts
@@ -0,0 +1,125 @@
+import * as os from "os";
+import * as path from "path";
+import * as fs from "fs/promises";
+import createIPCMock from "electron-mock-ipc";
+import type { BrowserWindow, IpcMain as ElectronIpcMain, WebContents } from "electron";
+import { Config } from "@/config";
+import { IpcMain } from "@/services/ipcMain";
+
+type MockedElectron = ReturnType<typeof createIPCMock>;
+
+interface CreateHeadlessEnvironmentOptions {
+ /**
+ * Override the root directory for Config. If omitted, a temporary directory is created.
+ */
+ rootDir?: string;
+}
+
+export interface HeadlessEnvironment {
+ config: Config;
+ ipcMain: IpcMain;
+ mockIpcMain: ElectronIpcMain;
+ mockIpcRenderer: Electron.IpcRenderer;
+ mockWindow: BrowserWindow;
+ sentEvents: Array<{ channel: string; data: unknown }>;
+ rootDir: string;
+ dispose: () => Promise<void>;
+}
+
+function createMockBrowserWindow(): {
+ window: BrowserWindow;
+ sentEvents: Array<{ channel: string; data: unknown }>;
+} {
+ const sentEvents: Array<{ channel: string; data: unknown }> = [];
+
+ const mockWindow = {
+ webContents: {
+ send: (channel: string, data: unknown) => {
+ sentEvents.push({ channel, data });
+ },
+ openDevTools: () => {
+ throw new Error("openDevTools is not supported in headless mode");
+ },
+ } as unknown as WebContents,
+ isMinimized: () => false,
+ restore: () => undefined,
+ focus: () => undefined,
+ loadURL: () => {
+ throw new Error("loadURL should not be called in headless mode");
+ },
+ on: () => undefined,
+ setTitle: () => undefined,
+ } as unknown as BrowserWindow;
+
+ return { window: mockWindow, sentEvents };
+}
+
+async function establishRootDir(providedRootDir?: string): Promise<{
+ rootDir: string;
+ dispose: () => Promise<void>;
+}> {
+ if (providedRootDir) {
+ return {
+ rootDir: providedRootDir,
+ dispose: async () => {
+ // Caller owns the directory; nothing to clean up.
+ },
+ };
+ }
+
+ const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "cmux-headless-"));
+ return {
+ rootDir: tempRoot,
+ dispose: async () => {
+ await fs.rm(tempRoot, { recursive: true, force: true });
+ },
+ };
+}
+
+function assertMockedElectron(mocked: MockedElectron): void {
+ if (!mocked || typeof mocked !== "object") {
+ throw new Error("Failed to initialize electron-mock-ipc");
+ }
+
+ if (!("ipcMain" in mocked) || !mocked.ipcMain) {
+ throw new Error("electron-mock-ipc returned an invalid ipcMain");
+ }
+
+ if (!("ipcRenderer" in mocked) || !mocked.ipcRenderer) {
+ throw new Error("electron-mock-ipc returned an invalid ipcRenderer");
+ }
+}
+
+export async function createHeadlessEnvironment(
+ options: CreateHeadlessEnvironmentOptions = {}
+): Promise<HeadlessEnvironment> {
+ const { rootDir, dispose: disposeRootDir } = await establishRootDir(options.rootDir);
+
+ const config = new Config(rootDir);
+
+ const { window: mockWindow, sentEvents } = createMockBrowserWindow();
+
+ const mockedElectron = createIPCMock();
+ assertMockedElectron(mockedElectron);
+ const mockIpcMainModule = mockedElectron.ipcMain;
+ const mockIpcRendererModule = mockedElectron.ipcRenderer;
+
+ const ipcMain = new IpcMain(config);
+ ipcMain.register(mockIpcMainModule, mockWindow);
+
+ const dispose = async () => {
+ sentEvents.length = 0;
+ await disposeRootDir();
+ };
+
+ return {
+ config,
+ ipcMain,
+ mockIpcMain: mockIpcMainModule,
+ mockIpcRenderer: mockIpcRendererModule,
+ mockWindow,
+ sentEvents,
+ rootDir,
+ dispose,
+ };
+}
diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts
new file mode 100644
index 0000000000..e8a0f694fb
--- /dev/null
+++ b/src/debug/agentSessionCli.ts
@@ -0,0 +1,508 @@
+#!/usr/bin/env bun
+
+import assert from "node:assert/strict";
+import * as fs from "fs/promises";
+import * as path from "path";
+import { parseArgs } from "util";
+import { Config } from "@/config";
+import { HistoryService } from "@/services/historyService";
+import { PartialService } from "@/services/partialService";
+import { AIService } from "@/services/aiService";
+import { AgentSession, type AgentSessionChatEvent } from "@/services/agentSession";
+import {
+ isCaughtUpMessage,
+ isStreamAbort,
+ isStreamDelta,
+ isStreamEnd,
+ isStreamError,
+ isStreamStart,
+ isToolCallDelta,
+ isToolCallEnd,
+ isToolCallStart,
+ type SendMessageOptions,
+ type WorkspaceChatMessage,
+} from "@/types/ipc";
+import { defaultModel } from "@/utils/ai/models";
+import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig";
+import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
+import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors";
+import type { ThinkingLevel } from "@/types/thinking";
+
+interface CliResult {
+ success: boolean;
+ error?: string;
+ data?: Record<string, unknown>;
+}
+
+async function ensureDirectory(pathToCheck: string): Promise<void> {
+ const stats = await fs.stat(pathToCheck);
+ if (!stats.isDirectory()) {
+ throw new Error(`"${pathToCheck}" is not a directory`);
+ }
+}
+
+async function gatherMessageFromStdin(): Promise<string> {
+ if (process.stdin.isTTY) {
+ return "";
+ }
+
+ const chunks: Uint8Array[] = [];
+ for await (const chunk of process.stdin) {
+ if (Buffer.isBuffer(chunk)) {
+ chunks.push(chunk);
+ continue;
+ }
+ if (typeof chunk === "string") {
+ chunks.push(Buffer.from(chunk));
+ continue;
+ }
+ if (chunk instanceof Uint8Array) {
+ chunks.push(chunk);
+ continue;
+ }
+ throw new Error(`Unsupported stdin chunk type: ${typeof chunk}`);
+ }
+ return Buffer.concat(chunks).toString("utf-8");
+}
+
+function parseTimeout(timeoutRaw: string | undefined): number | undefined {
+ if (!timeoutRaw) {
+ return undefined;
+ }
+
+ const parsed = Number.parseInt(timeoutRaw, 10);
+ if (!Number.isFinite(parsed) || parsed <= 0) {
+ throw new Error(`Invalid timeout value "${timeoutRaw}"`);
+ }
+ return parsed;
+}
+
+function parseThinkingLevel(value: string | undefined): ThinkingLevel | undefined {
+ if (!value) {
+ return undefined;
+ }
+
+ const normalized = value.trim().toLowerCase();
+ if (
+ normalized === "off" ||
+ normalized === "low" ||
+ normalized === "medium" ||
+ normalized === "high"
+ ) {
+ return normalized;
+ }
+ throw new Error(`Invalid thinking level "${value}". Expected one of: off, low, medium, high.`);
+}
+
+type CLIMode = "plan" | "exec";
+
+function parseMode(raw: string | undefined): CLIMode {
+ if (!raw) {
+ return "exec";
+ }
+
+ const normalized = raw.trim().toLowerCase();
+ if (normalized === "plan") {
+ return "plan";
+ }
+ if (normalized === "exec" || normalized === "execute") {
+ return "exec";
+ }
+
+ throw new Error('Invalid mode "' + raw + '". Expected "plan" or "exec" (or "execute").');
+}
+
+function renderUnknown(value: unknown): string {
+ if (typeof value === "string") {
+ return value;
+ }
+ try {
+ return JSON.stringify(value, null, 2);
+ } catch {
+ return String(value);
+ }
+}
+
+function writeJson(result: CliResult): void {
+ process.stdout.write(`${JSON.stringify(result)}\n`);
+}
+
+async function main(): Promise<void> {
+ const { values } = parseArgs({
+ args: process.argv.slice(2),
+ options: {
+ "workspace-path": { type: "string" },
+ "workspace-id": { type: "string" },
+ "project-path": { type: "string" },
+ "config-root": { type: "string" },
+ message: { type: "string" },
+ model: { type: "string" },
+ "thinking-level": { type: "string" },
+ mode: { type: "string" },
+ timeout: { type: "string" },
+ json: { type: "boolean" },
+ "json-streaming": { type: "boolean" },
+ },
+ allowPositionals: false,
+ });
+
+ const workspacePathRaw = values["workspace-path"];
+ if (typeof workspacePathRaw !== "string" || workspacePathRaw.trim().length === 0) {
+ throw new Error("--workspace-path is required");
+ }
+ const workspacePath = path.resolve(workspacePathRaw.trim());
+ await ensureDirectory(workspacePath);
+
+ const configRootRaw = values["config-root"];
+ const configRoot =
+ configRootRaw && configRootRaw.trim().length > 0 ? configRootRaw.trim() : undefined;
+ const config = new Config(configRoot);
+
+ const workspaceIdRaw = values["workspace-id"];
+ const projectPathRaw = values["project-path"];
+
+ const workspaceId =
+ workspaceIdRaw && workspaceIdRaw.trim().length > 0
+ ? workspaceIdRaw.trim()
+ : (() => {
+ if (typeof projectPathRaw !== "string" || projectPathRaw.trim().length === 0) {
+ throw new Error("Provide --workspace-id or --project-path to derive workspace ID");
+ }
+ const projectPath = path.resolve(projectPathRaw.trim());
+ return config.generateWorkspaceId(projectPath, workspacePath);
+ })();
+
+ const projectName =
+ typeof projectPathRaw === "string" && projectPathRaw.trim().length > 0
+ ? path.basename(path.resolve(projectPathRaw.trim()))
+ : path.basename(path.dirname(workspacePath)) || "unknown";
+
+ const messageArg =
+ values.message && values.message.trim().length > 0 ? values.message : undefined;
+ const messageText = messageArg ?? (await gatherMessageFromStdin());
+ if (!messageText || messageText.trim().length === 0) {
+ throw new Error("Message must be provided via --message or stdin");
+ }
+
+ const model = values.model && values.model.trim().length > 0 ? values.model.trim() : defaultModel;
+ const timeoutMs = parseTimeout(values.timeout);
+ const thinkingLevel = parseThinkingLevel(values["thinking-level"]);
+ const initialMode = parseMode(values.mode);
+ const emitFinalJson = values.json === true;
+ const emitJsonStreaming = values["json-streaming"] === true;
+
+ const suppressHumanOutput = emitJsonStreaming || emitFinalJson;
+ const humanStream = process.stdout;
+ const writeHuman = (text: string) => {
+ if (suppressHumanOutput) {
+ return;
+ }
+ humanStream.write(text);
+ };
+ const writeHumanLine = (text = "") => {
+ if (suppressHumanOutput) {
+ return;
+ }
+ humanStream.write(`${text}\n`);
+ };
+ const emitJsonLine = (payload: unknown) => {
+ if (!emitJsonStreaming) {
+ return;
+ }
+ process.stdout.write(`${JSON.stringify(payload)}\n`);
+ };
+
+ const historyService = new HistoryService(config);
+ const partialService = new PartialService(config, historyService);
+ const aiService = new AIService(config, historyService, partialService);
+ ensureProvidersConfig(config);
+
+ const session = new AgentSession({
+ workspaceId,
+ config,
+ historyService,
+ partialService,
+ aiService,
+ });
+
+ await session.ensureMetadata({
+ workspacePath,
+ projectName,
+ });
+
+ const buildSendOptions = (mode: CLIMode): SendMessageOptions => ({
+ model,
+ thinkingLevel,
+ toolPolicy: modeToToolPolicy(mode),
+ additionalSystemInstructions: mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined,
+ });
+
+ const liveEvents: WorkspaceChatMessage[] = [];
+ let readyForLive = false;
+ let streamLineOpen = false;
+ let activeMessageId: string | null = null;
+ let planProposed = false;
+ let streamEnded = false;
+
+ let resolveCompletion: ((value: void) => void) | null = null;
+ let rejectCompletion: ((reason?: unknown) => void) | null = null;
+ let completionPromise: Promise<void> = Promise.resolve();
+
+ const createCompletionPromise = (): Promise<void> => {
+ streamEnded = false;
+ return new Promise<void>((resolve, reject) => {
+ resolveCompletion = resolve;
+ rejectCompletion = reject;
+ });
+ };
+
+ const waitForCompletion = async (): Promise<void> => {
+ if (timeoutMs !== undefined) {
+ let timeoutHandle: NodeJS.Timeout | null = null;
+ try {
+ await Promise.race([
+ completionPromise,
+ new Promise((_, reject) => {
+ timeoutHandle = setTimeout(
+ () => reject(new Error(`Timed out after ${timeoutMs}ms`)),
+ timeoutMs
+ );
+ }),
+ ]);
+ } finally {
+ if (timeoutHandle) {
+ clearTimeout(timeoutHandle);
+ }
+ }
+ } else {
+ await completionPromise;
+ }
+
+ if (!streamEnded) {
+ throw new Error("Stream completion promise resolved unexpectedly without stream end");
+ }
+ };
+
+ const sendAndAwait = async (message: string, options: SendMessageOptions): Promise<void> => {
+ completionPromise = createCompletionPromise();
+ const sendResult = await session.sendMessage(message, options);
+ if (!sendResult.success) {
+ const errorValue = sendResult.error;
+ let formattedError = "unknown error";
+ if (typeof errorValue === "string") {
+ formattedError = errorValue;
+ } else if (errorValue && typeof errorValue === "object") {
+ const maybeRaw = (errorValue as { raw?: unknown }).raw;
+ if (typeof maybeRaw === "string" && maybeRaw.trim().length > 0) {
+ formattedError = maybeRaw;
+ } else {
+ formattedError = JSON.stringify(errorValue);
+ }
+ }
+ throw new Error(`Failed to send message: ${formattedError}`);
+ }
+
+ await waitForCompletion();
+ };
+
+ const handleToolStart = (payload: WorkspaceChatMessage): boolean => {
+ if (!isToolCallStart(payload)) {
+ return false;
+ }
+ writeHumanLine("\n========== TOOL CALL START ==========");
+ writeHumanLine(`Tool: ${payload.toolName}`);
+ writeHumanLine(`Call ID: ${payload.toolCallId}`);
+ writeHumanLine("Arguments:");
+ writeHumanLine(renderUnknown(payload.args));
+ writeHumanLine("=====================================");
+ return true;
+ };
+
+ const handleToolDelta = (payload: WorkspaceChatMessage): boolean => {
+ if (!isToolCallDelta(payload)) {
+ return false;
+ }
+ writeHumanLine("\n----------- TOOL OUTPUT -------------");
+ writeHumanLine(renderUnknown(payload.delta));
+ writeHumanLine("-------------------------------------");
+ return true;
+ };
+
+ const handleToolEnd = (payload: WorkspaceChatMessage): boolean => {
+ if (!isToolCallEnd(payload)) {
+ return false;
+ }
+ writeHumanLine("\n=========== TOOL CALL END ===========");
+ writeHumanLine(`Tool: ${payload.toolName}`);
+ writeHumanLine(`Call ID: ${payload.toolCallId}`);
+ writeHumanLine("Result:");
+ writeHumanLine(renderUnknown(payload.result));
+ writeHumanLine("=====================================");
+ if (payload.toolName === "propose_plan") {
+ planProposed = true;
+ }
+ return true;
+ };
+
+ const chatListener = (event: AgentSessionChatEvent) => {
+ const payload = event.message;
+
+ if (!readyForLive) {
+ if (isCaughtUpMessage(payload)) {
+ readyForLive = true;
+ emitJsonLine({ type: "caught-up", workspaceId });
+ }
+ return;
+ }
+
+ emitJsonLine({ type: "event", workspaceId, payload });
+ liveEvents.push(payload);
+
+ if (handleToolStart(payload) || handleToolDelta(payload) || handleToolEnd(payload)) {
+ return;
+ }
+
+ if (isStreamStart(payload)) {
+ if (activeMessageId && activeMessageId !== payload.messageId) {
+ if (rejectCompletion) {
+ rejectCompletion(
+ new Error(
+ `Received conflicting stream-start message IDs (${activeMessageId} vs ${payload.messageId})`
+ )
+ );
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+ activeMessageId = payload.messageId;
+ return;
+ }
+
+ if (isStreamDelta(payload)) {
+ assert(typeof payload.delta === "string", "stream delta must include text");
+ writeHuman(payload.delta);
+ streamLineOpen = !payload.delta.endsWith("\n");
+ return;
+ }
+
+ if (isStreamError(payload)) {
+ if (rejectCompletion) {
+ rejectCompletion(new Error(payload.error));
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+
+ if (isStreamAbort(payload)) {
+ if (rejectCompletion) {
+ rejectCompletion(new Error("Stream aborted before completion"));
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+
+ if (isStreamEnd(payload)) {
+ if (activeMessageId && payload.messageId !== activeMessageId) {
+ if (rejectCompletion) {
+ rejectCompletion(
+ new Error(
+ `Mismatched stream-end message ID. Expected ${activeMessageId}, received ${payload.messageId}`
+ )
+ );
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+ if (streamLineOpen) {
+ writeHuman("\n");
+ streamLineOpen = false;
+ }
+ streamEnded = true;
+ if (resolveCompletion) {
+ resolveCompletion();
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ activeMessageId = null;
+ }
+ };
+
+ const unsubscribe = await session.subscribeChat(chatListener);
+
+ try {
+ await sendAndAwait(messageText, buildSendOptions(initialMode));
+
+ const planWasProposed = planProposed;
+ planProposed = false;
+ if (initialMode === "plan" && !planWasProposed) {
+ throw new Error("Plan mode was requested, but the assistant never proposed a plan.");
+ }
+ if (planWasProposed) {
+ writeHumanLine("\n[auto] Plan received. Approving and switching to execute mode...\n");
+ await sendAndAwait("Plan approved. Execute it.", buildSendOptions("exec"));
+ }
+
+ let finalEvent: WorkspaceChatMessage | undefined;
+ for (let i = liveEvents.length - 1; i >= 0; i -= 1) {
+ const candidate = liveEvents[i];
+ if (isStreamEnd(candidate)) {
+ finalEvent = candidate;
+ break;
+ }
+ }
+
+ if (!finalEvent || !isStreamEnd(finalEvent)) {
+ throw new Error("Stream ended without receiving stream-end event");
+ }
+
+ const parts = (finalEvent as unknown as { parts?: unknown }).parts ?? [];
+ const text = extractAssistantText(parts);
+ const reasoning = extractReasoning(parts);
+ const toolCalls = extractToolCalls(parts);
+
+ if (emitFinalJson) {
+ writeJson({
+ success: true,
+ data: {
+ messageId: finalEvent.messageId,
+ model: finalEvent.metadata?.model ?? null,
+ text,
+ reasoning,
+ toolCalls,
+ metadata: finalEvent.metadata ?? null,
+ parts,
+ events: liveEvents,
+ },
+ });
+ }
+ } finally {
+ unsubscribe();
+ session.dispose();
+ }
+}
+
+void (async () => {
+ try {
+ await main();
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ const wantsJsonStreaming =
+ process.argv.includes("--json-streaming") || process.argv.includes("--json-streaming=true");
+ const wantsJson = process.argv.includes("--json") || process.argv.includes("--json=true");
+
+ if (wantsJsonStreaming) {
+ process.stdout.write(`${JSON.stringify({ type: "error", error: message })}\n`);
+ }
+
+ if (wantsJson) {
+ writeJson({ success: false, error: message });
+ } else {
+ process.stderr.write(`Error: ${message}\n`);
+ }
+ process.exitCode = 1;
+ }
+})();
diff --git a/src/debug/chatExtractors.ts b/src/debug/chatExtractors.ts
new file mode 100644
index 0000000000..1650cf31d9
--- /dev/null
+++ b/src/debug/chatExtractors.ts
@@ -0,0 +1,42 @@
+import assert from "node:assert/strict";
+import type { CmuxReasoningPart, CmuxTextPart, CmuxToolPart } from "@/types/message";
+
+export function extractAssistantText(parts: unknown): string {
+ if (!Array.isArray(parts)) {
+ return "";
+ }
+
+ const textParts = (parts as CmuxTextPart[]).filter(
+ (part): part is CmuxTextPart => part.type === "text"
+ );
+ return textParts
+ .map((part) => {
+ assert(typeof part.text === "string", "Text part must include text");
+ return part.text;
+ })
+ .join("");
+}
+
+export function extractReasoning(parts: unknown): string[] {
+ if (!Array.isArray(parts)) {
+ return [];
+ }
+
+ const reasoningParts = (parts as CmuxReasoningPart[]).filter(
+ (part): part is CmuxReasoningPart => part.type === "reasoning"
+ );
+ return reasoningParts.map((part) => {
+ assert(typeof part.text === "string", "Reasoning part must include text");
+ return part.text;
+ });
+}
+
+export function extractToolCalls(parts: unknown): CmuxToolPart[] {
+ if (!Array.isArray(parts)) {
+ return [];
+ }
+
+ return (parts as CmuxToolPart[]).filter(
+ (part): part is CmuxToolPart => part.type === "dynamic-tool"
+ );
+}
diff --git a/src/services/agentSession.ts b/src/services/agentSession.ts
new file mode 100644
index 0000000000..aef1608068
--- /dev/null
+++ b/src/services/agentSession.ts
@@ -0,0 +1,383 @@
+import assert from "node:assert/strict";
+import { EventEmitter } from "events";
+import * as path from "path";
+import { createCmuxMessage } from "@/types/message";
+import type { Config } from "@/config";
+import type { AIService } from "@/services/aiService";
+import type { HistoryService } from "@/services/historyService";
+import type { PartialService } from "@/services/partialService";
+import type { WorkspaceMetadata } from "@/types/workspace";
+import type { WorkspaceChatMessage, StreamErrorMessage, SendMessageOptions } from "@/types/ipc";
+import type { SendMessageError } from "@/types/errors";
+import { createUnknownSendMessageError } from "@/services/utils/sendMessageError";
+import type { Result } from "@/types/result";
+import { Ok, Err } from "@/types/result";
+
+interface ImagePart {
+ image: string;
+ mimeType: string;
+}
+
+export interface AgentSessionChatEvent {
+ workspaceId: string;
+ message: WorkspaceChatMessage;
+}
+
+export interface AgentSessionMetadataEvent {
+ workspaceId: string;
+ metadata: WorkspaceMetadata | null;
+}
+
+interface AgentSessionOptions {
+ workspaceId: string;
+ config: Config;
+ historyService: HistoryService;
+ partialService: PartialService;
+ aiService: AIService;
+}
+
+export class AgentSession {
+ private readonly workspaceId: string;
+ private readonly config: Config;
+ private readonly historyService: HistoryService;
+ private readonly partialService: PartialService;
+ private readonly aiService: AIService;
+ private readonly emitter = new EventEmitter();
+ private readonly aiListeners: Array<{ event: string; handler: (...args: unknown[]) => void }> =
+ [];
+ private disposed = false;
+
+ constructor(options: AgentSessionOptions) {
+ assert(options, "AgentSession requires options");
+ const { workspaceId, config, historyService, partialService, aiService } = options;
+
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmedWorkspaceId = workspaceId.trim();
+ assert(trimmedWorkspaceId.length > 0, "workspaceId must not be empty");
+
+ this.workspaceId = trimmedWorkspaceId;
+ this.config = config;
+ this.historyService = historyService;
+ this.partialService = partialService;
+ this.aiService = aiService;
+
+ this.attachAiListeners();
+ }
+
+ dispose(): void {
+ if (this.disposed) {
+ return;
+ }
+ this.disposed = true;
+ for (const { event, handler } of this.aiListeners) {
+ this.aiService.off(event, handler as never);
+ }
+ this.aiListeners.length = 0;
+ this.emitter.removeAllListeners();
+ }
+
+ onChatEvent(listener: (event: AgentSessionChatEvent) => void): () => void {
+ assert(typeof listener === "function", "listener must be a function");
+ this.emitter.on("chat-event", listener);
+ return () => {
+ this.emitter.off("chat-event", listener);
+ };
+ }
+
+ onMetadataEvent(listener: (event: AgentSessionMetadataEvent) => void): () => void {
+ assert(typeof listener === "function", "listener must be a function");
+ this.emitter.on("metadata-event", listener);
+ return () => {
+ this.emitter.off("metadata-event", listener);
+ };
+ }
+
+ async subscribeChat(listener: (event: AgentSessionChatEvent) => void): Promise<() => void> {
+ this.assertNotDisposed("subscribeChat");
+ assert(typeof listener === "function", "listener must be a function");
+
+ const unsubscribe = this.onChatEvent(listener);
+ await this.emitHistoricalEvents(listener);
+
+ return unsubscribe;
+ }
+
+ async replayHistory(listener: (event: AgentSessionChatEvent) => void): Promise<void> {
+ this.assertNotDisposed("replayHistory");
+ assert(typeof listener === "function", "listener must be a function");
+ await this.emitHistoricalEvents(listener);
+ }
+
+ emitMetadata(metadata: WorkspaceMetadata | null): void {
+ this.assertNotDisposed("emitMetadata");
+ this.emitter.emit("metadata-event", {
+ workspaceId: this.workspaceId,
+ metadata,
+ } satisfies AgentSessionMetadataEvent);
+ }
+
+ private async emitHistoricalEvents(
+ listener: (event: AgentSessionChatEvent) => void
+ ): Promise<void> {
+ const historyResult = await this.historyService.getHistory(this.workspaceId);
+ if (historyResult.success) {
+ for (const message of historyResult.data) {
+ listener({ workspaceId: this.workspaceId, message });
+ }
+ }
+
+ const streamInfo = this.aiService.getStreamInfo(this.workspaceId);
+ const partial = await this.partialService.readPartial(this.workspaceId);
+
+ if (streamInfo) {
+ this.aiService.replayStream(this.workspaceId);
+ } else if (partial) {
+ listener({ workspaceId: this.workspaceId, message: partial });
+ }
+
+ listener({
+ workspaceId: this.workspaceId,
+ message: { type: "caught-up" },
+ });
+ }
+
+ async ensureMetadata(args: { workspacePath: string; projectName?: string }): Promise<void> {
+ this.assertNotDisposed("ensureMetadata");
+ assert(args, "ensureMetadata requires arguments");
+ const { workspacePath, projectName } = args;
+
+ assert(typeof workspacePath === "string", "workspacePath must be a string");
+ const trimmedWorkspacePath = workspacePath.trim();
+ assert(trimmedWorkspacePath.length > 0, "workspacePath must not be empty");
+
+ const normalizedWorkspacePath = path.resolve(trimmedWorkspacePath);
+ const existing = await this.aiService.getWorkspaceMetadata(this.workspaceId);
+
+ if (existing.success) {
+ const metadata = existing.data;
+ assert(
+ metadata.workspacePath === normalizedWorkspacePath,
+ `Existing metadata workspacePath mismatch for ${this.workspaceId}`
+ );
+ return;
+ }
+
+ const derivedProjectName =
+ projectName && projectName.trim().length > 0
+ ? projectName.trim()
+ : path.basename(path.dirname(normalizedWorkspacePath)) || "unknown";
+
+ const metadata: WorkspaceMetadata = {
+ id: this.workspaceId,
+ projectName: derivedProjectName,
+ workspacePath: normalizedWorkspacePath,
+ };
+
+ const saveResult = await this.aiService.saveWorkspaceMetadata(this.workspaceId, metadata);
+ if (!saveResult.success) {
+ const errorDetail = saveResult.error ?? "unknown";
+ throw new Error(`Failed to save metadata: ${errorDetail}`);
+ }
+ this.emitMetadata(metadata);
+ }
+
+ async sendMessage(
+ message: string,
+ options?: SendMessageOptions & { imageParts?: ImagePart[] }
+ ): Promise<Result<void, SendMessageError>> {
+ this.assertNotDisposed("sendMessage");
+
+ assert(typeof message === "string", "sendMessage requires a string message");
+ const trimmedMessage = message.trim();
+ const imageParts = options?.imageParts;
+
+ if (trimmedMessage.length === 0 && (!imageParts || imageParts.length === 0)) {
+ return Err(
+ createUnknownSendMessageError(
+ "Empty message not allowed. Use interruptStream() to interrupt active streams."
+ )
+ );
+ }
+
+ if (options?.editMessageId) {
+ const truncateResult = await this.historyService.truncateAfterMessage(
+ this.workspaceId,
+ options.editMessageId
+ );
+ if (!truncateResult.success) {
+ return Err(createUnknownSendMessageError(truncateResult.error));
+ }
+ }
+
+ const messageId = `user-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
+ const additionalParts =
+ imageParts && imageParts.length > 0
+ ? imageParts.map((img) => {
+ assert(typeof img.image === "string", "image part must include base64 string content");
+ assert(
+ typeof img.mimeType === "string" && img.mimeType.trim().length > 0,
+ "image part must include a mimeType"
+ );
+ return {
+ type: "image" as const,
+ image: img.image,
+ mimeType: img.mimeType,
+ };
+ })
+ : undefined;
+
+ const userMessage = createCmuxMessage(
+ messageId,
+ "user",
+ message,
+ {
+ timestamp: Date.now(),
+ toolPolicy: options?.toolPolicy,
+ },
+ additionalParts
+ );
+
+ const appendResult = await this.historyService.appendToHistory(this.workspaceId, userMessage);
+ if (!appendResult.success) {
+ return Err(createUnknownSendMessageError(appendResult.error));
+ }
+
+ this.emitChatEvent(userMessage);
+
+ if (!options?.model || options.model.trim().length === 0) {
+ return Err(
+ createUnknownSendMessageError("No model specified. Please select a model using /model.")
+ );
+ }
+
+ return this.streamWithHistory(options.model, options);
+ }
+
+ async resumeStream(options: SendMessageOptions): Promise<Result<void, SendMessageError>> {
+ this.assertNotDisposed("resumeStream");
+
+ assert(options, "resumeStream requires options");
+ const { model } = options;
+ assert(typeof model === "string" && model.trim().length > 0, "resumeStream requires a model");
+
+ if (this.aiService.isStreaming(this.workspaceId)) {
+ return Ok(undefined);
+ }
+
+ return this.streamWithHistory(model, options);
+ }
+
+ async interruptStream(): Promise<Result<void, SendMessageError>> {
+ this.assertNotDisposed("interruptStream");
+
+ if (!this.aiService.isStreaming(this.workspaceId)) {
+ return Ok(undefined);
+ }
+
+ const stopResult = await this.aiService.stopStream(this.workspaceId);
+ if (!stopResult.success) {
+ return Err(stopResult.error);
+ }
+
+ return Ok(undefined);
+ }
+
+ private async streamWithHistory(
+ modelString: string,
+ options?: SendMessageOptions
+ ): Promise<Result<void, SendMessageError>> {
+ const commitResult = await this.partialService.commitToHistory(this.workspaceId);
+ if (!commitResult.success) {
+ return Err(createUnknownSendMessageError(commitResult.error));
+ }
+
+ const historyResult = await this.historyService.getHistory(this.workspaceId);
+ if (!historyResult.success) {
+ return Err(createUnknownSendMessageError(historyResult.error));
+ }
+
+ const streamResult = await this.aiService.streamMessage(
+ historyResult.data,
+ this.workspaceId,
+ modelString,
+ options?.thinkingLevel,
+ options?.toolPolicy,
+ undefined,
+ options?.additionalSystemInstructions,
+ options?.maxOutputTokens,
+ options?.providerOptions,
+ options?.mode
+ );
+
+ return streamResult;
+ }
+
+ private attachAiListeners(): void {
+ const forward = (event: string, handler: (payload: WorkspaceChatMessage) => void) => {
+ const wrapped = (...args: unknown[]) => {
+ const [payload] = args;
+ if (
+ typeof payload === "object" &&
+ payload !== null &&
+ "workspaceId" in payload &&
+ (payload as { workspaceId: unknown }).workspaceId !== this.workspaceId
+ ) {
+ return;
+ }
+ handler(payload as WorkspaceChatMessage);
+ };
+ this.aiListeners.push({ event, handler: wrapped });
+ this.aiService.on(event, wrapped as never);
+ };
+
+ forward("stream-start", (payload) => this.emitChatEvent(payload));
+ forward("stream-delta", (payload) => this.emitChatEvent(payload));
+ forward("stream-end", (payload) => this.emitChatEvent(payload));
+ forward("tool-call-start", (payload) => this.emitChatEvent(payload));
+ forward("tool-call-delta", (payload) => this.emitChatEvent(payload));
+ forward("tool-call-end", (payload) => this.emitChatEvent(payload));
+ forward("reasoning-delta", (payload) => this.emitChatEvent(payload));
+ forward("reasoning-end", (payload) => this.emitChatEvent(payload));
+ forward("stream-abort", (payload) => this.emitChatEvent(payload));
+
+ const errorHandler = (...args: unknown[]) => {
+ const [raw] = args;
+ if (
+ typeof raw !== "object" ||
+ raw === null ||
+ !("workspaceId" in raw) ||
+ (raw as { workspaceId: unknown }).workspaceId !== this.workspaceId
+ ) {
+ return;
+ }
+ const data = raw as {
+ workspaceId: string;
+ messageId: string;
+ error: string;
+ errorType?: string;
+ };
+ const streamError: StreamErrorMessage = {
+ type: "stream-error",
+ messageId: data.messageId,
+ error: data.error,
+ errorType: (data.errorType ?? "unknown") as StreamErrorMessage["errorType"],
+ };
+ this.emitChatEvent(streamError);
+ };
+
+ this.aiListeners.push({ event: "error", handler: errorHandler });
+ this.aiService.on("error", errorHandler as never);
+ }
+
+ private emitChatEvent(message: WorkspaceChatMessage): void {
+ this.assertNotDisposed("emitChatEvent");
+ this.emitter.emit("chat-event", {
+ workspaceId: this.workspaceId,
+ message,
+ } satisfies AgentSessionChatEvent);
+ }
+
+ private assertNotDisposed(operation: string): void {
+ assert(!this.disposed, `AgentSession.${operation} called after dispose`);
+ }
+}
diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index e6e59ed8c9..8cdbf3eb44 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -509,6 +509,12 @@ export class AIService extends EventEmitter {
// Apply tool policy to filter tools (if policy provided)
const tools = applyToolPolicy(allTools, toolPolicy);
+ log.info("AIService.streamMessage: tool configuration", {
+ workspaceId,
+ model: modelString,
+ toolNames: Object.keys(tools),
+ hasToolPolicy: Boolean(toolPolicy),
+ });
// Create assistant message placeholder with historySequence from backend
const assistantMessageId = `assistant-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
diff --git a/src/services/ipcMain.ts b/src/services/ipcMain.ts
index fa8b561725..63e9cce91e 100644
--- a/src/services/ipcMain.ts
+++ b/src/services/ipcMain.ts
@@ -1,3 +1,4 @@
+import assert from "node:assert/strict";
import type { BrowserWindow, IpcMain as ElectronIpcMain } from "electron";
import { spawn, spawnSync } from "child_process";
import * as path from "path";
@@ -14,33 +15,18 @@ import { removeWorktreeSafe, removeWorktree, pruneWorktrees } from "@/services/g
import { AIService } from "@/services/aiService";
import { HistoryService } from "@/services/historyService";
import { PartialService } from "@/services/partialService";
-import { createCmuxMessage, type CmuxMessage } from "@/types/message";
+import { AgentSession } from "@/services/agentSession";
+import type { CmuxMessage } from "@/types/message";
import { log } from "@/services/log";
-import type {
- StreamStartEvent,
- StreamDeltaEvent,
- StreamEndEvent,
- StreamAbortEvent,
- ToolCallStartEvent,
- ToolCallDeltaEvent,
- ToolCallEndEvent,
- ErrorEvent,
-} from "@/types/stream";
import { IPC_CHANNELS, getChatChannel } from "@/constants/ipc-constants";
import type { SendMessageError } from "@/types/errors";
-import type { StreamErrorMessage, SendMessageOptions, DeleteMessage } from "@/types/ipc";
-import { Ok, Err, type Result } from "@/types/result";
+import type { SendMessageOptions, DeleteMessage } from "@/types/ipc";
+import { Ok, Err } from "@/types/result";
import { validateWorkspaceName } from "@/utils/validation/workspaceValidation";
import { createBashTool } from "@/services/tools/bash";
import type { BashToolResult } from "@/types/tools";
-
import { secretsToRecord } from "@/types/secrets";
-const createUnknownSendMessageError = (raw: string): SendMessageError => ({
- type: "unknown",
- raw,
-});
-
/**
* IpcMain - Manages all IPC handlers and service coordination
*
@@ -59,6 +45,11 @@ export class IpcMain {
private readonly historyService: HistoryService;
private readonly partialService: PartialService;
private readonly aiService: AIService;
+ private readonly sessions = new Map<string, AgentSession>();
+ private readonly sessionSubscriptions = new Map<
+ string,
+ { chat: () => void; metadata: () => void }
+ >();
private mainWindow: BrowserWindow | null = null;
private registered = false;
@@ -69,6 +60,68 @@ export class IpcMain {
this.aiService = new AIService(config, this.historyService, this.partialService);
}
+ private getOrCreateSession(workspaceId: string): AgentSession {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmed = workspaceId.trim();
+ assert(trimmed.length > 0, "workspaceId must not be empty");
+
+ let session = this.sessions.get(trimmed);
+ if (session) {
+ return session;
+ }
+
+ session = new AgentSession({
+ workspaceId: trimmed,
+ config: this.config,
+ historyService: this.historyService,
+ partialService: this.partialService,
+ aiService: this.aiService,
+ });
+
+ const chatUnsubscribe = session.onChatEvent((event) => {
+ if (!this.mainWindow) {
+ return;
+ }
+ const channel = getChatChannel(event.workspaceId);
+ this.mainWindow.webContents.send(channel, event.message);
+ });
+
+ const metadataUnsubscribe = session.onMetadataEvent((event) => {
+ if (!this.mainWindow) {
+ return;
+ }
+ this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
+ workspaceId: event.workspaceId,
+ metadata: event.metadata,
+ });
+ });
+
+ this.sessions.set(trimmed, session);
+ this.sessionSubscriptions.set(trimmed, {
+ chat: chatUnsubscribe,
+ metadata: metadataUnsubscribe,
+ });
+
+ return session;
+ }
+
+ private disposeSession(workspaceId: string): void {
+ const session = this.sessions.get(workspaceId);
+ if (!session) {
+ return;
+ }
+
+ const subscriptions = this.sessionSubscriptions.get(workspaceId);
+ if (subscriptions) {
+ subscriptions.chat();
+ subscriptions.metadata();
+ this.sessionSubscriptions.delete(workspaceId);
+ }
+
+ session.dispose();
+ this.sessions.delete(workspaceId);
+ }
+
/**
* Register all IPC handlers and setup event forwarding
* @param ipcMain - Electron's ipcMain module
@@ -90,7 +143,6 @@ export class IpcMain {
this.registerProviderHandlers(ipcMain);
this.registerProjectHandlers(ipcMain);
this.registerSubscriptionHandlers(ipcMain);
- this.setupEventForwarding();
this.registered = true;
}
@@ -121,65 +173,6 @@ export class IpcMain {
});
}
- /**
- * Helper method: Stream AI response with history
- * Shared logic between sendMessage and resumeStream handlers
- */
- private async streamWithHistory(
- workspaceId: string,
- modelString: string,
- options?: SendMessageOptions
- ): Promise<Result<void, SendMessageError>> {
- const {
- thinkingLevel,
- toolPolicy,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- mode,
- } = options ?? {};
-
- // Commit any existing partial to history BEFORE loading
- // This ensures interrupted messages are included in the AI's context
- await this.partialService.commitToHistory(workspaceId);
-
- // Get full conversation history
- const historyResult = await this.historyService.getHistory(workspaceId);
- if (!historyResult.success) {
- log.error("Failed to get conversation history:", historyResult.error);
- return {
- success: false,
- error: createUnknownSendMessageError(historyResult.error),
- };
- }
-
- // Stream the AI response
- log.debug("Calling aiService.streamMessage", {
- workspaceId,
- thinkingLevel,
- modelString,
- toolPolicy,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- });
-
- const streamResult = await this.aiService.streamMessage(
- historyResult.data,
- workspaceId,
- modelString,
- thinkingLevel,
- toolPolicy,
- undefined,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- mode
- );
- log.debug("Stream completed", { workspaceId });
- return streamResult;
- }
-
private registerWorkspaceHandlers(ipcMain: ElectronIpcMain): void {
ipcMain.handle(
IPC_CHANNELS.WORKSPACE_CREATE,
@@ -235,10 +228,8 @@ export class IpcMain {
});
// Emit metadata event for new workspace
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId,
- metadata,
- });
+ const session = this.getOrCreateSession(workspaceId);
+ session.emitMetadata(metadata);
return {
success: true,
@@ -380,16 +371,20 @@ export class IpcMain {
});
// Emit metadata event for old workspace deletion
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId,
- metadata: null,
- });
+ const oldSession = this.sessions.get(workspaceId);
+ if (oldSession) {
+ oldSession.emitMetadata(null);
+ this.disposeSession(workspaceId);
+ } else if (this.mainWindow) {
+ this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
+ workspaceId,
+ metadata: null,
+ });
+ }
// Emit metadata event for new workspace
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId: newWorkspaceId,
- metadata: newMetadata,
- });
+ const newSession = this.getOrCreateSession(newWorkspaceId);
+ newSession.emitMetadata(newMetadata);
return Ok({ newWorkspaceId });
} catch (error) {
@@ -421,113 +416,23 @@ export class IpcMain {
message: string,
options?: SendMessageOptions & { imageParts?: Array<{ image: string; mimeType: string }> }
) => {
- const {
- editMessageId,
- thinkingLevel,
- model,
- toolPolicy,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- imageParts,
- mode, // Passed to streamWithHistory for plan file loading
- } = options ?? {};
log.debug("sendMessage handler: Received", {
workspaceId,
messagePreview: message.substring(0, 50),
- editMessageId,
- thinkingLevel,
- model,
- toolPolicy,
- mode,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
+ mode: options?.mode,
+ options,
});
try {
- // Reject empty messages - use interruptStream() to interrupt active streams
- if (!message.trim() && (!imageParts || imageParts.length === 0)) {
- log.debug("sendMessage handler: Rejected empty message (use interruptStream instead)");
- return {
- success: false,
- error: {
- type: "unknown",
- raw: "Empty message not allowed. Use interruptStream() to interrupt active streams.",
- },
- };
- }
-
- // If editing, truncate history after the message being edited
- if (editMessageId) {
- const truncateResult = await this.historyService.truncateAfterMessage(
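+ // Delegate message handling to the per-workspace session.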
+ const session = this.getOrCreateSession(workspaceId);
+ const result = await session.sendMessage(message, options);
+ if (!result.success) {
+ log.error("sendMessage handler: session returned error", {
workspaceId,
- editMessageId
- );
- if (!truncateResult.success) {
- log.error("Failed to truncate history for edit:", truncateResult.error);
- return {
- success: false,
- error: createUnknownSendMessageError(truncateResult.error),
- };
- }
- // Note: We don't send a clear event here. The aggregator will handle
- // replacement automatically when the new message arrives with the same historySequence
- }
-
- // Create user message with text and optional image parts
- const messageId = `user-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
- const additionalParts = imageParts?.map((img) => ({
- type: "image" as const,
- image: img.image,
- mimeType: img.mimeType,
- }));
- if (additionalParts && additionalParts.length > 0) {
- log.debug("sendMessage: Creating message with images", {
- imageCount: additionalParts.length,
- mimeTypes: additionalParts.map((p) => p.mimeType),
+ error: result.error,
});
}
- const userMessage = createCmuxMessage(
- messageId,
- "user",
- message,
- {
- // historySequence will be assigned by historyService.appendToHistory()
- timestamp: Date.now(),
- toolPolicy, // Store for historical record and compaction detection
- },
- additionalParts
- );
-
- // Append user message to history
- const appendResult = await this.historyService.appendToHistory(workspaceId, userMessage);
- if (!appendResult.success) {
- log.error("Failed to append message to history:", appendResult.error);
- return {
- success: false,
- error: createUnknownSendMessageError(appendResult.error),
- };
- }
-
- // Broadcast the user message immediately to the frontend
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(workspaceId), userMessage);
- }
-
- // Stream the AI response
- if (!model) {
- log.error("No model provided by frontend");
- return {
- success: false,
- error: createUnknownSendMessageError(
- "No model specified. Please select a model using /model command."
- ),
- };
- }
-
- return await this.streamWithHistory(workspaceId, model, options);
+ return result;
} catch (error) {
- // Convert to SendMessageError for typed error handling
const errorMessage = error instanceof Error ? error.message : String(error);
log.error("Unexpected error in sendMessage handler:", error);
const sendError: SendMessageError = {
@@ -547,15 +452,15 @@ export class IpcMain {
options,
});
try {
- // Idempotent: if stream already active, return success (not error)
- // This makes client code simpler and more resilient
- if (this.aiService.isStreaming(workspaceId)) {
- log.debug("resumeStream handler: Stream already active, returning success");
- return { success: true };
+ const session = this.getOrCreateSession(workspaceId);
+ const result = await session.resumeStream(options);
+ if (!result.success) {
+ log.error("resumeStream handler: session returned error", {
+ workspaceId,
+ error: result.error,
+ });
}
-
- // Stream the AI response with existing history (no new user message)
- return await this.streamWithHistory(workspaceId, options.model, options);
+ return result;
} catch (error) {
// Convert to SendMessageError for typed error handling
const errorMessage = error instanceof Error ? error.message : String(error);
@@ -572,13 +477,8 @@ export class IpcMain {
ipcMain.handle(IPC_CHANNELS.WORKSPACE_INTERRUPT_STREAM, async (_event, workspaceId: string) => {
log.debug("interruptStream handler: Received", { workspaceId });
try {
- // Idempotent: if not streaming, return success (not error)
- if (!this.aiService.isStreaming(workspaceId)) {
- log.debug("interruptStream handler: Not streaming, returning success");
- return { success: true, data: undefined };
- }
-
- const stopResult = await this.aiService.stopStream(workspaceId);
+ const session = this.getOrCreateSession(workspaceId);
+ const stopResult = await session.interruptStream();
if (!stopResult.success) {
log.error("Failed to stop stream:", stopResult.error);
return { success: false, error: stopResult.error };
@@ -896,10 +796,17 @@ export class IpcMain {
}
// Emit metadata event for workspace removal (with null metadata to indicate deletion)
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId,
- metadata: null, // null indicates workspace was deleted
- });
+ const existingSession = this.sessions.get(workspaceId);
+ if (existingSession) {
+ existingSession.emitMetadata(null);
+ } else if (this.mainWindow) {
+ this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
+ workspaceId,
+ metadata: null,
+ });
+ }
+
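+ // Drop the session now that the workspace has been removed.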
+ this.disposeSession(workspaceId);
return { success: true };
} catch (error) {
@@ -1072,31 +979,15 @@ export class IpcMain {
// Handle subscription events for chat history
ipcMain.on(`workspace:chat:subscribe`, (_event, workspaceId: string) => {
void (async () => {
+ const session = this.getOrCreateSession(workspaceId);
const chatChannel = getChatChannel(workspaceId);
- const history = await this.historyService.getHistory(workspaceId);
- if (history.success) {
- for (const msg of history.data) {
- this.mainWindow?.webContents.send(chatChannel, msg);
- }
-
- // Check if there's an active stream or a partial message
- const streamInfo = this.aiService.getStreamInfo(workspaceId);
- const partial = await this.partialService.readPartial(workspaceId);
-
- if (streamInfo) {
- // Stream is actively running - replay events to re-establish streaming context
- // Events flow: StreamManager → AIService → IpcMain → renderer
- // This ensures frontend receives stream-start and creates activeStream entry
- // so that stream-end can properly clean up the streaming indicator
- this.aiService.replayStream(workspaceId);
- } else if (partial) {
- // No active stream but there's a partial - send as regular message (shows CONTINUE)
- this.mainWindow?.webContents.send(chatChannel, partial);
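+ // Replay stored chat history to the freshly subscribed renderer via the session.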
+ await session.replayHistory((event) => {
+ if (!this.mainWindow) {
+ return;
}
- }
-
- this.mainWindow?.webContents.send(chatChannel, { type: "caught-up" });
+ this.mainWindow.webContents.send(chatChannel, event.message);
+ });
})();
});
@@ -1118,89 +1009,6 @@ export class IpcMain {
});
}
- private setupEventForwarding(): void {
- // Set up event listeners for AI service
- this.aiService.on("stream-start", (data: StreamStartEvent) => {
- if (this.mainWindow) {
- // Send the actual stream-start event
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("stream-delta", (data: StreamDeltaEvent) => {
- if (this.mainWindow) {
- // Send ONLY the delta event - efficient IPC usage
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("stream-end", (data: StreamEndEvent) => {
- if (this.mainWindow) {
- // Send the stream-end event with final content and metadata
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- // Forward tool events to renderer
- this.aiService.on("tool-call-start", (data: ToolCallStartEvent) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("tool-call-delta", (data: ToolCallDeltaEvent) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("tool-call-end", (data: ToolCallEndEvent) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- // Forward reasoning events to renderer
- this.aiService.on(
- "reasoning-delta",
- (data: { type: string; workspaceId: string; messageId: string; delta: string }) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- }
- );
-
- this.aiService.on(
- "reasoning-end",
- (data: { type: string; workspaceId: string; messageId: string }) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- }
- );
-
- this.aiService.on("error", (data: ErrorEvent) => {
- if (this.mainWindow) {
- // Send properly typed StreamErrorMessage
- const errorMessage: StreamErrorMessage = {
- type: "stream-error",
- messageId: data.messageId,
- error: data.error,
- errorType: data.errorType ?? "unknown",
- };
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), errorMessage);
- }
- });
-
- // Handle stream abort events
- this.aiService.on("stream-abort", (data: StreamAbortEvent) => {
- if (this.mainWindow) {
- // Forward complete abort event including metadata (usage, duration)
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
- }
-
/**
* Check if a command is available in the system PATH
*/
diff --git a/src/services/utils/sendMessageError.ts b/src/services/utils/sendMessageError.ts
new file mode 100644
index 0000000000..6b34497796
--- /dev/null
+++ b/src/services/utils/sendMessageError.ts
@@ -0,0 +1,17 @@
+import assert from "node:assert/strict";
+import type { SendMessageError } from "@/types/errors";
+
+/**
+ * Helper to wrap arbitrary errors into SendMessageError structures.
+ * Asserts that the raw string is non-empty so surfaced errors always carry a message.
+ */
+export const createUnknownSendMessageError = (raw: string): SendMessageError => {
+ assert(typeof raw === "string", "Expected raw error to be a string");
+ const trimmed = raw.trim();
+ assert(trimmed.length > 0, "createUnknownSendMessageError requires a non-empty message");
+
+ return {
+ type: "unknown",
+ raw: trimmed,
+ };
+};
diff --git a/src/utils/ai/providerOptions.ts b/src/utils/ai/providerOptions.ts
index b4926aa7ce..65497ef46e 100644
--- a/src/utils/ai/providerOptions.ts
+++ b/src/utils/ai/providerOptions.ts
@@ -93,7 +93,7 @@ export function buildProviderOptions(
}),
},
};
- log.info("buildProviderOptions: Returning Anthropic options", options);
+ log.debug("buildProviderOptions: Returning Anthropic options", options);
return options;
}
diff --git a/src/utils/providers/ensureProvidersConfig.ts b/src/utils/providers/ensureProvidersConfig.ts
new file mode 100644
index 0000000000..0b205dcfa4
--- /dev/null
+++ b/src/utils/providers/ensureProvidersConfig.ts
@@ -0,0 +1,112 @@
+import type { Config, ProviderConfig, ProvidersConfig } from "@/config";
+
+const trim = (value: unknown): string => (typeof value === "string" ? value.trim() : "");
+
+const hasApiKey = (config: ProviderConfig | undefined): boolean =>
+ Boolean(config && typeof config.apiKey === "string" && config.apiKey.trim().length > 0);
+
+const hasAnyConfiguredProvider = (providers: ProvidersConfig | null | undefined): boolean => {
+ if (!providers) {
+ return false;
+ }
+
+ return Object.values(providers).some((providerConfig) => hasApiKey(providerConfig));
+};
+
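+// Derive a ProvidersConfig from well-known environment variables (Anthropic, OpenAI, Azure OpenAI).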
+const buildProvidersFromEnv = (env: NodeJS.ProcessEnv): ProvidersConfig => {
+ const providers: ProvidersConfig = {};
+
+ const anthropicKey = trim(env.ANTHROPIC_API_KEY);
+ if (anthropicKey.length > 0) {
+ const entry: ProviderConfig = { apiKey: anthropicKey };
+
+ const baseUrl = trim(env.ANTHROPIC_BASE_URL);
+ if (baseUrl.length > 0) {
+ entry.baseUrl = baseUrl;
+ }
+
+ providers.anthropic = entry;
+ }
+
+ const openAIKey = trim(env.OPENAI_API_KEY);
+ if (openAIKey.length > 0) {
+ const entry: ProviderConfig = { apiKey: openAIKey };
+
+ const baseUrlCandidates = [env.OPENAI_BASE_URL, env.OPENAI_API_BASE];
+ for (const candidate of baseUrlCandidates) {
+ const baseUrl = trim(candidate);
+ if (baseUrl.length > 0) {
+ entry.baseUrl = baseUrl;
+ break;
+ }
+ }
+
+ const organization = trim(env.OPENAI_ORG_ID);
+ if (organization.length > 0) {
+ entry.organization = organization;
+ }
+
+ providers.openai = entry;
+ }
+
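+ // Fall back to Azure OpenAI credentials when no direct OpenAI key is configured.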
+ if (!providers.openai) {
+ const azureKey = trim(env.AZURE_OPENAI_API_KEY);
+ const azureEndpoint = trim(env.AZURE_OPENAI_ENDPOINT);
+
+ if (azureKey.length > 0 && azureEndpoint.length > 0) {
+ const entry: ProviderConfig = {
+ apiKey: azureKey,
+ baseUrl: azureEndpoint,
+ };
+
+ const deployment = trim(env.AZURE_OPENAI_DEPLOYMENT);
+ if (deployment.length > 0) {
+ entry.defaultModel = deployment;
+ }
+
+ const apiVersion = trim(env.AZURE_OPENAI_API_VERSION);
+ if (apiVersion.length > 0) {
+ entry.apiVersion = apiVersion;
+ }
+
+ providers.openai = entry;
+ }
+ }
+
+ return providers;
+};
+
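+/**
+ * Return the stored providers config when at least one provider has an API key;
+ * otherwise build one from environment variables, persist it, and return it.
+ */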
+export const ensureProvidersConfig = (
+ config: Config,
+ env: NodeJS.ProcessEnv = process.env
+): ProvidersConfig => {
+ if (!config) {
+ throw new Error("Config instance is required to ensure providers configuration");
+ }
+
+ const existingProviders = config.loadProvidersConfig();
+ if (hasAnyConfiguredProvider(existingProviders)) {
+ if (!existingProviders) {
+ throw new Error(
+ "Providers config reported configured providers but returned null. Please validate providers.jsonc."
+ );
+ }
+ return existingProviders;
+ }
+
+ const providersFromEnv = buildProvidersFromEnv(env);
+ if (!hasAnyConfiguredProvider(providersFromEnv)) {
+ throw new Error(
+ "No provider credentials found. Configure providers.jsonc or set ANTHROPIC_API_KEY / OPENAI_API_KEY."
+ );
+ }
+
+ config.saveProvidersConfig(providersFromEnv);
+ return providersFromEnv;
+};
+
+export const getProvidersFromEnv = (env: NodeJS.ProcessEnv = process.env): ProvidersConfig =>
+ buildProvidersFromEnv(env);
+
+export const hasAnyProvidersConfigured = (providers: ProvidersConfig | null | undefined): boolean =>
+ hasAnyConfiguredProvider(providers);