diff --git a/.gitignore b/.gitignore
index 560ab5cbe2..368183a9f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,10 @@ CODE_CHANGES.md
README_COMPACT_HERE.md
artifacts/
tests/e2e/tmp/
+runs/
+
+# Python
+__pycache__
+
+tmpfork
+.cmux-agent-cli
diff --git a/Makefile b/Makefile
index 1b9227b57d..b7ec50ff06 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,7 @@ include fmt.mk
.PHONY: test test-unit test-integration test-watch test-coverage test-e2e
.PHONY: dist dist-mac dist-win dist-linux
.PHONY: docs docs-build docs-watch
+.PHONY: benchmark-terminal
.PHONY: ensure-deps
TS_SOURCES := $(shell find src -type f \( -name '*.ts' -o -name '*.tsx' \))
@@ -174,6 +175,19 @@ docs-build: ## Build documentation
docs-watch: ## Watch and rebuild documentation
@cd docs && mdbook watch
+## Benchmarks
+benchmark-terminal: ## Run Terminal-Bench with the cmux agent (use TB_DATASET/TB_ARGS to customize)
+ @TB_DATASET=$${TB_DATASET:-terminal-bench-core==0.1.1}; \
+ CONCURRENCY_FLAG=$${TB_CONCURRENCY:+--n-concurrent $$TB_CONCURRENCY}; \
+ LIVESTREAM_FLAG=$${TB_LIVESTREAM:+--livestream}; \
+ echo "Running Terminal-Bench with dataset $$TB_DATASET"; \
+ uvx terminal-bench run \
+ --dataset "$$TB_DATASET" \
+ --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent \
+ $$CONCURRENCY_FLAG \
+ $$LIVESTREAM_FLAG \
+ $${TB_ARGS}
+
## Clean
clean: ## Clean build artifacts
@echo "Cleaning build artifacts..."
@@ -182,5 +196,3 @@ clean: ## Clean build artifacts
# Parallel build optimization - these can run concurrently
.NOTPARALLEL: build-main # TypeScript can handle its own parallelism
-
-
diff --git a/benchmarks/terminal_bench/__init__.py b/benchmarks/terminal_bench/__init__.py
new file mode 100644
index 0000000000..228448c524
--- /dev/null
+++ b/benchmarks/terminal_bench/__init__.py
@@ -0,0 +1,3 @@
+from .cmux_agent import CmuxAgent
+
+__all__ = ["CmuxAgent"]
diff --git a/benchmarks/terminal_bench/cmux-run.sh b/benchmarks/terminal_bench/cmux-run.sh
new file mode 100644
index 0000000000..4aaa698956
--- /dev/null
+++ b/benchmarks/terminal_bench/cmux-run.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+log() {
+ printf '[cmux-run] %s\n' "$1"
+}
+
+fatal() {
+ printf '[cmux-run] ERROR: %s\n' "$1" >&2
+ exit 1
+}
+
+instruction=${1:-}
+if [[ -z "${instruction}" ]]; then
+ fatal "instruction argument is required"
+fi
+
+export BUN_INSTALL="${BUN_INSTALL:-/root/.bun}"
+export PATH="${BUN_INSTALL}/bin:${PATH}"
+
+CMUX_APP_ROOT="${CMUX_APP_ROOT:-/opt/cmux-app}"
+CMUX_CONFIG_ROOT="${CMUX_CONFIG_ROOT:-/root/.cmux}"
+CMUX_PROJECT_PATH="${CMUX_PROJECT_PATH:-}"
+CMUX_PROJECT_CANDIDATES="${CMUX_PROJECT_CANDIDATES:-/workspace:/app:/workspaces:/root/project}"
+CMUX_MODEL="${CMUX_MODEL:-anthropic:claude-sonnet-4-5}"
+CMUX_TIMEOUT_MS="${CMUX_TIMEOUT_MS:-}"
+CMUX_TRUNK="${CMUX_TRUNK:-main}"
+CMUX_WORKSPACE_ID="${CMUX_WORKSPACE_ID:-cmux-bench}"
+CMUX_THINKING_LEVEL="${CMUX_THINKING_LEVEL:-high}"
+CMUX_MODE="${CMUX_MODE:-exec}"
+
+ensure_bun() {
+ if ! command -v bun >/dev/null 2>&1; then
+ fatal "bun must be installed before running the cmux agent"
+ fi
+}
+
+resolve_project_path() {
+ if [[ -n "${CMUX_PROJECT_PATH}" ]]; then
+ if [[ -d "${CMUX_PROJECT_PATH}" ]]; then
+ printf '%s\n' "${CMUX_PROJECT_PATH}"
+ return 0
+ fi
+ fatal "CMUX_PROJECT_PATH=${CMUX_PROJECT_PATH} not found"
+ fi
+
+ IFS=":" read -r -a candidates <<<"${CMUX_PROJECT_CANDIDATES}"
+ for candidate in "${candidates[@]}"; do
+ if [[ -d "${candidate}" ]]; then
+ printf '%s\n' "${candidate}"
+ return 0
+ fi
+ done
+
+ fatal "no project path located (searched ${CMUX_PROJECT_CANDIDATES})"
+}
+
+ensure_git_repo() {
+ local project_path=$1
+
+ if command -v git >/dev/null 2>&1; then
+ if git -C "${project_path}" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+ # Ensure trunk branch exists even on pre-existing repos.
+ if ! git -C "${project_path}" rev-parse --verify "${CMUX_TRUNK}" >/dev/null 2>&1; then
+ git -C "${project_path}" checkout -b "${CMUX_TRUNK}" >/dev/null 2>&1 || true
+ else
+ git -C "${project_path}" checkout "${CMUX_TRUNK}" >/dev/null 2>&1 || true
+ fi
+ return 0
+ fi
+
+ log "initialising git repository at ${project_path}"
+ if git -C "${project_path}" init --initial-branch="${CMUX_TRUNK}" >/dev/null 2>&1; then
+ :
+ else
+ git -C "${project_path}" init >/dev/null
+ git -C "${project_path}" checkout -B "${CMUX_TRUNK}" >/dev/null
+ fi
+ git -C "${project_path}" config user.name "cmux-bench"
+ git -C "${project_path}" config user.email "bench@cmux.local"
+ git -C "${project_path}" add -A >/dev/null
+ git -C "${project_path}" commit -m "chore: initial snapshot" --allow-empty >/dev/null
+ git -C "${project_path}" branch -M "${CMUX_TRUNK}" >/dev/null
+ else
+ log "git not available; skipping repository initialisation"
+ fi
+}
+
+ensure_bun
+project_path=$(resolve_project_path)
+ensure_git_repo "${project_path}"
+
+bun --version >/dev/null 2>&1 || fatal "bun not available after ensure_bun"
+
+log "starting cmux agent session for ${project_path}"
+cd "${CMUX_APP_ROOT}"
+
+cmd=(bun src/debug/agentSessionCli.ts
+ --config-root "${CMUX_CONFIG_ROOT}"
+ --project-path "${project_path}"
+ --workspace-path "${project_path}"
+ --workspace-id "${CMUX_WORKSPACE_ID}"
+ --model "${CMUX_MODEL}"
+ --mode "${CMUX_MODE}")
+
+if [[ -n "${CMUX_TIMEOUT_MS}" ]]; then
+ cmd+=(--timeout "${CMUX_TIMEOUT_MS}")
+fi
+
+if [[ -n "${CMUX_THINKING_LEVEL}" ]]; then
+ cmd+=(--thinking-level "${CMUX_THINKING_LEVEL}")
+fi
+
+if ! printf '%s' "${instruction}" | "${cmd[@]}"; then
+ fatal "cmux agent session failed"
+fi
diff --git a/benchmarks/terminal_bench/cmux_agent.py b/benchmarks/terminal_bench/cmux_agent.py
new file mode 100644
index 0000000000..2653ec5c97
--- /dev/null
+++ b/benchmarks/terminal_bench/cmux_agent.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+import io
+import os
+import shlex
+import tarfile
+import tempfile
+from pathlib import Path
+from typing import Any
+
+from terminal_bench.agents.base_agent import AgentResult
+from terminal_bench.agents.installed_agents.abstract_installed_agent import (
+ AbstractInstalledAgent,
+)
+from terminal_bench.terminal.models import TerminalCommand
+from terminal_bench.terminal.tmux_session import TmuxSession
+
+
+class CmuxAgent(AbstractInstalledAgent):
+ """
+ Minimal Terminal-Bench adapter that installs cmux into the task container and
+ forwards the benchmark instruction to the cmux headless runner.
+ """
+
+ _ARCHIVE_NAME = "cmux-app.tar.gz"
+ _RUNNER_NAME = "cmux-run.sh"
+ _DEFAULT_TRUNK = "main"
+ _DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
+
+ def __init__(
+ self, mode: str | None = None, thinking_level: str | None = None, **kwargs: Any
+ ) -> None:
+ super().__init__(**kwargs)
+ repo_root_env = os.environ.get("CMUX_AGENT_REPO_ROOT")
+ repo_root = (
+ Path(repo_root_env).resolve()
+ if repo_root_env
+ else Path(__file__).resolve().parents[2]
+ )
+ if not repo_root.exists():
+ raise RuntimeError(f"cmux repo root {repo_root} does not exist")
+
+ self._repo_root = repo_root
+ self._archive_bytes: bytes | None = None
+ self._prepared_container_id: str | None = None
+ self._mode = mode.lower() if mode else None
+ self._thinking_level = thinking_level.lower() if thinking_level else None
+
+ @staticmethod
+ def name() -> str:
+ return "cmux"
+
+ @property
+ def _env(self) -> dict[str, str]:
+ keys = [
+ "ANTHROPIC_API_KEY",
+ "ANTHROPIC_BASE_URL",
+ "OPENAI_API_KEY",
+ "OPENAI_BASE_URL",
+ "OPENAI_API_BASE",
+ "OPENAI_ORG_ID",
+ "AZURE_OPENAI_API_KEY",
+ "AZURE_OPENAI_ENDPOINT",
+ "AZURE_OPENAI_DEPLOYMENT",
+ "AZURE_OPENAI_API_VERSION",
+ "MISTRAL_API_KEY",
+ "GOOGLE_API_KEY",
+ "OPENROUTER_API_KEY",
+ "CMUX_AGENT_GIT_URL",
+ "CMUX_BUN_INSTALL_URL",
+ "CMUX_PROJECT_PATH",
+ "CMUX_PROJECT_CANDIDATES",
+ "CMUX_TRUNK",
+ "CMUX_MODEL",
+ "CMUX_TIMEOUT_MS",
+ "CMUX_THINKING_LEVEL",
+ "CMUX_CONFIG_ROOT",
+ "CMUX_APP_ROOT",
+ "CMUX_WORKSPACE_ID",
+ ]
+
+ env: dict[str, str] = {}
+ for key in keys:
+ value = os.environ.get(key)
+ if value:
+ env[key] = value
+
+ env.setdefault("CMUX_TRUNK", self._DEFAULT_TRUNK)
+ env.setdefault("CMUX_MODEL", self._DEFAULT_MODEL)
+ env.setdefault("CMUX_CONFIG_ROOT", "/root/.cmux")
+ env.setdefault("CMUX_APP_ROOT", "/opt/cmux-app")
+ env.setdefault("CMUX_WORKSPACE_ID", "cmux-bench")
+ env.setdefault("CMUX_THINKING_LEVEL", "high")
+ env.setdefault("CMUX_MODE", "exec")
+
+ model_value = env.get("CMUX_MODEL")
+ if model_value and "/" in model_value and ":" not in model_value:
+ provider, model_name = model_value.split("/", 1)
+ env["CMUX_MODEL"] = f"{provider}:{model_name}"
+
+ thinking_value = self._thinking_level or env.get("CMUX_THINKING_LEVEL")
+ if thinking_value:
+ normalized = thinking_value.strip().lower()
+ if normalized not in {"off", "low", "medium", "high"}:
+ raise ValueError(
+ "CMUX_THINKING_LEVEL must be one of off, low, medium, high"
+ )
+ env["CMUX_THINKING_LEVEL"] = normalized
+
+ mode_value = self._mode or env.get("CMUX_MODE")
+ if mode_value:
+ normalized_mode = mode_value.strip().lower()
+ if normalized_mode in {"exec", "execute"}:
+ env["CMUX_MODE"] = "exec"
+ elif normalized_mode == "plan":
+ env["CMUX_MODE"] = "plan"
+ else:
+ raise ValueError("CMUX_MODE must be one of plan, exec, or execute")
+
+ return env
+
+ @property
+ def _install_agent_script_path(self) -> Path:
+ return self._get_templated_script_path("cmux_setup.sh.j2")
+
+ def perform_task(
+ self,
+ instruction: str,
+ session: TmuxSession,
+ logging_dir=None,
+ ) -> AgentResult:
+ if not instruction or not instruction.strip():
+ raise ValueError("instruction must be a non-empty string")
+
+ self._prepare_payloads(session)
+ return super().perform_task(
+ instruction=instruction, session=session, logging_dir=logging_dir
+ )
+
+ def _prepare_payloads(self, session: TmuxSession) -> None:
+ container_id = getattr(session.container, "id", None)
+ if container_id and container_id == self._prepared_container_id:
+ return
+
+ archive = self._build_archive()
+ temp_path: Path | None = None
+ try:
+ with tempfile.NamedTemporaryFile(
+ suffix=".tar.gz", delete=False
+ ) as temp_file:
+ temp_file.write(archive)
+ temp_path = Path(temp_file.name)
+ except Exception as error:
+ raise RuntimeError(
+ f"failed to materialize cmux archive: {error}"
+ ) from error
+
+ try:
+ assert temp_path is not None, "temporary archive path missing"
+ session.copy_to_container(
+ paths=temp_path,
+ container_dir="/installed-agent",
+ container_filename=self._ARCHIVE_NAME,
+ )
+ finally:
+ if temp_path is not None:
+ temp_path.unlink(missing_ok=True)
+
+ runner_path = Path(__file__).with_name(self._RUNNER_NAME)
+ if not runner_path.exists():
+ raise RuntimeError(f"cmux runner script missing at {runner_path}")
+
+ session.copy_to_container(
+ paths=runner_path,
+ container_dir="/installed-agent",
+ container_filename=self._RUNNER_NAME,
+ )
+
+ if container_id:
+ self._prepared_container_id = container_id
+
+ def _build_archive(self) -> bytes:
+ if self._archive_bytes is not None:
+ return self._archive_bytes
+
+ include_paths = [
+ "package.json",
+ "bun.lock",
+ "bunfig.toml",
+ "tsconfig.json",
+ "tsconfig.main.json",
+ "src",
+ ]
+
+ buffer = io.BytesIO()
+ with tarfile.open(fileobj=buffer, mode="w:gz") as tar:
+ for relative in include_paths:
+ source_path = self._repo_root / relative
+ if not source_path.exists():
+ raise FileNotFoundError(f"Required file {source_path} not found")
+ tar.add(
+ source_path,
+ arcname=relative,
+ recursive=True,
+ )
+ buffer.seek(0)
+ self._archive_bytes = buffer.getvalue()
+ return self._archive_bytes
+
+ def _run_agent_commands(self, instruction: str) -> list[TerminalCommand]:
+ escaped = shlex.quote(instruction)
+ command = f"bash /installed-agent/{self._RUNNER_NAME} {escaped}"
+ return [
+ TerminalCommand(
+ command=command,
+ min_timeout_sec=0.0,
+ max_timeout_sec=float("inf"),
+ block=True,
+ append_enter=True,
+ )
+ ]
diff --git a/benchmarks/terminal_bench/cmux_setup.sh.j2 b/benchmarks/terminal_bench/cmux_setup.sh.j2
new file mode 100644
index 0000000000..e5b1542f0f
--- /dev/null
+++ b/benchmarks/terminal_bench/cmux_setup.sh.j2
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+log() {
+ printf '[cmux-setup] %s\n' "$1"
+}
+
+ensure_tool() {
+ if command -v "$1" >/dev/null 2>&1; then
+ return 0
+ fi
+
+ if ! command -v apt-get >/dev/null 2>&1; then
+ printf 'Required tool "%s" missing and apt-get unavailable\n' "$1" >&2
+ return 1
+ fi
+
+ log "installing missing dependency: $1"
+ export DEBIAN_FRONTEND=noninteractive
+ apt-get update
+ apt-get install -y "$1"
+}
+
+ensure_tool curl
+ensure_tool git
+ensure_tool unzip
+export BUN_INSTALL="${BUN_INSTALL:-/root/.bun}"
+export PATH="${BUN_INSTALL}/bin:${PATH}"
+
+if ! command -v bun >/dev/null 2>&1; then
+ log "installing bun"
+ curl -fsSL "${CMUX_BUN_INSTALL_URL:-https://bun.sh/install}" | bash
+fi
+
+CMUX_APP_ROOT="${CMUX_APP_ROOT:-/opt/cmux-app}"
+CMUX_CONFIG_ROOT="${CMUX_CONFIG_ROOT:-/root/.cmux}"
+CMUX_AGENT_VERSION="{{ version if version is not none else '' }}"
+
+mkdir -p "$CMUX_APP_ROOT"
+
+if [[ -n "${CMUX_AGENT_VERSION}" ]]; then
+ : "${CMUX_AGENT_GIT_URL:?CMUX_AGENT_GIT_URL must be set when version is provided}"
+ log "cloning cmux from ${CMUX_AGENT_GIT_URL} @ ${CMUX_AGENT_VERSION}"
+ rm -rf "${CMUX_APP_ROOT}"
+ git clone --depth 1 --branch "${CMUX_AGENT_VERSION}" "${CMUX_AGENT_GIT_URL}" "${CMUX_APP_ROOT}"
+else
+ ARCHIVE_PATH="/installed-agent/cmux-app.tar.gz"
+ if [[ ! -s "${ARCHIVE_PATH}" ]]; then
+ printf 'Expected cmux archive at %s\n' "${ARCHIVE_PATH}" >&2
+ exit 1
+ fi
+ log "extracting cmux archive"
+ rm -rf "${CMUX_APP_ROOT}"
+ mkdir -p "${CMUX_APP_ROOT}"
+ tar -xzf "${ARCHIVE_PATH}" -C "${CMUX_APP_ROOT}"
+fi
+
+cd "${CMUX_APP_ROOT}"
+
+log "installing cmux dependencies via bun"
+bun install --frozen-lockfile
+
+mkdir -p "${CMUX_CONFIG_ROOT}"
+
+chmod +x /installed-agent/cmux-run.sh
+
+log "setup complete"
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 23e3de18f4..bbb4bfa3f2 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -10,5 +10,6 @@
- [System Prompt](./system-prompt.md)
- [Instruction Files](./instruction-files.md)
- [Project Secrets](./project-secrets.md)
- - [Agentic Git Identity](./agentic-git-identity.md)
+- [Agentic Git Identity](./agentic-git-identity.md)
+- [Terminal Benchmarking](./benchmarking.md)
- [AGENTS](./AGENTS.md)
diff --git a/docs/benchmarking.md b/docs/benchmarking.md
new file mode 100644
index 0000000000..12a49ade1a
--- /dev/null
+++ b/docs/benchmarking.md
@@ -0,0 +1,76 @@
+# Terminal Benchmarking
+
+cmux ships with a headless adapter for [Terminal-Bench](https://www.tbench.ai/). The adapter runs the Electron backend without opening a window and exercises it through the same IPC paths we use in integration tests. This page documents how to launch benchmarks from the repository tree.
+
+## Prerequisites
+
+- Docker must be installed and running. Terminal-Bench executes each task inside a dedicated Docker container.
+- `uv` is available in the nix `devShell` (provided via `flake.nix`), or can be installed manually by following the upstream uv installation instructions.
+- Standard provider API keys (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`) should be exported so cmux can stream responses.
+
+Optional environment overrides:
+
+| Variable | Purpose | Default |
+| ---------------------- | --------------------------------------------------------- | -------------------------------------- |
+| `CMUX_AGENT_REPO_ROOT` | Path copied into each task container | repo root inferred from the agent file |
+| `CMUX_TRUNK` | Branch checked out when preparing the project | `main` |
+| `CMUX_WORKSPACE_ID` | Workspace identifier used inside cmux | `cmux-bench` |
+| `CMUX_MODEL` | Preferred model (supports `provider/model` syntax) | `anthropic/claude-sonnet-4-5` |
+| `CMUX_THINKING_LEVEL` | Optional reasoning level (`off`, `low`, `medium`, `high`) | `high` |
+| `CMUX_MODE` | Starting mode (`plan` or `exec`) | `exec` |
+| `CMUX_TIMEOUT_MS` | Optional stream timeout in milliseconds | no timeout |
+| `CMUX_CONFIG_ROOT` | Location for cmux session data inside the container | `/root/.cmux` |
+| `CMUX_APP_ROOT` | Path where the cmux sources are staged | `/opt/cmux-app` |
+| `CMUX_PROJECT_PATH` | Explicit project directory inside the task container | auto-detected from common paths |
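+
+These are ordinary environment variables, so they can simply be exported before running any of the commands in the next section. A small illustrative example (the values are placeholders, not recommendations):
+
+```bash
+export CMUX_PROJECT_PATH=/app        # only needed if auto-detection misses the project directory
+export CMUX_TIMEOUT_MS=600000        # abort a stream after 10 minutes instead of waiting indefinitely
+export CMUX_THINKING_LEVEL=medium    # dial reasoning down from the default "high"
+```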
+
+## Running Terminal-Bench
+
+All commands below should be run from the repository root.
+
+### Quick smoke test (single task)
+
+```bash
+uvx terminal-bench run \
+ --dataset terminal-bench-core==0.1.1 \
+ --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent \
+ --n-tasks 1
+```
+
+This downloads the Terminal-Bench runner, copies the cmux sources into the container, and validates the adapter against the first task only. Use this before attempting a full sweep.
+
+### Full dataset
+
+```bash
+uvx terminal-bench run \
+ --dataset terminal-bench-core==0.1.1 \
+ --agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent
+```
+
+Results (pass/fail, token usage, wall-clock) are printed at the end of the run. Terminal-Bench also writes per-task logs under the current working directory; review them when diagnosing failures.
+
+You can also use `make`:
+
+```bash
+TB_CONCURRENCY=6 TB_LIVESTREAM=1 \
+make benchmark-terminal TB_ARGS="--n-tasks 3 --model anthropic/claude-sonnet-4-20250514 --agent-kwarg mode=plan --agent-kwarg thinking_level=medium"
+```
+
+`TB_DATASET` defaults to `terminal-bench-core==0.1.1`, but can be overridden (e.g. `make benchmark-terminal TB_DATASET=terminal-bench-core==head`).
+Use `--agent-kwarg mode=plan` to exercise the plan/execute workflow—the CLI will gather a plan first, then automatically approve it and switch to execution. Leaving the flag off (or setting `mode=exec`) skips the planning phase.
+Use `TB_CONCURRENCY=` to control `--n-concurrent` (number of concurrently running tasks) and `TB_LIVESTREAM=1` to stream log output live instead of waiting for the run to finish. These map to Terminal-Bench’s `--n-concurrent` and `--livestream` flags.
+
+## How the Adapter Works
+
+The adapter lives in `benchmarks/terminal_bench/cmux_agent.py`. For each task it:
+
+1. Copies the cmux repository (package manifests + `src/`) into the container and stages it at `/opt/cmux-app` (the default `CMUX_APP_ROOT`).
+2. Ensures Bun exists, then runs `bun install --frozen-lockfile`.
+3. Launches `src/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `CMUX_CONFIG_ROOT` (default `/root/.cmux`).
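+
+Under the hood, the invocation that `cmux-run.sh` assembles looks roughly like the following sketch, with the benchmark instruction piped on stdin (values shown are the defaults from the table above; the actual paths and flags depend on your environment overrides):
+
+```bash
+cd /opt/cmux-app
+printf '%s' "$INSTRUCTION" | bun src/debug/agentSessionCli.ts \
+  --config-root /root/.cmux \
+  --project-path /workspace \
+  --workspace-path /workspace \
+  --workspace-id cmux-bench \
+  --model anthropic:claude-sonnet-4-5 \
+  --mode exec \
+  --thinking-level high
+```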
+
+`CMUX_MODEL` accepts either the cmux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide.
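+
+For example, either of these should select the same model (the variable is inherited through `make` and `uvx`):
+
+```bash
+CMUX_MODEL=anthropic/claude-sonnet-4-5 make benchmark-terminal   # Terminal-Bench style
+CMUX_MODEL=anthropic:claude-sonnet-4-5 make benchmark-terminal   # cmux style
+```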
+
+## Troubleshooting
+
+- **`command not found: bun`** – ensure the container can reach Bun’s install script, or pre-install Bun in your base image. The adapter aborts if the install step fails.
+- **Workspace creation errors** – set `CMUX_PROJECT_PATH` to the project directory inside the task container if auto-discovery misses it.
+- **Streaming timeouts** – pass `--n-tasks 1` while iterating on fixes, or set `CMUX_TIMEOUT_MS=180000` to reinstate a timeout if needed.
diff --git a/flake.nix b/flake.nix
index d3e7785bd7..52784c734c 100644
--- a/flake.nix
+++ b/flake.nix
@@ -129,6 +129,10 @@
git
bash
nixfmt-rfc-style
+
+ # Terminal bench
+ uv
+ asciinema
];
};
}
diff --git a/src/bench/headlessEnvironment.ts b/src/bench/headlessEnvironment.ts
new file mode 100644
index 0000000000..c72e50a2f5
--- /dev/null
+++ b/src/bench/headlessEnvironment.ts
@@ -0,0 +1,125 @@
+import * as os from "os";
+import * as path from "path";
+import * as fs from "fs/promises";
+import createIPCMock from "electron-mock-ipc";
+import type { BrowserWindow, IpcMain as ElectronIpcMain, WebContents } from "electron";
+import { Config } from "@/config";
+import { IpcMain } from "@/services/ipcMain";
+
+type MockedElectron = ReturnType<typeof createIPCMock>;
+
+interface CreateHeadlessEnvironmentOptions {
+ /**
+ * Override the root directory for Config. If omitted, a temporary directory is created.
+ */
+ rootDir?: string;
+}
+
+export interface HeadlessEnvironment {
+ config: Config;
+ ipcMain: IpcMain;
+ mockIpcMain: ElectronIpcMain;
+ mockIpcRenderer: Electron.IpcRenderer;
+ mockWindow: BrowserWindow;
+ sentEvents: Array<{ channel: string; data: unknown }>;
+ rootDir: string;
+ dispose: () => Promise<void>;
+}
+
+function createMockBrowserWindow(): {
+ window: BrowserWindow;
+ sentEvents: Array<{ channel: string; data: unknown }>;
+} {
+ const sentEvents: Array<{ channel: string; data: unknown }> = [];
+
+ const mockWindow = {
+ webContents: {
+ send: (channel: string, data: unknown) => {
+ sentEvents.push({ channel, data });
+ },
+ openDevTools: () => {
+ throw new Error("openDevTools is not supported in headless mode");
+ },
+ } as unknown as WebContents,
+ isMinimized: () => false,
+ restore: () => undefined,
+ focus: () => undefined,
+ loadURL: () => {
+ throw new Error("loadURL should not be called in headless mode");
+ },
+ on: () => undefined,
+ setTitle: () => undefined,
+ } as unknown as BrowserWindow;
+
+ return { window: mockWindow, sentEvents };
+}
+
+async function establishRootDir(providedRootDir?: string): Promise<{
+ rootDir: string;
+ dispose: () => Promise<void>;
+}> {
+ if (providedRootDir) {
+ return {
+ rootDir: providedRootDir,
+ dispose: async () => {
+ // Caller owns the directory; nothing to clean up.
+ },
+ };
+ }
+
+ const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "cmux-headless-"));
+ return {
+ rootDir: tempRoot,
+ dispose: async () => {
+ await fs.rm(tempRoot, { recursive: true, force: true });
+ },
+ };
+}
+
+function assertMockedElectron(mocked: MockedElectron): void {
+ if (!mocked || typeof mocked !== "object") {
+ throw new Error("Failed to initialize electron-mock-ipc");
+ }
+
+ if (!("ipcMain" in mocked) || !mocked.ipcMain) {
+ throw new Error("electron-mock-ipc returned an invalid ipcMain");
+ }
+
+ if (!("ipcRenderer" in mocked) || !mocked.ipcRenderer) {
+ throw new Error("electron-mock-ipc returned an invalid ipcRenderer");
+ }
+}
+
+export async function createHeadlessEnvironment(
+ options: CreateHeadlessEnvironmentOptions = {}
+): Promise<HeadlessEnvironment> {
+ const { rootDir, dispose: disposeRootDir } = await establishRootDir(options.rootDir);
+
+ const config = new Config(rootDir);
+
+ const { window: mockWindow, sentEvents } = createMockBrowserWindow();
+
+ const mockedElectron = createIPCMock();
+ assertMockedElectron(mockedElectron);
+ const mockIpcMainModule = mockedElectron.ipcMain;
+ const mockIpcRendererModule = mockedElectron.ipcRenderer;
+
+ const ipcMain = new IpcMain(config);
+ ipcMain.register(mockIpcMainModule, mockWindow);
+
+ const dispose = async () => {
+ sentEvents.length = 0;
+ await disposeRootDir();
+ };
+
+ return {
+ config,
+ ipcMain,
+ mockIpcMain: mockIpcMainModule,
+ mockIpcRenderer: mockIpcRendererModule,
+ mockWindow,
+ sentEvents,
+ rootDir,
+ dispose,
+ };
+}
diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts
new file mode 100644
index 0000000000..e8a0f694fb
--- /dev/null
+++ b/src/debug/agentSessionCli.ts
@@ -0,0 +1,508 @@
+#!/usr/bin/env bun
+
+import assert from "node:assert/strict";
+import * as fs from "fs/promises";
+import * as path from "path";
+import { parseArgs } from "util";
+import { Config } from "@/config";
+import { HistoryService } from "@/services/historyService";
+import { PartialService } from "@/services/partialService";
+import { AIService } from "@/services/aiService";
+import { AgentSession, type AgentSessionChatEvent } from "@/services/agentSession";
+import {
+ isCaughtUpMessage,
+ isStreamAbort,
+ isStreamDelta,
+ isStreamEnd,
+ isStreamError,
+ isStreamStart,
+ isToolCallDelta,
+ isToolCallEnd,
+ isToolCallStart,
+ type SendMessageOptions,
+ type WorkspaceChatMessage,
+} from "@/types/ipc";
+import { defaultModel } from "@/utils/ai/models";
+import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig";
+import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
+import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors";
+import type { ThinkingLevel } from "@/types/thinking";
+
+interface CliResult {
+ success: boolean;
+ error?: string;
+ data?: Record<string, unknown>;
+}
+
+async function ensureDirectory(pathToCheck: string): Promise<void> {
+ const stats = await fs.stat(pathToCheck);
+ if (!stats.isDirectory()) {
+ throw new Error(`"${pathToCheck}" is not a directory`);
+ }
+}
+
+async function gatherMessageFromStdin(): Promise<string> {
+ if (process.stdin.isTTY) {
+ return "";
+ }
+
+ const chunks: Uint8Array[] = [];
+ for await (const chunk of process.stdin) {
+ if (Buffer.isBuffer(chunk)) {
+ chunks.push(chunk);
+ continue;
+ }
+ if (typeof chunk === "string") {
+ chunks.push(Buffer.from(chunk));
+ continue;
+ }
+ if (chunk instanceof Uint8Array) {
+ chunks.push(chunk);
+ continue;
+ }
+ throw new Error(`Unsupported stdin chunk type: ${typeof chunk}`);
+ }
+ return Buffer.concat(chunks).toString("utf-8");
+}
+
+function parseTimeout(timeoutRaw: string | undefined): number | undefined {
+ if (!timeoutRaw) {
+ return undefined;
+ }
+
+ const parsed = Number.parseInt(timeoutRaw, 10);
+ if (!Number.isFinite(parsed) || parsed <= 0) {
+ throw new Error(`Invalid timeout value "${timeoutRaw}"`);
+ }
+ return parsed;
+}
+
+function parseThinkingLevel(value: string | undefined): ThinkingLevel | undefined {
+ if (!value) {
+ return undefined;
+ }
+
+ const normalized = value.trim().toLowerCase();
+ if (
+ normalized === "off" ||
+ normalized === "low" ||
+ normalized === "medium" ||
+ normalized === "high"
+ ) {
+ return normalized;
+ }
+ throw new Error(`Invalid thinking level "${value}". Expected one of: off, low, medium, high.`);
+}
+
+type CLIMode = "plan" | "exec";
+
+function parseMode(raw: string | undefined): CLIMode {
+ if (!raw) {
+ return "exec";
+ }
+
+ const normalized = raw.trim().toLowerCase();
+ if (normalized === "plan") {
+ return "plan";
+ }
+ if (normalized === "exec" || normalized === "execute") {
+ return "exec";
+ }
+
+ throw new Error('Invalid mode "' + raw + '". Expected "plan" or "exec" (or "execute").');
+}
+
+function renderUnknown(value: unknown): string {
+ if (typeof value === "string") {
+ return value;
+ }
+ try {
+ return JSON.stringify(value, null, 2);
+ } catch {
+ return String(value);
+ }
+}
+
+function writeJson(result: CliResult): void {
+ process.stdout.write(`${JSON.stringify(result)}\n`);
+}
+
+async function main(): Promise<void> {
+ const { values } = parseArgs({
+ args: process.argv.slice(2),
+ options: {
+ "workspace-path": { type: "string" },
+ "workspace-id": { type: "string" },
+ "project-path": { type: "string" },
+ "config-root": { type: "string" },
+ message: { type: "string" },
+ model: { type: "string" },
+ "thinking-level": { type: "string" },
+ mode: { type: "string" },
+ timeout: { type: "string" },
+ json: { type: "boolean" },
+ "json-streaming": { type: "boolean" },
+ },
+ allowPositionals: false,
+ });
+
+ const workspacePathRaw = values["workspace-path"];
+ if (typeof workspacePathRaw !== "string" || workspacePathRaw.trim().length === 0) {
+ throw new Error("--workspace-path is required");
+ }
+ const workspacePath = path.resolve(workspacePathRaw.trim());
+ await ensureDirectory(workspacePath);
+
+ const configRootRaw = values["config-root"];
+ const configRoot =
+ configRootRaw && configRootRaw.trim().length > 0 ? configRootRaw.trim() : undefined;
+ const config = new Config(configRoot);
+
+ const workspaceIdRaw = values["workspace-id"];
+ const projectPathRaw = values["project-path"];
+
+ const workspaceId =
+ workspaceIdRaw && workspaceIdRaw.trim().length > 0
+ ? workspaceIdRaw.trim()
+ : (() => {
+ if (typeof projectPathRaw !== "string" || projectPathRaw.trim().length === 0) {
+ throw new Error("Provide --workspace-id or --project-path to derive workspace ID");
+ }
+ const projectPath = path.resolve(projectPathRaw.trim());
+ return config.generateWorkspaceId(projectPath, workspacePath);
+ })();
+
+ const projectName =
+ typeof projectPathRaw === "string" && projectPathRaw.trim().length > 0
+ ? path.basename(path.resolve(projectPathRaw.trim()))
+ : path.basename(path.dirname(workspacePath)) || "unknown";
+
+ const messageArg =
+ values.message && values.message.trim().length > 0 ? values.message : undefined;
+ const messageText = messageArg ?? (await gatherMessageFromStdin());
+ if (!messageText || messageText.trim().length === 0) {
+ throw new Error("Message must be provided via --message or stdin");
+ }
+
+ const model = values.model && values.model.trim().length > 0 ? values.model.trim() : defaultModel;
+ const timeoutMs = parseTimeout(values.timeout);
+ const thinkingLevel = parseThinkingLevel(values["thinking-level"]);
+ const initialMode = parseMode(values.mode);
+ const emitFinalJson = values.json === true;
+ const emitJsonStreaming = values["json-streaming"] === true;
+
+ const suppressHumanOutput = emitJsonStreaming || emitFinalJson;
+ const humanStream = process.stdout;
+ const writeHuman = (text: string) => {
+ if (suppressHumanOutput) {
+ return;
+ }
+ humanStream.write(text);
+ };
+ const writeHumanLine = (text = "") => {
+ if (suppressHumanOutput) {
+ return;
+ }
+ humanStream.write(`${text}\n`);
+ };
+ const emitJsonLine = (payload: unknown) => {
+ if (!emitJsonStreaming) {
+ return;
+ }
+ process.stdout.write(`${JSON.stringify(payload)}\n`);
+ };
+
+ const historyService = new HistoryService(config);
+ const partialService = new PartialService(config, historyService);
+ const aiService = new AIService(config, historyService, partialService);
+ ensureProvidersConfig(config);
+
+ const session = new AgentSession({
+ workspaceId,
+ config,
+ historyService,
+ partialService,
+ aiService,
+ });
+
+ await session.ensureMetadata({
+ workspacePath,
+ projectName,
+ });
+
+ const buildSendOptions = (mode: CLIMode): SendMessageOptions => ({
+ model,
+ thinkingLevel,
+ toolPolicy: modeToToolPolicy(mode),
+ additionalSystemInstructions: mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined,
+ });
+
+ const liveEvents: WorkspaceChatMessage[] = [];
+ let readyForLive = false;
+ let streamLineOpen = false;
+ let activeMessageId: string | null = null;
+ let planProposed = false;
+ let streamEnded = false;
+
+ let resolveCompletion: ((value: void) => void) | null = null;
+ let rejectCompletion: ((reason?: unknown) => void) | null = null;
+ let completionPromise: Promise<void> = Promise.resolve();
+
+ const createCompletionPromise = (): Promise<void> => {
+ streamEnded = false;
+ return new Promise<void>((resolve, reject) => {
+ resolveCompletion = resolve;
+ rejectCompletion = reject;
+ });
+ };
+
+ const waitForCompletion = async (): Promise<void> => {
+ if (timeoutMs !== undefined) {
+ let timeoutHandle: NodeJS.Timeout | null = null;
+ try {
+ await Promise.race([
+ completionPromise,
+ new Promise((_, reject) => {
+ timeoutHandle = setTimeout(
+ () => reject(new Error(`Timed out after ${timeoutMs}ms`)),
+ timeoutMs
+ );
+ }),
+ ]);
+ } finally {
+ if (timeoutHandle) {
+ clearTimeout(timeoutHandle);
+ }
+ }
+ } else {
+ await completionPromise;
+ }
+
+ if (!streamEnded) {
+ throw new Error("Stream completion promise resolved unexpectedly without stream end");
+ }
+ };
+
+ const sendAndAwait = async (message: string, options: SendMessageOptions): Promise<void> => {
+ completionPromise = createCompletionPromise();
+ const sendResult = await session.sendMessage(message, options);
+ if (!sendResult.success) {
+ const errorValue = sendResult.error;
+ let formattedError = "unknown error";
+ if (typeof errorValue === "string") {
+ formattedError = errorValue;
+ } else if (errorValue && typeof errorValue === "object") {
+ const maybeRaw = (errorValue as { raw?: unknown }).raw;
+ if (typeof maybeRaw === "string" && maybeRaw.trim().length > 0) {
+ formattedError = maybeRaw;
+ } else {
+ formattedError = JSON.stringify(errorValue);
+ }
+ }
+ throw new Error(`Failed to send message: ${formattedError}`);
+ }
+
+ await waitForCompletion();
+ };
+
+ const handleToolStart = (payload: WorkspaceChatMessage): boolean => {
+ if (!isToolCallStart(payload)) {
+ return false;
+ }
+ writeHumanLine("\n========== TOOL CALL START ==========");
+ writeHumanLine(`Tool: ${payload.toolName}`);
+ writeHumanLine(`Call ID: ${payload.toolCallId}`);
+ writeHumanLine("Arguments:");
+ writeHumanLine(renderUnknown(payload.args));
+ writeHumanLine("=====================================");
+ return true;
+ };
+
+ const handleToolDelta = (payload: WorkspaceChatMessage): boolean => {
+ if (!isToolCallDelta(payload)) {
+ return false;
+ }
+ writeHumanLine("\n----------- TOOL OUTPUT -------------");
+ writeHumanLine(renderUnknown(payload.delta));
+ writeHumanLine("-------------------------------------");
+ return true;
+ };
+
+ const handleToolEnd = (payload: WorkspaceChatMessage): boolean => {
+ if (!isToolCallEnd(payload)) {
+ return false;
+ }
+ writeHumanLine("\n=========== TOOL CALL END ===========");
+ writeHumanLine(`Tool: ${payload.toolName}`);
+ writeHumanLine(`Call ID: ${payload.toolCallId}`);
+ writeHumanLine("Result:");
+ writeHumanLine(renderUnknown(payload.result));
+ writeHumanLine("=====================================");
+ if (payload.toolName === "propose_plan") {
+ planProposed = true;
+ }
+ return true;
+ };
+
+ const chatListener = (event: AgentSessionChatEvent) => {
+ const payload = event.message;
+
+ if (!readyForLive) {
+ if (isCaughtUpMessage(payload)) {
+ readyForLive = true;
+ emitJsonLine({ type: "caught-up", workspaceId });
+ }
+ return;
+ }
+
+ emitJsonLine({ type: "event", workspaceId, payload });
+ liveEvents.push(payload);
+
+ if (handleToolStart(payload) || handleToolDelta(payload) || handleToolEnd(payload)) {
+ return;
+ }
+
+ if (isStreamStart(payload)) {
+ if (activeMessageId && activeMessageId !== payload.messageId) {
+ if (rejectCompletion) {
+ rejectCompletion(
+ new Error(
+ `Received conflicting stream-start message IDs (${activeMessageId} vs ${payload.messageId})`
+ )
+ );
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+ activeMessageId = payload.messageId;
+ return;
+ }
+
+ if (isStreamDelta(payload)) {
+ assert(typeof payload.delta === "string", "stream delta must include text");
+ writeHuman(payload.delta);
+ streamLineOpen = !payload.delta.endsWith("\n");
+ return;
+ }
+
+ if (isStreamError(payload)) {
+ if (rejectCompletion) {
+ rejectCompletion(new Error(payload.error));
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+
+ if (isStreamAbort(payload)) {
+ if (rejectCompletion) {
+ rejectCompletion(new Error("Stream aborted before completion"));
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+
+ if (isStreamEnd(payload)) {
+ if (activeMessageId && payload.messageId !== activeMessageId) {
+ if (rejectCompletion) {
+ rejectCompletion(
+ new Error(
+ `Mismatched stream-end message ID. Expected ${activeMessageId}, received ${payload.messageId}`
+ )
+ );
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ return;
+ }
+ if (streamLineOpen) {
+ writeHuman("\n");
+ streamLineOpen = false;
+ }
+ streamEnded = true;
+ if (resolveCompletion) {
+ resolveCompletion();
+ resolveCompletion = null;
+ rejectCompletion = null;
+ }
+ activeMessageId = null;
+ }
+ };
+
+ const unsubscribe = await session.subscribeChat(chatListener);
+
+ try {
+ await sendAndAwait(messageText, buildSendOptions(initialMode));
+
+ const planWasProposed = planProposed;
+ planProposed = false;
+ if (initialMode === "plan" && !planWasProposed) {
+ throw new Error("Plan mode was requested, but the assistant never proposed a plan.");
+ }
+ if (planWasProposed) {
+ writeHumanLine("\n[auto] Plan received. Approving and switching to execute mode...\n");
+ await sendAndAwait("Plan approved. Execute it.", buildSendOptions("exec"));
+ }
+
+ let finalEvent: WorkspaceChatMessage | undefined;
+ for (let i = liveEvents.length - 1; i >= 0; i -= 1) {
+ const candidate = liveEvents[i];
+ if (isStreamEnd(candidate)) {
+ finalEvent = candidate;
+ break;
+ }
+ }
+
+ if (!finalEvent || !isStreamEnd(finalEvent)) {
+ throw new Error("Stream ended without receiving stream-end event");
+ }
+
+ const parts = (finalEvent as unknown as { parts?: unknown }).parts ?? [];
+ const text = extractAssistantText(parts);
+ const reasoning = extractReasoning(parts);
+ const toolCalls = extractToolCalls(parts);
+
+ if (emitFinalJson) {
+ writeJson({
+ success: true,
+ data: {
+ messageId: finalEvent.messageId,
+ model: finalEvent.metadata?.model ?? null,
+ text,
+ reasoning,
+ toolCalls,
+ metadata: finalEvent.metadata ?? null,
+ parts,
+ events: liveEvents,
+ },
+ });
+ }
+ } finally {
+ unsubscribe();
+ session.dispose();
+ }
+}
+
+void (async () => {
+ try {
+ await main();
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ const wantsJsonStreaming =
+ process.argv.includes("--json-streaming") || process.argv.includes("--json-streaming=true");
+ const wantsJson = process.argv.includes("--json") || process.argv.includes("--json=true");
+
+ if (wantsJsonStreaming) {
+ process.stdout.write(`${JSON.stringify({ type: "error", error: message })}\n`);
+ }
+
+ if (wantsJson) {
+ writeJson({ success: false, error: message });
+ } else {
+ process.stderr.write(`Error: ${message}\n`);
+ }
+ process.exitCode = 1;
+ }
+})();
diff --git a/src/debug/chatExtractors.ts b/src/debug/chatExtractors.ts
new file mode 100644
index 0000000000..1650cf31d9
--- /dev/null
+++ b/src/debug/chatExtractors.ts
@@ -0,0 +1,42 @@
+import assert from "node:assert/strict";
+import type { CmuxReasoningPart, CmuxTextPart, CmuxToolPart } from "@/types/message";
+
+export function extractAssistantText(parts: unknown): string {
+ if (!Array.isArray(parts)) {
+ return "";
+ }
+
+ const textParts = (parts as CmuxTextPart[]).filter(
+ (part): part is CmuxTextPart => part.type === "text"
+ );
+ return textParts
+ .map((part) => {
+ assert(typeof part.text === "string", "Text part must include text");
+ return part.text;
+ })
+ .join("");
+}
+
+export function extractReasoning(parts: unknown): string[] {
+ if (!Array.isArray(parts)) {
+ return [];
+ }
+
+ const reasoningParts = (parts as CmuxReasoningPart[]).filter(
+ (part): part is CmuxReasoningPart => part.type === "reasoning"
+ );
+ return reasoningParts.map((part) => {
+ assert(typeof part.text === "string", "Reasoning part must include text");
+ return part.text;
+ });
+}
+
+export function extractToolCalls(parts: unknown): CmuxToolPart[] {
+ if (!Array.isArray(parts)) {
+ return [];
+ }
+
+ return (parts as CmuxToolPart[]).filter(
+ (part): part is CmuxToolPart => part.type === "dynamic-tool"
+ );
+}
diff --git a/src/services/agentSession.ts b/src/services/agentSession.ts
new file mode 100644
index 0000000000..aef1608068
--- /dev/null
+++ b/src/services/agentSession.ts
@@ -0,0 +1,383 @@
+import assert from "node:assert/strict";
+import { EventEmitter } from "events";
+import * as path from "path";
+import { createCmuxMessage } from "@/types/message";
+import type { Config } from "@/config";
+import type { AIService } from "@/services/aiService";
+import type { HistoryService } from "@/services/historyService";
+import type { PartialService } from "@/services/partialService";
+import type { WorkspaceMetadata } from "@/types/workspace";
+import type { WorkspaceChatMessage, StreamErrorMessage, SendMessageOptions } from "@/types/ipc";
+import type { SendMessageError } from "@/types/errors";
+import { createUnknownSendMessageError } from "@/services/utils/sendMessageError";
+import type { Result } from "@/types/result";
+import { Ok, Err } from "@/types/result";
+
+interface ImagePart {
+ image: string;
+ mimeType: string;
+}
+
+export interface AgentSessionChatEvent {
+ workspaceId: string;
+ message: WorkspaceChatMessage;
+}
+
+export interface AgentSessionMetadataEvent {
+ workspaceId: string;
+ metadata: WorkspaceMetadata | null;
+}
+
+interface AgentSessionOptions {
+ workspaceId: string;
+ config: Config;
+ historyService: HistoryService;
+ partialService: PartialService;
+ aiService: AIService;
+}
+
+export class AgentSession {
+ private readonly workspaceId: string;
+ private readonly config: Config;
+ private readonly historyService: HistoryService;
+ private readonly partialService: PartialService;
+ private readonly aiService: AIService;
+ private readonly emitter = new EventEmitter();
+ private readonly aiListeners: Array<{ event: string; handler: (...args: unknown[]) => void }> =
+ [];
+ private disposed = false;
+
+ constructor(options: AgentSessionOptions) {
+ assert(options, "AgentSession requires options");
+ const { workspaceId, config, historyService, partialService, aiService } = options;
+
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmedWorkspaceId = workspaceId.trim();
+ assert(trimmedWorkspaceId.length > 0, "workspaceId must not be empty");
+
+ this.workspaceId = trimmedWorkspaceId;
+ this.config = config;
+ this.historyService = historyService;
+ this.partialService = partialService;
+ this.aiService = aiService;
+
+ this.attachAiListeners();
+ }
+
+ dispose(): void {
+ if (this.disposed) {
+ return;
+ }
+ this.disposed = true;
+ for (const { event, handler } of this.aiListeners) {
+ this.aiService.off(event, handler as never);
+ }
+ this.aiListeners.length = 0;
+ this.emitter.removeAllListeners();
+ }
+
+ onChatEvent(listener: (event: AgentSessionChatEvent) => void): () => void {
+ assert(typeof listener === "function", "listener must be a function");
+ this.emitter.on("chat-event", listener);
+ return () => {
+ this.emitter.off("chat-event", listener);
+ };
+ }
+
+ onMetadataEvent(listener: (event: AgentSessionMetadataEvent) => void): () => void {
+ assert(typeof listener === "function", "listener must be a function");
+ this.emitter.on("metadata-event", listener);
+ return () => {
+ this.emitter.off("metadata-event", listener);
+ };
+ }
+
+ async subscribeChat(listener: (event: AgentSessionChatEvent) => void): Promise<() => void> {
+ this.assertNotDisposed("subscribeChat");
+ assert(typeof listener === "function", "listener must be a function");
+
+ const unsubscribe = this.onChatEvent(listener);
+ await this.emitHistoricalEvents(listener);
+
+ return unsubscribe;
+ }
+
+ async replayHistory(listener: (event: AgentSessionChatEvent) => void): Promise<void> {
+ this.assertNotDisposed("replayHistory");
+ assert(typeof listener === "function", "listener must be a function");
+ await this.emitHistoricalEvents(listener);
+ }
+
+ emitMetadata(metadata: WorkspaceMetadata | null): void {
+ this.assertNotDisposed("emitMetadata");
+ this.emitter.emit("metadata-event", {
+ workspaceId: this.workspaceId,
+ metadata,
+ } satisfies AgentSessionMetadataEvent);
+ }
+
+ private async emitHistoricalEvents(
+ listener: (event: AgentSessionChatEvent) => void
+ ): Promise<void> {
+ const historyResult = await this.historyService.getHistory(this.workspaceId);
+ if (historyResult.success) {
+ for (const message of historyResult.data) {
+ listener({ workspaceId: this.workspaceId, message });
+ }
+ }
+
+ const streamInfo = this.aiService.getStreamInfo(this.workspaceId);
+ const partial = await this.partialService.readPartial(this.workspaceId);
+
+ if (streamInfo) {
+ this.aiService.replayStream(this.workspaceId);
+ } else if (partial) {
+ listener({ workspaceId: this.workspaceId, message: partial });
+ }
+
+ listener({
+ workspaceId: this.workspaceId,
+ message: { type: "caught-up" },
+ });
+ }
+
+ async ensureMetadata(args: { workspacePath: string; projectName?: string }): Promise<void> {
+ this.assertNotDisposed("ensureMetadata");
+ assert(args, "ensureMetadata requires arguments");
+ const { workspacePath, projectName } = args;
+
+ assert(typeof workspacePath === "string", "workspacePath must be a string");
+ const trimmedWorkspacePath = workspacePath.trim();
+ assert(trimmedWorkspacePath.length > 0, "workspacePath must not be empty");
+
+ const normalizedWorkspacePath = path.resolve(trimmedWorkspacePath);
+ const existing = await this.aiService.getWorkspaceMetadata(this.workspaceId);
+
+ if (existing.success) {
+ const metadata = existing.data;
+ assert(
+ metadata.workspacePath === normalizedWorkspacePath,
+ `Existing metadata workspacePath mismatch for ${this.workspaceId}`
+ );
+ return;
+ }
+
+ const derivedProjectName =
+ projectName && projectName.trim().length > 0
+ ? projectName.trim()
+ : path.basename(path.dirname(normalizedWorkspacePath)) || "unknown";
+
+ const metadata: WorkspaceMetadata = {
+ id: this.workspaceId,
+ projectName: derivedProjectName,
+ workspacePath: normalizedWorkspacePath,
+ };
+
+ const saveResult = await this.aiService.saveWorkspaceMetadata(this.workspaceId, metadata);
+ if (!saveResult.success) {
+ const errorDetail = saveResult.error ?? "unknown";
+ throw new Error(`Failed to save metadata: ${errorDetail}`);
+ }
+ this.emitMetadata(metadata);
+ }
+
+ async sendMessage(
+ message: string,
+ options?: SendMessageOptions & { imageParts?: ImagePart[] }
+ ): Promise<Result<void, SendMessageError>> {
+ this.assertNotDisposed("sendMessage");
+
+ assert(typeof message === "string", "sendMessage requires a string message");
+ const trimmedMessage = message.trim();
+ const imageParts = options?.imageParts;
+
+ if (trimmedMessage.length === 0 && (!imageParts || imageParts.length === 0)) {
+ return Err(
+ createUnknownSendMessageError(
+ "Empty message not allowed. Use interruptStream() to interrupt active streams."
+ )
+ );
+ }
+
+ if (options?.editMessageId) {
+ const truncateResult = await this.historyService.truncateAfterMessage(
+ this.workspaceId,
+ options.editMessageId
+ );
+ if (!truncateResult.success) {
+ return Err(createUnknownSendMessageError(truncateResult.error));
+ }
+ }
+
+ const messageId = `user-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
+ const additionalParts =
+ imageParts && imageParts.length > 0
+ ? imageParts.map((img) => {
+ assert(typeof img.image === "string", "image part must include base64 string content");
+ assert(
+ typeof img.mimeType === "string" && img.mimeType.trim().length > 0,
+ "image part must include a mimeType"
+ );
+ return {
+ type: "image" as const,
+ image: img.image,
+ mimeType: img.mimeType,
+ };
+ })
+ : undefined;
+
+ const userMessage = createCmuxMessage(
+ messageId,
+ "user",
+ message,
+ {
+ timestamp: Date.now(),
+ toolPolicy: options?.toolPolicy,
+ },
+ additionalParts
+ );
+
+ const appendResult = await this.historyService.appendToHistory(this.workspaceId, userMessage);
+ if (!appendResult.success) {
+ return Err(createUnknownSendMessageError(appendResult.error));
+ }
+
+ this.emitChatEvent(userMessage);
+
+ if (!options?.model || options.model.trim().length === 0) {
+ return Err(
+ createUnknownSendMessageError("No model specified. Please select a model using /model.")
+ );
+ }
+
+ return this.streamWithHistory(options.model, options);
+ }
+
+ async resumeStream(options: SendMessageOptions): Promise<Result<void, SendMessageError>> {
+ this.assertNotDisposed("resumeStream");
+
+ assert(options, "resumeStream requires options");
+ const { model } = options;
+ assert(typeof model === "string" && model.trim().length > 0, "resumeStream requires a model");
+
+ if (this.aiService.isStreaming(this.workspaceId)) {
+ return Ok(undefined);
+ }
+
+ return this.streamWithHistory(model, options);
+ }
+
+ async interruptStream(): Promise<Result<void, SendMessageError>> {
+ this.assertNotDisposed("interruptStream");
+
+ if (!this.aiService.isStreaming(this.workspaceId)) {
+ return Ok(undefined);
+ }
+
+ const stopResult = await this.aiService.stopStream(this.workspaceId);
+ if (!stopResult.success) {
+ return Err(stopResult.error);
+ }
+
+ return Ok(undefined);
+ }
+
+ private async streamWithHistory(
+ modelString: string,
+ options?: SendMessageOptions
+ ): Promise<Result<void, SendMessageError>> {
+ const commitResult = await this.partialService.commitToHistory(this.workspaceId);
+ if (!commitResult.success) {
+ return Err(createUnknownSendMessageError(commitResult.error));
+ }
+
+ const historyResult = await this.historyService.getHistory(this.workspaceId);
+ if (!historyResult.success) {
+ return Err(createUnknownSendMessageError(historyResult.error));
+ }
+
+ const streamResult = await this.aiService.streamMessage(
+ historyResult.data,
+ this.workspaceId,
+ modelString,
+ options?.thinkingLevel,
+ options?.toolPolicy,
+ undefined,
+ options?.additionalSystemInstructions,
+ options?.maxOutputTokens,
+ options?.providerOptions,
+ options?.mode
+ );
+
+ return streamResult;
+ }
+
+ private attachAiListeners(): void {
+ const forward = (event: string, handler: (payload: WorkspaceChatMessage) => void) => {
+ const wrapped = (...args: unknown[]) => {
+ const [payload] = args;
+ if (
+ typeof payload === "object" &&
+ payload !== null &&
+ "workspaceId" in payload &&
+ (payload as { workspaceId: unknown }).workspaceId !== this.workspaceId
+ ) {
+ return;
+ }
+ handler(payload as WorkspaceChatMessage);
+ };
+ this.aiListeners.push({ event, handler: wrapped });
+ this.aiService.on(event, wrapped as never);
+ };
+
+ forward("stream-start", (payload) => this.emitChatEvent(payload));
+ forward("stream-delta", (payload) => this.emitChatEvent(payload));
+ forward("stream-end", (payload) => this.emitChatEvent(payload));
+ forward("tool-call-start", (payload) => this.emitChatEvent(payload));
+ forward("tool-call-delta", (payload) => this.emitChatEvent(payload));
+ forward("tool-call-end", (payload) => this.emitChatEvent(payload));
+ forward("reasoning-delta", (payload) => this.emitChatEvent(payload));
+ forward("reasoning-end", (payload) => this.emitChatEvent(payload));
+ forward("stream-abort", (payload) => this.emitChatEvent(payload));
+
+ const errorHandler = (...args: unknown[]) => {
+ const [raw] = args;
+ if (
+ typeof raw !== "object" ||
+ raw === null ||
+ !("workspaceId" in raw) ||
+ (raw as { workspaceId: unknown }).workspaceId !== this.workspaceId
+ ) {
+ return;
+ }
+ const data = raw as {
+ workspaceId: string;
+ messageId: string;
+ error: string;
+ errorType?: string;
+ };
+ const streamError: StreamErrorMessage = {
+ type: "stream-error",
+ messageId: data.messageId,
+ error: data.error,
+ errorType: (data.errorType ?? "unknown") as StreamErrorMessage["errorType"],
+ };
+ this.emitChatEvent(streamError);
+ };
+
+ this.aiListeners.push({ event: "error", handler: errorHandler });
+ this.aiService.on("error", errorHandler as never);
+ }
+
+ private emitChatEvent(message: WorkspaceChatMessage): void {
+ this.assertNotDisposed("emitChatEvent");
+ this.emitter.emit("chat-event", {
+ workspaceId: this.workspaceId,
+ message,
+ } satisfies AgentSessionChatEvent);
+ }
+
+ private assertNotDisposed(operation: string): void {
+ assert(!this.disposed, `AgentSession.${operation} called after dispose`);
+ }
+}
diff --git a/src/services/aiService.ts b/src/services/aiService.ts
index e6e59ed8c9..8cdbf3eb44 100644
--- a/src/services/aiService.ts
+++ b/src/services/aiService.ts
@@ -509,6 +509,12 @@ export class AIService extends EventEmitter {
// Apply tool policy to filter tools (if policy provided)
const tools = applyToolPolicy(allTools, toolPolicy);
+ log.info("AIService.streamMessage: tool configuration", {
+ workspaceId,
+ model: modelString,
+ toolNames: Object.keys(tools),
+ hasToolPolicy: Boolean(toolPolicy),
+ });
// Create assistant message placeholder with historySequence from backend
const assistantMessageId = `assistant-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
diff --git a/src/services/ipcMain.ts b/src/services/ipcMain.ts
index fa8b561725..63e9cce91e 100644
--- a/src/services/ipcMain.ts
+++ b/src/services/ipcMain.ts
@@ -1,3 +1,4 @@
+import assert from "node:assert/strict";
import type { BrowserWindow, IpcMain as ElectronIpcMain } from "electron";
import { spawn, spawnSync } from "child_process";
import * as path from "path";
@@ -14,33 +15,18 @@ import { removeWorktreeSafe, removeWorktree, pruneWorktrees } from "@/services/g
import { AIService } from "@/services/aiService";
import { HistoryService } from "@/services/historyService";
import { PartialService } from "@/services/partialService";
-import { createCmuxMessage, type CmuxMessage } from "@/types/message";
+import { AgentSession } from "@/services/agentSession";
+import type { CmuxMessage } from "@/types/message";
import { log } from "@/services/log";
-import type {
- StreamStartEvent,
- StreamDeltaEvent,
- StreamEndEvent,
- StreamAbortEvent,
- ToolCallStartEvent,
- ToolCallDeltaEvent,
- ToolCallEndEvent,
- ErrorEvent,
-} from "@/types/stream";
import { IPC_CHANNELS, getChatChannel } from "@/constants/ipc-constants";
import type { SendMessageError } from "@/types/errors";
-import type { StreamErrorMessage, SendMessageOptions, DeleteMessage } from "@/types/ipc";
-import { Ok, Err, type Result } from "@/types/result";
+import type { SendMessageOptions, DeleteMessage } from "@/types/ipc";
+import { Ok, Err } from "@/types/result";
import { validateWorkspaceName } from "@/utils/validation/workspaceValidation";
import { createBashTool } from "@/services/tools/bash";
import type { BashToolResult } from "@/types/tools";
-
import { secretsToRecord } from "@/types/secrets";
-const createUnknownSendMessageError = (raw: string): SendMessageError => ({
- type: "unknown",
- raw,
-});
-
/**
* IpcMain - Manages all IPC handlers and service coordination
*
@@ -59,6 +45,11 @@ export class IpcMain {
private readonly historyService: HistoryService;
private readonly partialService: PartialService;
private readonly aiService: AIService;
+ private readonly sessions = new Map<string, AgentSession>();
+ private readonly sessionSubscriptions = new Map<
+ string,
+ { chat: () => void; metadata: () => void }
+ >();
private mainWindow: BrowserWindow | null = null;
private registered = false;
@@ -69,6 +60,68 @@ export class IpcMain {
this.aiService = new AIService(config, this.historyService, this.partialService);
}
+ private getOrCreateSession(workspaceId: string): AgentSession {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmed = workspaceId.trim();
+ assert(trimmed.length > 0, "workspaceId must not be empty");
+
+ let session = this.sessions.get(trimmed);
+ if (session) {
+ return session;
+ }
+
+ session = new AgentSession({
+ workspaceId: trimmed,
+ config: this.config,
+ historyService: this.historyService,
+ partialService: this.partialService,
+ aiService: this.aiService,
+ });
+
+ const chatUnsubscribe = session.onChatEvent((event) => {
+ if (!this.mainWindow) {
+ return;
+ }
+ const channel = getChatChannel(event.workspaceId);
+ this.mainWindow.webContents.send(channel, event.message);
+ });
+
+ const metadataUnsubscribe = session.onMetadataEvent((event) => {
+ if (!this.mainWindow) {
+ return;
+ }
+ this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
+ workspaceId: event.workspaceId,
+ metadata: event.metadata,
+ });
+ });
+
+ this.sessions.set(trimmed, session);
+ this.sessionSubscriptions.set(trimmed, {
+ chat: chatUnsubscribe,
+ metadata: metadataUnsubscribe,
+ });
+
+ return session;
+ }
+
+ private disposeSession(workspaceId: string): void {
+ const session = this.sessions.get(workspaceId);
+ if (!session) {
+ return;
+ }
+
+ const subscriptions = this.sessionSubscriptions.get(workspaceId);
+ if (subscriptions) {
+ subscriptions.chat();
+ subscriptions.metadata();
+ this.sessionSubscriptions.delete(workspaceId);
+ }
+
+ session.dispose();
+ this.sessions.delete(workspaceId);
+ }
+
/**
* Register all IPC handlers and setup event forwarding
* @param ipcMain - Electron's ipcMain module
@@ -90,7 +143,6 @@ export class IpcMain {
this.registerProviderHandlers(ipcMain);
this.registerProjectHandlers(ipcMain);
this.registerSubscriptionHandlers(ipcMain);
- this.setupEventForwarding();
this.registered = true;
}
@@ -121,65 +173,6 @@ export class IpcMain {
});
}
- /**
- * Helper method: Stream AI response with history
- * Shared logic between sendMessage and resumeStream handlers
- */
- private async streamWithHistory(
- workspaceId: string,
- modelString: string,
- options?: SendMessageOptions
- ): Promise<Result<void, SendMessageError>> {
- const {
- thinkingLevel,
- toolPolicy,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- mode,
- } = options ?? {};
-
- // Commit any existing partial to history BEFORE loading
- // This ensures interrupted messages are included in the AI's context
- await this.partialService.commitToHistory(workspaceId);
-
- // Get full conversation history
- const historyResult = await this.historyService.getHistory(workspaceId);
- if (!historyResult.success) {
- log.error("Failed to get conversation history:", historyResult.error);
- return {
- success: false,
- error: createUnknownSendMessageError(historyResult.error),
- };
- }
-
- // Stream the AI response
- log.debug("Calling aiService.streamMessage", {
- workspaceId,
- thinkingLevel,
- modelString,
- toolPolicy,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- });
-
- const streamResult = await this.aiService.streamMessage(
- historyResult.data,
- workspaceId,
- modelString,
- thinkingLevel,
- toolPolicy,
- undefined,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- mode
- );
- log.debug("Stream completed", { workspaceId });
- return streamResult;
- }
-
private registerWorkspaceHandlers(ipcMain: ElectronIpcMain): void {
ipcMain.handle(
IPC_CHANNELS.WORKSPACE_CREATE,
@@ -235,10 +228,8 @@ export class IpcMain {
});
// Emit metadata event for new workspace
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId,
- metadata,
- });
+ const session = this.getOrCreateSession(workspaceId);
+ session.emitMetadata(metadata);
return {
success: true,
@@ -380,16 +371,20 @@ export class IpcMain {
});
// Emit metadata event for old workspace deletion
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId,
- metadata: null,
- });
+ const oldSession = this.sessions.get(workspaceId);
+ if (oldSession) {
+ oldSession.emitMetadata(null);
+ this.disposeSession(workspaceId);
+ } else if (this.mainWindow) {
+ this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
+ workspaceId,
+ metadata: null,
+ });
+ }
// Emit metadata event for new workspace
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId: newWorkspaceId,
- metadata: newMetadata,
- });
+ const newSession = this.getOrCreateSession(newWorkspaceId);
+ newSession.emitMetadata(newMetadata);
return Ok({ newWorkspaceId });
} catch (error) {
@@ -421,113 +416,23 @@ export class IpcMain {
message: string,
options?: SendMessageOptions & { imageParts?: Array<{ image: string; mimeType: string }> }
) => {
- const {
- editMessageId,
- thinkingLevel,
- model,
- toolPolicy,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
- imageParts,
- mode, // Passed to streamWithHistory for plan file loading
- } = options ?? {};
log.debug("sendMessage handler: Received", {
workspaceId,
messagePreview: message.substring(0, 50),
- editMessageId,
- thinkingLevel,
- model,
- toolPolicy,
- mode,
- additionalSystemInstructions,
- maxOutputTokens,
- providerOptions,
+ mode: options?.mode,
+ options,
});
try {
- // Reject empty messages - use interruptStream() to interrupt active streams
- if (!message.trim() && (!imageParts || imageParts.length === 0)) {
- log.debug("sendMessage handler: Rejected empty message (use interruptStream instead)");
- return {
- success: false,
- error: {
- type: "unknown",
- raw: "Empty message not allowed. Use interruptStream() to interrupt active streams.",
- },
- };
- }
-
- // If editing, truncate history after the message being edited
- if (editMessageId) {
- const truncateResult = await this.historyService.truncateAfterMessage(
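+ // Delegate message handling to the per-workspace session.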
+ const session = this.getOrCreateSession(workspaceId);
+ const result = await session.sendMessage(message, options);
+ if (!result.success) {
+ log.error("sendMessage handler: session returned error", {
workspaceId,
- editMessageId
- );
- if (!truncateResult.success) {
- log.error("Failed to truncate history for edit:", truncateResult.error);
- return {
- success: false,
- error: createUnknownSendMessageError(truncateResult.error),
- };
- }
- // Note: We don't send a clear event here. The aggregator will handle
- // replacement automatically when the new message arrives with the same historySequence
- }
-
- // Create user message with text and optional image parts
- const messageId = `user-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
- const additionalParts = imageParts?.map((img) => ({
- type: "image" as const,
- image: img.image,
- mimeType: img.mimeType,
- }));
- if (additionalParts && additionalParts.length > 0) {
- log.debug("sendMessage: Creating message with images", {
- imageCount: additionalParts.length,
- mimeTypes: additionalParts.map((p) => p.mimeType),
+ error: result.error,
});
}
- const userMessage = createCmuxMessage(
- messageId,
- "user",
- message,
- {
- // historySequence will be assigned by historyService.appendToHistory()
- timestamp: Date.now(),
- toolPolicy, // Store for historical record and compaction detection
- },
- additionalParts
- );
-
- // Append user message to history
- const appendResult = await this.historyService.appendToHistory(workspaceId, userMessage);
- if (!appendResult.success) {
- log.error("Failed to append message to history:", appendResult.error);
- return {
- success: false,
- error: createUnknownSendMessageError(appendResult.error),
- };
- }
-
- // Broadcast the user message immediately to the frontend
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(workspaceId), userMessage);
- }
-
- // Stream the AI response
- if (!model) {
- log.error("No model provided by frontend");
- return {
- success: false,
- error: createUnknownSendMessageError(
- "No model specified. Please select a model using /model command."
- ),
- };
- }
-
- return await this.streamWithHistory(workspaceId, model, options);
+ return result;
} catch (error) {
- // Convert to SendMessageError for typed error handling
const errorMessage = error instanceof Error ? error.message : String(error);
log.error("Unexpected error in sendMessage handler:", error);
const sendError: SendMessageError = {
@@ -547,15 +452,15 @@ export class IpcMain {
options,
});
try {
- // Idempotent: if stream already active, return success (not error)
- // This makes client code simpler and more resilient
- if (this.aiService.isStreaming(workspaceId)) {
- log.debug("resumeStream handler: Stream already active, returning success");
- return { success: true };
+ const session = this.getOrCreateSession(workspaceId);
+ const result = await session.resumeStream(options);
+ if (!result.success) {
+ log.error("resumeStream handler: session returned error", {
+ workspaceId,
+ error: result.error,
+ });
}
-
- // Stream the AI response with existing history (no new user message)
- return await this.streamWithHistory(workspaceId, options.model, options);
+ return result;
} catch (error) {
// Convert to SendMessageError for typed error handling
const errorMessage = error instanceof Error ? error.message : String(error);
@@ -572,13 +477,8 @@ export class IpcMain {
ipcMain.handle(IPC_CHANNELS.WORKSPACE_INTERRUPT_STREAM, async (_event, workspaceId: string) => {
log.debug("interruptStream handler: Received", { workspaceId });
try {
- // Idempotent: if not streaming, return success (not error)
- if (!this.aiService.isStreaming(workspaceId)) {
- log.debug("interruptStream handler: Not streaming, returning success");
- return { success: true, data: undefined };
- }
-
- const stopResult = await this.aiService.stopStream(workspaceId);
+ const session = this.getOrCreateSession(workspaceId);
+ const stopResult = await session.interruptStream();
if (!stopResult.success) {
log.error("Failed to stop stream:", stopResult.error);
return { success: false, error: stopResult.error };
@@ -896,10 +796,17 @@ export class IpcMain {
}
// Emit metadata event for workspace removal (with null metadata to indicate deletion)
- this.mainWindow?.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
- workspaceId,
- metadata: null, // null indicates workspace was deleted
- });
+ const existingSession = this.sessions.get(workspaceId);
+ if (existingSession) {
+ existingSession.emitMetadata(null);
+ } else if (this.mainWindow) {
+ this.mainWindow.webContents.send(IPC_CHANNELS.WORKSPACE_METADATA, {
+ workspaceId,
+ metadata: null,
+ });
+ }
+
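+ // Drop the session now that the workspace has been removed.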
+ this.disposeSession(workspaceId);
return { success: true };
} catch (error) {
@@ -1072,31 +979,15 @@ export class IpcMain {
// Handle subscription events for chat history
ipcMain.on(`workspace:chat:subscribe`, (_event, workspaceId: string) => {
void (async () => {
+ const session = this.getOrCreateSession(workspaceId);
const chatChannel = getChatChannel(workspaceId);
- const history = await this.historyService.getHistory(workspaceId);
- if (history.success) {
- for (const msg of history.data) {
- this.mainWindow?.webContents.send(chatChannel, msg);
- }
-
- // Check if there's an active stream or a partial message
- const streamInfo = this.aiService.getStreamInfo(workspaceId);
- const partial = await this.partialService.readPartial(workspaceId);
-
- if (streamInfo) {
- // Stream is actively running - replay events to re-establish streaming context
- // Events flow: StreamManager → AIService → IpcMain → renderer
- // This ensures frontend receives stream-start and creates activeStream entry
- // so that stream-end can properly clean up the streaming indicator
- this.aiService.replayStream(workspaceId);
- } else if (partial) {
- // No active stream but there's a partial - send as regular message (shows CONTINUE)
- this.mainWindow?.webContents.send(chatChannel, partial);
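+ // Replay stored chat history to the freshly subscribed renderer via the session.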
+ await session.replayHistory((event) => {
+ if (!this.mainWindow) {
+ return;
}
- }
-
- this.mainWindow?.webContents.send(chatChannel, { type: "caught-up" });
+ this.mainWindow.webContents.send(chatChannel, event.message);
+ });
})();
});
@@ -1118,89 +1009,6 @@ export class IpcMain {
});
}
- private setupEventForwarding(): void {
- // Set up event listeners for AI service
- this.aiService.on("stream-start", (data: StreamStartEvent) => {
- if (this.mainWindow) {
- // Send the actual stream-start event
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("stream-delta", (data: StreamDeltaEvent) => {
- if (this.mainWindow) {
- // Send ONLY the delta event - efficient IPC usage
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("stream-end", (data: StreamEndEvent) => {
- if (this.mainWindow) {
- // Send the stream-end event with final content and metadata
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- // Forward tool events to renderer
- this.aiService.on("tool-call-start", (data: ToolCallStartEvent) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("tool-call-delta", (data: ToolCallDeltaEvent) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- this.aiService.on("tool-call-end", (data: ToolCallEndEvent) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
-
- // Forward reasoning events to renderer
- this.aiService.on(
- "reasoning-delta",
- (data: { type: string; workspaceId: string; messageId: string; delta: string }) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- }
- );
-
- this.aiService.on(
- "reasoning-end",
- (data: { type: string; workspaceId: string; messageId: string }) => {
- if (this.mainWindow) {
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- }
- );
-
- this.aiService.on("error", (data: ErrorEvent) => {
- if (this.mainWindow) {
- // Send properly typed StreamErrorMessage
- const errorMessage: StreamErrorMessage = {
- type: "stream-error",
- messageId: data.messageId,
- error: data.error,
- errorType: data.errorType ?? "unknown",
- };
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), errorMessage);
- }
- });
-
- // Handle stream abort events
- this.aiService.on("stream-abort", (data: StreamAbortEvent) => {
- if (this.mainWindow) {
- // Forward complete abort event including metadata (usage, duration)
- this.mainWindow.webContents.send(getChatChannel(data.workspaceId), data);
- }
- });
- }
-
/**
* Check if a command is available in the system PATH
*/
diff --git a/src/services/utils/sendMessageError.ts b/src/services/utils/sendMessageError.ts
new file mode 100644
index 0000000000..6b34497796
--- /dev/null
+++ b/src/services/utils/sendMessageError.ts
@@ -0,0 +1,17 @@
+import assert from "node:assert/strict";
+import type { SendMessageError } from "@/types/errors";
+
+/**
+ * Helper to wrap arbitrary errors into SendMessageError structures.
+ * Asserts that the raw string is non-empty so surfaced errors always carry a message.
+ */
+export const createUnknownSendMessageError = (raw: string): SendMessageError => {
+ assert(typeof raw === "string", "Expected raw error to be a string");
+ const trimmed = raw.trim();
+ assert(trimmed.length > 0, "createUnknownSendMessageError requires a non-empty message");
+
+ return {
+ type: "unknown",
+ raw: trimmed,
+ };
+};
diff --git a/src/utils/ai/providerOptions.ts b/src/utils/ai/providerOptions.ts
index b4926aa7ce..65497ef46e 100644
--- a/src/utils/ai/providerOptions.ts
+++ b/src/utils/ai/providerOptions.ts
@@ -93,7 +93,7 @@ export function buildProviderOptions(
}),
},
};
- log.info("buildProviderOptions: Returning Anthropic options", options);
+ log.debug("buildProviderOptions: Returning Anthropic options", options);
return options;
}
diff --git a/src/utils/providers/ensureProvidersConfig.ts b/src/utils/providers/ensureProvidersConfig.ts
new file mode 100644
index 0000000000..0b205dcfa4
--- /dev/null
+++ b/src/utils/providers/ensureProvidersConfig.ts
@@ -0,0 +1,112 @@
+import type { Config, ProviderConfig, ProvidersConfig } from "@/config";
+
+const trim = (value: unknown): string => (typeof value === "string" ? value.trim() : "");
+
+const hasApiKey = (config: ProviderConfig | undefined): boolean =>
+ Boolean(config && typeof config.apiKey === "string" && config.apiKey.trim().length > 0);
+
+const hasAnyConfiguredProvider = (providers: ProvidersConfig | null | undefined): boolean => {
+ if (!providers) {
+ return false;
+ }
+
+ return Object.values(providers).some((providerConfig) => hasApiKey(providerConfig));
+};
+
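+// Derive a ProvidersConfig from well-known environment variables (Anthropic, OpenAI, Azure OpenAI).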
+const buildProvidersFromEnv = (env: NodeJS.ProcessEnv): ProvidersConfig => {
+ const providers: ProvidersConfig = {};
+
+ const anthropicKey = trim(env.ANTHROPIC_API_KEY);
+ if (anthropicKey.length > 0) {
+ const entry: ProviderConfig = { apiKey: anthropicKey };
+
+ const baseUrl = trim(env.ANTHROPIC_BASE_URL);
+ if (baseUrl.length > 0) {
+ entry.baseUrl = baseUrl;
+ }
+
+ providers.anthropic = entry;
+ }
+
+ const openAIKey = trim(env.OPENAI_API_KEY);
+ if (openAIKey.length > 0) {
+ const entry: ProviderConfig = { apiKey: openAIKey };
+
+ const baseUrlCandidates = [env.OPENAI_BASE_URL, env.OPENAI_API_BASE];
+ for (const candidate of baseUrlCandidates) {
+ const baseUrl = trim(candidate);
+ if (baseUrl.length > 0) {
+ entry.baseUrl = baseUrl;
+ break;
+ }
+ }
+
+ const organization = trim(env.OPENAI_ORG_ID);
+ if (organization.length > 0) {
+ entry.organization = organization;
+ }
+
+ providers.openai = entry;
+ }
+
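+ // Fall back to Azure OpenAI credentials when no direct OpenAI key is configured.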
+ if (!providers.openai) {
+ const azureKey = trim(env.AZURE_OPENAI_API_KEY);
+ const azureEndpoint = trim(env.AZURE_OPENAI_ENDPOINT);
+
+ if (azureKey.length > 0 && azureEndpoint.length > 0) {
+ const entry: ProviderConfig = {
+ apiKey: azureKey,
+ baseUrl: azureEndpoint,
+ };
+
+ const deployment = trim(env.AZURE_OPENAI_DEPLOYMENT);
+ if (deployment.length > 0) {
+ entry.defaultModel = deployment;
+ }
+
+ const apiVersion = trim(env.AZURE_OPENAI_API_VERSION);
+ if (apiVersion.length > 0) {
+ entry.apiVersion = apiVersion;
+ }
+
+ providers.openai = entry;
+ }
+ }
+
+ return providers;
+};
+
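+/**
+ * Return the stored providers config when at least one provider has an API key;
+ * otherwise build one from environment variables, persist it, and return it.
+ */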
+export const ensureProvidersConfig = (
+ config: Config,
+ env: NodeJS.ProcessEnv = process.env
+): ProvidersConfig => {
+ if (!config) {
+ throw new Error("Config instance is required to ensure providers configuration");
+ }
+
+ const existingProviders = config.loadProvidersConfig();
+ if (hasAnyConfiguredProvider(existingProviders)) {
+ if (!existingProviders) {
+ throw new Error(
+ "Providers config reported configured providers but returned null. Please validate providers.jsonc."
+ );
+ }
+ return existingProviders;
+ }
+
+ const providersFromEnv = buildProvidersFromEnv(env);
+ if (!hasAnyConfiguredProvider(providersFromEnv)) {
+ throw new Error(
+ "No provider credentials found. Configure providers.jsonc or set ANTHROPIC_API_KEY / OPENAI_API_KEY."
+ );
+ }
+
+ config.saveProvidersConfig(providersFromEnv);
+ return providersFromEnv;
+};
+
+export const getProvidersFromEnv = (env: NodeJS.ProcessEnv = process.env): ProvidersConfig =>
+ buildProvidersFromEnv(env);
+
+export const hasAnyProvidersConfigured = (providers: ProvidersConfig | null | undefined): boolean =>
+ hasAnyConfiguredProvider(providers);