Skip to content

Commit ab5a724

Browse files
authored
🤖 fix: repair terminal-bench agent path and source execution (#701)
Corrects the file path for the terminal-bench agent CLI entry point and fixes worker resolution when running from source.

## Changes

- **benchmarks/terminal_bench/mux-run.sh**: Updated path from `src/debug/agentSessionCli.ts` to `src/cli/debug/agentSessionCli.ts`.
- **src/node/utils/main/workerPool.ts**: Added support for resolving `tokenizer.worker.ts` when running directly from source (Bun), fixing a crash when `dist/` is missing.
- **docs/benchmarking.md**: Updated documentation to reflect the correct CLI path.

## Verification

- Verified locally that `src/cli/debug/agentSessionCli.ts` starts successfully and processes input without worker errors.
- Fixes the '0 successful trials' issue in nightly benchmarks caused by the agent crashing on startup.
1 parent c5475e1 commit ab5a724

File tree

5 files changed

+75
-6
lines changed

5 files changed

+75
-6
lines changed

Makefile

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,10 @@ build/icon.png: docs/img/logo.webp scripts/generate-icons.ts
213213
@bun scripts/generate-icons.ts png
214214

215215
## Quality checks (can run in parallel)
216-
static-check: lint typecheck fmt-check check-eager-imports ## Run all static checks (includes startup performance checks)
216+
static-check: lint typecheck fmt-check check-eager-imports check-bench-agent ## Run all static checks (includes startup performance checks)
217+
218+
check-bench-agent: ## Verify terminal-bench agent configuration and imports
219+
@./scripts/check-bench-agent.sh
217220

218221
lint: node_modules/.installed ## Run ESLint (typecheck runs in separate target)
219222
@./scripts/lint.sh

benchmarks/terminal_bench/mux-run.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ ensure_git_repo "${project_path}"
7777
log "starting mux agent session for ${project_path}"
7878
cd "${MUX_APP_ROOT}"
7979

80-
cmd=(bun src/debug/agentSessionCli.ts
80+
cmd=(bun src/cli/debug/agentSessionCli.ts
8181
--config-root "${MUX_CONFIG_ROOT}"
8282
--project-path "${project_path}"
8383
--workspace-path "${project_path}"

docs/benchmarking.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ The adapter lives in `benchmarks/terminal_bench/mux_agent.py`. For each task it:
6565

6666
1. Copies the mux repository (package manifests + `src/`) into `/tmp/mux-app` inside the container.
6767
2. Ensures Bun exists, then runs `bun install --frozen-lockfile`.
68-
3. Launches `src/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).
68+
3. Launches `src/cli/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).
6969

7070
`MUX_MODEL` accepts either the mux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide.
7171

scripts/check-bench-agent.sh

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
# This script verifies that the terminal-bench agent entry point
5+
# referenced in mux-run.sh is valid and can be executed (imports resolve).
6+
7+
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
8+
MUX_RUN_SH="$REPO_ROOT/benchmarks/terminal_bench/mux-run.sh"
9+
10+
echo "Checking terminal-bench agent configuration..."
11+
12+
if [[ ! -f "$MUX_RUN_SH" ]]; then
13+
echo "❌ Error: $MUX_RUN_SH not found"
14+
exit 1
15+
fi
16+
17+
# Extract the agent CLI path from mux-run.sh
18+
# Looks for line like: cmd=(bun src/cli/debug/agentSessionCli.ts
19+
CLI_PATH_MATCH=$(grep -o "bun src/.*\.ts" "$MUX_RUN_SH" | head -1 | cut -d' ' -f2)
20+
21+
if [[ -z "$CLI_PATH_MATCH" ]]; then
22+
echo "❌ Error: Could not find agent CLI path in $MUX_RUN_SH"
23+
exit 1
24+
fi
25+
26+
FULL_CLI_PATH="$REPO_ROOT/$CLI_PATH_MATCH"
27+
28+
echo "Found agent CLI path: $CLI_PATH_MATCH"
29+
30+
if [[ ! -f "$FULL_CLI_PATH" ]]; then
31+
echo "❌ Error: Referenced file $FULL_CLI_PATH does not exist"
32+
exit 1
33+
fi
34+
35+
echo "Verifying agent CLI startup (checking imports)..."
36+
37+
# Run with --help or no args to check if it boots without crashing on imports
38+
# We expect it to fail with "Unknown option" or "workspace-path required" but NOT with "Module not found" or "worker error"
39+
if ! output=$(bun "$FULL_CLI_PATH" --help 2>&1); then
40+
# It failed, which is expected (no args/bad args), but we need to check WHY
41+
exit_code=$?
42+
43+
# Check for known import/worker errors
44+
if echo "$output" | grep -qE "Module not found|Worker error|Cannot find module"; then
45+
echo "❌ Error: Agent CLI failed to start due to import/worker errors:"
46+
echo "$output"
47+
exit 1
48+
fi
49+
50+
# If it failed just because of arguments, that's fine - it means the code loaded.
51+
echo "✅ Agent CLI loaded successfully (ignoring argument errors)"
52+
else
53+
echo "✅ Agent CLI ran successfully"
54+
fi
55+
56+
echo "Terminal-bench agent check passed."

src/node/utils/main/workerPool.ts

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { Worker } from "node:worker_threads";
2-
import { join, dirname, sep } from "node:path";
2+
import { join, dirname, sep, extname } from "node:path";
33

44
interface WorkerRequest {
55
messageId: number;
@@ -37,15 +37,25 @@ const hasDist = pathParts.includes("dist");
3737
const srcIndex = pathParts.lastIndexOf("src");
3838

3939
let workerDir: string;
40-
if (srcIndex !== -1 && !hasDist) {
40+
let workerFile = "tokenizer.worker.js";
41+
42+
// Check if we're running under Bun (not Node with ts-jest)
43+
// ts-jest transpiles .ts files but runs them via Node, which can't load .ts workers
44+
const isBun = !!(process as unknown as { isBun?: boolean }).isBun;
45+
46+
if (isBun && extname(__filename) === ".ts") {
47+
// Running from source via Bun - use .ts worker directly
48+
workerDir = currentDir;
49+
workerFile = "tokenizer.worker.ts";
50+
} else if (srcIndex !== -1 && !hasDist) {
4151
// Replace 'src' with 'dist' in the path (only if not already in dist)
4252
pathParts[srcIndex] = "dist";
4353
workerDir = pathParts.join(sep);
4454
} else {
4555
workerDir = currentDir;
4656
}
4757

48-
const workerPath = join(workerDir, "tokenizer.worker.js");
58+
const workerPath = join(workerDir, workerFile);
4959
const worker = new Worker(workerPath);
5060

5161
// Handle messages from worker

0 commit comments

Comments
 (0)