11from __future__ import annotations
22
3- import io
43import os
54import shlex
6- import tarfile
7- import tempfile
85from pathlib import Path
9- from typing import Any
6+ from typing import Any , Sequence
107
118from terminal_bench .agents .base_agent import AgentResult
129from terminal_bench .agents .installed_agents .abstract_installed_agent import (
1512from terminal_bench .terminal .models import TerminalCommand
1613from terminal_bench .terminal .tmux_session import TmuxSession
1714
15+ from .cmux_payload import build_app_archive , stage_payload
16+
1817
1918class CmuxAgent (AbstractInstalledAgent ):
2019 """
@@ -25,10 +24,51 @@ class CmuxAgent(AbstractInstalledAgent):
# File name handed to stage_payload() as ``archive_name``.
_ARCHIVE_NAME = "cmux-app.tar.gz"
# Runner script file name; __init__ resolves it next to this module file.
_RUNNER_NAME = "cmux-run.sh"
# NOTE(review): consumers of the two defaults below are outside the visible
# hunks -- presumably fallbacks for CMUX_TRUNK / CMUX_MODEL; confirm.
_DEFAULT_TRUNK = "main"
_DEFAULT_MODEL = "anthropic:claude-sonnet-4-5"
# Fallback for CMUX_PROJECT_CANDIDATES when the environment does not set it.
_DEFAULT_PROJECT_CANDIDATES = "/workspace:/app:/workspaces:/root/project"
# Paths, relative to the cmux repo root, passed to build_app_archive().
_INCLUDE_PATHS: Sequence[str] = (
    "package.json",
    "bun.lock",
    "bunfig.toml",
    "tsconfig.json",
    "tsconfig.main.json",
    "src",
)

# Provider credential/endpoint variables copied from os.environ by ``_env``
# when they hold a non-empty value.
_PROVIDER_ENV_KEYS: Sequence[str] = (
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_BASE_URL",
    "OPENAI_API_KEY",
    "OPENAI_BASE_URL",
    "OPENAI_API_BASE",
    "OPENAI_ORG_ID",
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_DEPLOYMENT",
    "AZURE_OPENAI_API_VERSION",
)

# cmux configuration variables copied from os.environ by ``_env`` under the
# same non-empty rule as the provider keys.
_CONFIG_ENV_KEYS: Sequence[str] = (
    "CMUX_AGENT_GIT_URL",
    "CMUX_BUN_INSTALL_URL",
    "CMUX_PROJECT_PATH",
    "CMUX_PROJECT_CANDIDATES",
    "CMUX_TRUNK",
    "CMUX_MODEL",
    "CMUX_TIMEOUT_MS",
    "CMUX_THINKING_LEVEL",
    "CMUX_CONFIG_ROOT",
    "CMUX_APP_ROOT",
    "CMUX_WORKSPACE_ID",
    "CMUX_MODE",
)
2965
3066 def __init__ (
31- self , mode : str | None = None , thinking_level : str | None = None , ** kwargs : Any
67+ self ,
68+ model_name : str = "anthropic:claude-sonnet-4-5" ,
69+ mode : str | None = None ,
70+ thinking_level : str | None = None ,
71+ ** kwargs : Any ,
3272 ) -> None :
3373 super ().__init__ (** kwargs )
3474 repo_root_env = os .environ .get ("CMUX_AGENT_REPO_ROOT" )
@@ -40,47 +80,27 @@ def __init__(
4080 if not repo_root .exists ():
4181 raise RuntimeError (f"cmux repo root { repo_root } does not exist" )
4282
83+ runner_path = Path (__file__ ).with_name (self ._RUNNER_NAME )
84+ if not runner_path .is_file ():
85+ raise RuntimeError (f"cmux runner script missing at { runner_path } " )
86+
87+ self ._runner_path = runner_path
4388 self ._repo_root = repo_root
4489 self ._archive_bytes : bytes | None = None
45- self ._prepared_container_id : str | None = None
90+ self ._staged_container_id : str | None = None
4691 self ._mode = mode .lower () if mode else None
4792 self ._thinking_level = thinking_level .lower () if thinking_level else None
93+ self ._model_name = (model_name or "" ).strip ()
4894
4995 @staticmethod
5096 def name () -> str :
5197 return "cmux"
5298
5399 @property
54100 def _env (self ) -> dict [str , str ]:
55- keys = [
56- "ANTHROPIC_API_KEY" ,
57- "ANTHROPIC_BASE_URL" ,
58- "OPENAI_API_KEY" ,
59- "OPENAI_BASE_URL" ,
60- "OPENAI_API_BASE" ,
61- "OPENAI_ORG_ID" ,
62- "AZURE_OPENAI_API_KEY" ,
63- "AZURE_OPENAI_ENDPOINT" ,
64- "AZURE_OPENAI_DEPLOYMENT" ,
65- "AZURE_OPENAI_API_VERSION" ,
66- "MISTRAL_API_KEY" ,
67- "GOOGLE_API_KEY" ,
68- "OPENROUTER_API_KEY" ,
69- "CMUX_AGENT_GIT_URL" ,
70- "CMUX_BUN_INSTALL_URL" ,
71- "CMUX_PROJECT_PATH" ,
72- "CMUX_PROJECT_CANDIDATES" ,
73- "CMUX_TRUNK" ,
74- "CMUX_MODEL" ,
75- "CMUX_TIMEOUT_MS" ,
76- "CMUX_THINKING_LEVEL" ,
77- "CMUX_CONFIG_ROOT" ,
78- "CMUX_APP_ROOT" ,
79- "CMUX_WORKSPACE_ID" ,
80- ]
81-
82101 env : dict [str , str ] = {}
83- for key in keys :
102+
103+ for key in (* self ._PROVIDER_ENV_KEYS , * self ._CONFIG_ENV_KEYS ):
84104 value = os .environ .get (key )
85105 if value :
86106 env [key ] = value
@@ -92,30 +112,61 @@ def _env(self) -> dict[str, str]:
92112 env .setdefault ("CMUX_WORKSPACE_ID" , "cmux-bench" )
93113 env .setdefault ("CMUX_THINKING_LEVEL" , "high" )
94114 env .setdefault ("CMUX_MODE" , "exec" )
115+ env .setdefault ("CMUX_PROJECT_CANDIDATES" , self ._DEFAULT_PROJECT_CANDIDATES )
95116
96- model_value = env .get ("CMUX_MODEL" )
97- if model_value and "/" in model_value and ":" not in model_value :
117+ model_value = self ._model_name or env ["CMUX_MODEL" ]
118+ model_value = model_value .strip ()
119+ if not model_value :
120+ raise ValueError ("CMUX_MODEL must be a non-empty string" )
121+ if "/" in model_value and ":" not in model_value :
98122 provider , model_name = model_value .split ("/" , 1 )
99- env ["CMUX_MODEL" ] = f"{ provider } :{ model_name } "
100-
101- thinking_value = self ._thinking_level or env .get ("CMUX_THINKING_LEVEL" )
102- if thinking_value :
103- normalized = thinking_value .strip ().lower ()
104- if normalized not in {"off" , "low" , "medium" , "high" }:
105- raise ValueError (
106- "CMUX_THINKING_LEVEL must be one of off, low, medium, high"
107- )
108- env ["CMUX_THINKING_LEVEL" ] = normalized
109-
110- mode_value = self ._mode or env .get ("CMUX_MODE" )
111- if mode_value :
112- normalized_mode = mode_value .strip ().lower ()
113- if normalized_mode in {"exec" , "execute" }:
114- env ["CMUX_MODE" ] = "exec"
115- elif normalized_mode == "plan" :
116- env ["CMUX_MODE" ] = "plan"
117- else :
118- raise ValueError ("CMUX_MODE must be one of plan, exec, or execute" )
123+ model_value = f"{ provider } :{ model_name } "
124+ env ["CMUX_MODEL" ] = model_value
125+
126+ thinking_value = self ._thinking_level or env ["CMUX_THINKING_LEVEL" ]
127+ normalized_thinking = thinking_value .strip ().lower ()
128+ if normalized_thinking not in {"off" , "low" , "medium" , "high" }:
129+ raise ValueError (
130+ "CMUX_THINKING_LEVEL must be one of off, low, medium, high"
131+ )
132+ env ["CMUX_THINKING_LEVEL" ] = normalized_thinking
133+
134+ mode_value = self ._mode or env ["CMUX_MODE" ]
135+ normalized_mode = mode_value .strip ().lower ()
136+ if normalized_mode in {"exec" , "execute" }:
137+ env ["CMUX_MODE" ] = "exec"
138+ elif normalized_mode == "plan" :
139+ env ["CMUX_MODE" ] = "plan"
140+ else :
141+ raise ValueError ("CMUX_MODE must be one of plan, exec, or execute" )
142+
143+ config_root = env ["CMUX_CONFIG_ROOT" ].strip ()
144+ app_root = env ["CMUX_APP_ROOT" ].strip ()
145+ workspace_id = env ["CMUX_WORKSPACE_ID" ].strip ()
146+ project_candidates = env ["CMUX_PROJECT_CANDIDATES" ].strip ()
147+ if not config_root :
148+ raise ValueError ("CMUX_CONFIG_ROOT must be set" )
149+ if not app_root :
150+ raise ValueError ("CMUX_APP_ROOT must be set" )
151+ if not workspace_id :
152+ raise ValueError ("CMUX_WORKSPACE_ID must be set" )
153+ if not project_candidates :
154+ raise ValueError ("CMUX_PROJECT_CANDIDATES must be set" )
155+ env ["CMUX_CONFIG_ROOT" ] = config_root
156+ env ["CMUX_APP_ROOT" ] = app_root
157+ env ["CMUX_WORKSPACE_ID" ] = workspace_id
158+ env ["CMUX_PROJECT_CANDIDATES" ] = project_candidates
159+
160+ timeout_value = env .get ("CMUX_TIMEOUT_MS" )
161+ if timeout_value :
162+ timeout_value = timeout_value .strip ()
163+ if not timeout_value .isdigit ():
164+ raise ValueError ("CMUX_TIMEOUT_MS must be an integer expressed in ms" )
165+ env ["CMUX_TIMEOUT_MS" ] = timeout_value
166+
167+ project_path = env .get ("CMUX_PROJECT_PATH" )
168+ if project_path is not None and not project_path .strip ():
169+ raise ValueError ("CMUX_PROJECT_PATH must be non-empty when provided" )
119170
120171 return env
121172
@@ -132,80 +183,34 @@ def perform_task(
132183 if not instruction or not instruction .strip ():
133184 raise ValueError ("instruction must be a non-empty string" )
134185
135- self ._prepare_payloads (session )
186+ self ._ensure_payload_staged (session )
136187 return super ().perform_task (
137188 instruction = instruction , session = session , logging_dir = logging_dir
138189 )
139190
def _ensure_payload_staged(self, session: TmuxSession) -> None:
    """Stage the cmux archive and runner script for ``session``'s container.

    Staging is skipped when the container's ``id`` equals the id recorded
    by a previous successful call. A container that exposes no ``id`` is
    never recorded, so staging is repeated on every call for it.

    Args:
        session: Session whose ``container`` attribute identifies the
            target container; it is forwarded unchanged to
            ``stage_payload``.
    """
    container_id = getattr(session.container, "id", None)
    if container_id and container_id == self._staged_container_id:
        return

    archive = self._build_archive()
    # NOTE(review): stage_payload lives in .cmux_payload and is not visible
    # here -- presumably it copies the archive and the runner script into
    # the container; confirm against that module.
    stage_payload(
        session=session,
        archive_bytes=archive,
        archive_name=self._ARCHIVE_NAME,
        runner_path=self._runner_path,
    )

    # Record the id only after stage_payload returned without raising.
    if container_id:
        self._staged_container_id = container_id
181206
def _build_archive(self) -> bytes:
    """Return the application archive bytes, building them at most once.

    The first call delegates to ``build_app_archive`` with the repo root
    and ``_INCLUDE_PATHS`` and stores the result on the instance; later
    calls return the stored bytes without rebuilding.

    Returns:
        The archive contents as ``bytes``.
    """
    if self._archive_bytes is not None:
        return self._archive_bytes

    archive = build_app_archive(self._repo_root, self._INCLUDE_PATHS)
    self._archive_bytes = archive
    return archive
209214
210215 def _run_agent_commands (self , instruction : str ) -> list [TerminalCommand ]:
211216 escaped = shlex .quote (instruction )
0 commit comments