"""Direct tests for tool-batch confidence, verification, and recovery helpers."""
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
import pytest
from loader.llm.base import Message, Role, ToolCall
from loader.runtime.context import RuntimeContext
from loader.runtime.events import AgentEvent
from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
from loader.runtime.permissions import (
PermissionMode,
build_permission_policy,
load_permission_rules,
)
from loader.runtime.reasoning_types import (
ActionVerification,
ConfidenceAssessment,
ConfidenceLevel,
)
from loader.runtime.recovery import RecoveryContext
from loader.runtime.tool_batch_checks import (
ToolBatchConfidenceGate,
ToolBatchVerificationGate,
)
from loader.runtime.tool_batch_recovery import ToolBatchRecoveryController
from loader.tools.base import ToolResult as RegistryToolResult
from loader.tools.base import create_default_registry
from tests.helpers.runtime_harness import ScriptedBackend
class FakeSession:
    """Bare-bones session double that only keeps an ordered message list."""

    def __init__(self, messages: list[Message]) -> None:
        # Take a shallow copy so appends never touch the caller's list.
        self.messages = [*messages]

    def append(self, message: Message) -> None:
        """Add *message* to the end of the stored transcript."""
        self.messages += [message]
class FakeCodeFilter:
    """Code-filter double whose ``reset`` is a no-op."""

    def reset(self) -> None:
        """Do nothing and return ``None``."""
class FakeSafeguards:
    """Safeguards double: content passes through unchanged and steering is never requested."""

    def __init__(self) -> None:
        self.code_filter = FakeCodeFilter()
        # Opaque placeholders; nothing in this class reads them.
        self.action_tracker = object()
        self.validator = object()

    def filter_stream_chunk(self, content: str) -> str:
        """Return the streamed chunk unchanged."""
        return content

    def filter_complete_content(self, content: str) -> str:
        """Return the full content unchanged."""
        return content

    def should_steer(self) -> bool:
        """Always report that no steering is needed."""
        return False

    def get_steering_message(self) -> str | None:
        """Return ``None``; this double never produces a steering message."""

    def record_response(self, content: str) -> None:
        """Ignore the response; nothing is recorded."""
def build_context(
    *,
    temp_dir: Path,
    messages: list[Message],
    assess_confidence,
    verify_action,
    recovery_context: RecoveryContext | None = None,
    confidence_scoring: bool = False,
    verification: bool = False,
    min_confidence_for_action: int = 3,
) -> RuntimeContext:
    """Assemble a ``RuntimeContext`` for the tool-batch helper tests.

    The registry, permission rules and permission policy are all rooted at
    ``temp_dir``. The session and safeguards are the local fakes, the backend
    is a ``ScriptedBackend``, and ``assess_confidence`` / ``verify_action``
    are exposed as the ``reasoning`` hooks. The three reasoning toggles
    (``confidence_scoring``, ``verification``, ``min_confidence_for_action``)
    are forwarded into ``config.reasoning``.
    """
    tool_registry = create_default_registry(temp_dir)
    tool_registry.configure_workspace_root(temp_dir)

    permission_status = load_permission_rules(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
        rules=permission_status.rules,
    )

    reasoning_settings = SimpleNamespace(
        rollback=False,
        show_rollback_plan=False,
        completion_check=True,
        max_continuation_prompts=5,
        self_critique=False,
        confidence_scoring=confidence_scoring,
        min_confidence_for_action=min_confidence_for_action,
        verification=verification,
    )
    runtime_config = SimpleNamespace(
        force_react=False,
        max_recovery_attempts=2,
        auto_recover=True,
        reasoning=reasoning_settings,
    )
    reasoning_hooks = SimpleNamespace(
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )

    return RuntimeContext(
        project_root=temp_dir,
        backend=ScriptedBackend(),
        registry=tool_registry,
        session=FakeSession(messages),  # type: ignore[arg-type]
        config=runtime_config,
        capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
        project_context=None,
        permission_policy=permission_policy,
        permission_config_status=permission_status,
        workflow_mode="execute",
        safeguards=FakeSafeguards(),
        reasoning=reasoning_hooks,
        recovery_context=recovery_context,
    )
def tool_outcome(
    *,
    tool_call: ToolCall,
    output: str,
    is_error: bool,
) -> ToolExecutionOutcome:
    """Build an ``EXECUTED`` outcome for ``tool_call`` carrying ``output``.

    The same ``output`` text is used for the tool-result message, the event
    content, the raw result output and the registry result; ``is_error`` is
    propagated to the message, the outcome and the registry result.
    """
    result_message = Message.tool_result_message(
        tool_call_id=tool_call.id,
        display_content=output,
        result_content=output,
        is_error=is_error,
    )
    registry_result = RegistryToolResult(output=output, is_error=is_error)
    return ToolExecutionOutcome(
        tool_call=tool_call,
        state=ToolExecutionState.EXECUTED,
        message=result_message,
        event_content=output,
        is_error=is_error,
        result_output=output,
        registry_result=registry_result,
    )
@pytest.mark.asyncio
async def test_tool_batch_confidence_gate_skips_low_confidence_actions(
    temp_dir: Path,
) -> None:
    """A LOW assessment makes the gate skip the call, warn the session and emit one event."""
    seen: dict[str, str] = {}

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Keep the context string passed in so the test can inspect it below.
        seen["context"] = context
        return ConfidenceAssessment(
            action=f"{tool_name} with {tool_args}",
            tool_name=tool_name,
            tool_args=tool_args,
            level=ConfidenceLevel.LOW,
            reasoning="Need more context first.",
            risks=["Unknown file contents"],
        )

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run here")

    conversation = [
        Message(role=Role.USER, content="Inspect the README."),
        Message(role=Role.ASSISTANT, content="I'll read it next."),
    ]
    runtime = build_context(
        temp_dir=temp_dir,
        messages=conversation,
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        confidence_scoring=True,
    )
    confidence_gate = ToolBatchConfidenceGate(runtime)
    read_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
    emitted: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        emitted.append(event)

    skipped = await confidence_gate.should_skip(tool_call=read_call, emit=emit)

    assert skipped is True
    assert "Inspect the README." in seen["context"]
    last_message = runtime.session.messages[-1]
    assert last_message.role == Role.USER
    assert "[LOW CONFIDENCE WARNING]" in last_message.content
    assert [event.type for event in emitted] == ["confidence"]
@pytest.mark.asyncio
async def test_tool_batch_verification_gate_requests_correction(
    temp_dir: Path,
) -> None:
    """A failed verification makes the gate ask for a correction.

    The stubbed ``verify_action`` reports a failed check (not verified, one
    discrepancy, correction needed). The gate must return ``True`` from
    ``should_continue``, append a user message containing
    ``[VERIFICATION FAILED]`` and emit exactly one ``verification`` event.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        return ActionVerification(
            tool_name=tool_name,
            tool_args=tool_args,
            expected_outcome="Success",
            actual_result=result,
            # A result with discrepancies that needs correction is a failed
            # verification; reporting it as verified made the fixture
            # self-contradictory.
            verified=False,
            discrepancies=["Output did not match the requested content"],
            needs_correction=True,
            correction_suggestion="Read the file before editing again.",
        )

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        verification=True,
    )
    gate = ToolBatchVerificationGate(context)
    tool_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
    outcome = tool_outcome(tool_call=tool_call, output="unexpected contents", is_error=False)
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    should_continue = await gate.should_continue(
        tool_call=tool_call,
        outcome=outcome,
        emit=emit,
    )

    assert should_continue is True
    assert context.session.messages[-1].role == Role.USER
    assert "[VERIFICATION FAILED]" in context.session.messages[-1].content
    assert [event.type for event in events] == ["verification"]
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_returns_follow_up(
    temp_dir: Path,
) -> None:
    """A failed bash call yields a follow-up, a recovery context and a ``recovery`` event."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run here")

    runtime = build_context(
        temp_dir=temp_dir,
        messages=[],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runtime.session.current_task = (
        "Update index.html so every chapter link and title matches the real HTML files in chapters/."
    )
    recovery = ToolBatchRecoveryController(runtime)
    failed_call = ToolCall(id="bash-1", name="bash", arguments={"command": "pytest"})
    failed_outcome = tool_outcome(
        tool_call=failed_call,
        output="command failed",
        is_error=True,
    )
    emitted: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        emitted.append(event)

    follow_up = await recovery.build_follow_up(
        tool_call=failed_call,
        outcome=failed_outcome,
        emit=emit,
    )

    assert follow_up is not None
    assert runtime.recovery_context is not None
    assert "Previous attempts:" in follow_up.content
    assert "recovery" in [event.type for event in emitted]
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_includes_known_state_for_missing_file(
    temp_dir: Path,
) -> None:
    """Check the follow-up built for a missing-file read when the session has history.

    The session is seeded with a glob observation, a completed read of
    ``02-setup.html``, a notepad observation mapping old chapter names to new
    ones, and an assistant turn that requests a read of ``index.html``. After a
    failed read of ``04-data-types.html`` the follow-up must contain the
    known-state and action-bias sections, mention ``04-variables.html`` and the
    ``02-basic-syntax.html -> 02-setup.html`` mapping, and a ``recovery`` event
    must have been emitted.

    NOTE(review): the ``display_content`` / ``result_content`` literals below
    are split across physical lines, which a double-quoted Python string cannot
    do; this looks like fixture markup lost in extraction -- confirm against
    version control.
    """
    # Neither reasoning hook is expected to be called on the recovery path.
    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")
    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run here")
    # Prior transcript the controller can draw on when building the follow-up.
    messages = [
        Message(
            role=Role.TOOL,
            content=(
                "Observation [glob]: Result: "
                "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
                "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
                "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
                "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
            ),
            tool_results=[],
        ),
        Message(
            role=Role.ASSISTANT,
            content="I already inspected the setup chapter.",
            tool_calls=[
                ToolCall(
                    id="read-setup",
                    name="read",
                    arguments={"file_path": "~/Loader/guides/fortran/chapters/02-setup.html"},
                )
            ],
        ),
        Message.tool_result_message(
            tool_call_id="read-setup",
            display_content="
Chapter 2: Setting Up Fortran
\n",
            result_content="Chapter 2: Setting Up Fortran
\n",
        ),
        Message(
            role=Role.TOOL,
            content=(
                "Observation [notepad_write_working]: Result: "
                "- 02-basic-syntax.html -> 02-setup.html\n"
                "- 03-variables-data-types.html -> 03-basics.html\n"
                "- 04-operators-expressions.html -> 04-variables.html"
            ),
            tool_results=[],
        ),
        Message(
            role=Role.ASSISTANT,
            content="I should update the index now.",
            tool_calls=[
                ToolCall(
                    id="read-index",
                    name="read",
                    arguments={"file_path": "~/Loader/guides/fortran/index.html"},
                )
            ],
        ),
    ]
    context = build_context(
        temp_dir=temp_dir,
        messages=messages,
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    context.session.current_task = (
        "Update ~/Loader/guides/fortran/index.html with the right chapter links."
    )
    controller = ToolBatchRecoveryController(context)
    # The failing call: a read of a chapter file that is not in the glob listing above.
    tool_call = ToolCall(
        id="read-missing",
        name="read",
        arguments={"file_path": "~/Loader/guides/fortran/chapters/04-data-types.html"},
    )
    outcome = tool_outcome(
        tool_call=tool_call,
        output="File not found: ~/Loader/guides/fortran/chapters/04-data-types.html",
        is_error=True,
    )
    events: list[AgentEvent] = []
    # Collect every emitted event so the recovery event can be asserted on.
    async def emit(event: AgentEvent) -> None:
        events.append(event)
    follow_up = await controller.build_follow_up(
        tool_call=tool_call,
        outcome=outcome,
        emit=emit,
    )
    assert follow_up is not None
    assert "## CONTINUE FROM KNOWN STATE" in follow_up.content
    assert "apply the fix using confirmed findings" in follow_up.content
    assert "## ACTION BIAS FOR THIS RECOVERY" in follow_up.content
    assert "Prefer edit/write/patch on the target file" in follow_up.content
    assert "04-variables.html" in follow_up.content
    assert "02-basic-syntax.html -> 02-setup.html" in follow_up.content
    assert any(event.type == "recovery" for event in events)
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_suggests_known_sibling_files(
    temp_dir: Path,
) -> None:
    """Check that a missing-file read surfaces files that exist next to the missing path.

    Two chapter files are written under ``temp_dir / "chapters"``; the failing
    read targets a third name in the same directory. The follow-up must include
    the ``## LIKELY FILE CANDIDATES`` section, name ``04-variables.html`` and
    advise against retrying the missing path.

    NOTE(review): the ``write_text`` literals below are split across physical
    lines, which a double-quoted Python string cannot do; this looks like
    fixture markup lost in extraction -- confirm against version control.
    """
    # Neither reasoning hook is expected to be called on the recovery path.
    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")
    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run here")
    # On-disk siblings of the path that the failing read will ask for.
    chapters = temp_dir / "chapters"
    chapters.mkdir()
    (chapters / "04-variables.html").write_text(
        "Chapter 4: Variables and Data Types
\n"
    )
    (chapters / "05-input-output.html").write_text(
        "Chapter 5: Input and Output
\n"
    )
    # Empty history: nothing in the transcript names the sibling files.
    messages: list[Message] = []
    context = build_context(
        temp_dir=temp_dir,
        messages=messages,
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    controller = ToolBatchRecoveryController(context)
    tool_call = ToolCall(
        id="read-missing",
        name="read",
        arguments={"file_path": str(chapters / "04-data-types.html")},
    )
    outcome = tool_outcome(
        tool_call=tool_call,
        output=f"File not found: {chapters / '04-data-types.html'}",
        is_error=True,
    )
    events: list[AgentEvent] = []
    async def emit(event: AgentEvent) -> None:
        events.append(event)
    follow_up = await controller.build_follow_up(
        tool_call=tool_call,
        outcome=outcome,
        emit=emit,
    )
    assert follow_up is not None
    assert "## LIKELY FILE CANDIDATES" in follow_up.content
    assert "`04-variables.html`" in follow_up.content
    assert "instead of retrying the missing path" in follow_up.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_includes_current_html_target_excerpt(
    temp_dir: Path,
) -> None:
    """Check that a failed patch on ``index.html`` yields an excerpt of the on-disk file.

    Two chapter files and an ``index.html`` are written under ``temp_dir``; a
    ``patch`` call against the index then fails. The follow-up must include the
    ``## CURRENT TARGET EXCERPT`` section with numbered lines from the index,
    tell the model to use the exact on-disk text, and must not contain
    ``Verified chapter inventory:``.

    NOTE(review): several string literals below (the ``write_text`` fixtures and
    two of the asserted excerpt lines) are split across physical lines, which a
    quoted Python string cannot do; this looks like markup lost in extraction
    -- confirm against version control.
    """
    # Neither reasoning hook is expected to be called on the recovery path.
    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")
    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run here")
    chapters = temp_dir / "chapters"
    chapters.mkdir()
    (chapters / "01-introduction.html").write_text(
        "Chapter 1: Introduction to Fortran
\n"
    )
    (chapters / "02-setup.html").write_text(
        "Chapter 2: Setting Up Your Environment
\n"
    )
    # The patch target whose current contents should be quoted back.
    index = temp_dir / "index.html"
    index.write_text(
        "Table of Contents
\n"
        "\n"
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    context.session.current_task = (
        "Update index.html so every chapter link and title matches the real HTML files in chapters/."
    )
    controller = ToolBatchRecoveryController(context)
    # A single-hunk patch; the outcome below reports that it did not apply.
    tool_call = ToolCall(
        id="patch-index",
        name="patch",
        arguments={
            "file_path": str(index),
            "hunks": [
                {
                    "old_start": 1,
                    "old_lines": 1,
                    "new_start": 1,
                    "new_lines": 1,
                    "lines": ["-bad", "+good"],
                }
            ],
        },
    )
    outcome = tool_outcome(
        tool_call=tool_call,
        output="Patch failed: hunk did not apply cleanly",
        is_error=True,
    )
    events: list[AgentEvent] = []
    async def emit(event: AgentEvent) -> None:
        events.append(event)
    follow_up = await controller.build_follow_up(
        tool_call=tool_call,
        outcome=outcome,
        emit=emit,
    )
    assert follow_up is not None
    assert "## CURRENT TARGET EXCERPT" in follow_up.content
    assert "- Target file:" in follow_up.content
    assert "index.html" in follow_up.content
    # Either heading wording is accepted for the excerpt.
    assert (
        "Closest on-disk block to the requested patch:" in follow_up.content
        or "Current file contents near the requested patch location:" in follow_up.content
    )
    assert '1 | Table of Contents
' in follow_up.content
    assert (
        '3 | Chapter 1: Introduction to Fortran'
        in follow_up.content
    )
    assert "Use the exact on-disk text above" in follow_up.content
    assert "Verified chapter inventory:" not in follow_up.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_includes_current_target_excerpt_for_edit_mismatch(
    temp_dir: Path,
) -> None:
    """An edit whose ``old_string`` is absent yields an on-disk excerpt plus stale-edit guidance."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run here")

    # The file has a "## Status" heading; the edit below looks for "## Runtime".
    guide_lines = [
        "# Loader Guide\n",
        "\n",
        "## Overview\n",
        "Loader helps agentic coding workflows.\n",
        "\n",
        "## Status\n",
        "The runtime is stable.\n",
    ]
    guide = temp_dir / "guide.md"
    guide.write_text("".join(guide_lines))

    runtime = build_context(
        temp_dir=temp_dir,
        messages=[],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runtime.session.current_task = "Update guide.md to mention the runtime is resilient."
    recovery = ToolBatchRecoveryController(runtime)

    edit_arguments = {
        "file_path": str(guide),
        "old_string": "## Runtime\nThe runtime is stable.\n",
        "new_string": "## Runtime\nThe runtime is resilient.\n",
    }
    stale_edit = ToolCall(id="edit-guide", name="edit", arguments=edit_arguments)
    failed_outcome = tool_outcome(
        tool_call=stale_edit,
        output="old_string not found in file. Make sure it matches exactly.",
        is_error=True,
    )

    follow_up = await recovery.build_follow_up(
        tool_call=stale_edit,
        outcome=failed_outcome,
        emit=_noop_emit,
    )

    assert follow_up is not None
    expected_fragments = (
        "## CURRENT TARGET EXCERPT",
        "- Target file:",
        "guide.md",
        "Closest on-disk block to the requested edit:",
        "6 | ## Status",
        "7 | The runtime is stable.",
        "replace the containing block in one edit",
        "## STALE EDIT RECOVERY",
        "do not retry it from memory",
        "complete replacement file",
    )
    for fragment in expected_fragments:
        assert fragment in follow_up.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_scopes_known_state_to_active_target(
    temp_dir: Path,
) -> None:
    """Check that recovery for an nginx edit does not recommend updating the fortran index.

    The session history holds a read observation about
    ``fortran/index.html``, while the failing ``edit`` call targets
    ``nginx/index.html``. The follow-up must not contain a
    ``Preferred next step: Update`` line pointing at the fortran index.

    NOTE(review): the ``write_text`` literals below are split across physical
    lines, which a double-quoted Python string cannot do; this looks like
    fixture markup lost in extraction -- confirm against version control.
    """
    # Neither reasoning hook is expected to be called on the recovery path.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")
    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run here")
    # Files for the guide that the failing edit is aimed at.
    nginx_chapters = temp_dir / "nginx" / "chapters"
    nginx_chapters.mkdir(parents=True)
    nginx_index = temp_dir / "nginx" / "index.html"
    nginx_index.write_text(
        "Table of Contents
\n"
        "\n"
    )
    (nginx_chapters / "01_getting_started.html").write_text(
        "Getting Started with NGINX
\n"
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[
            # History that refers to a different file than the edit target.
            Message(
                role=Role.TOOL,
                content=(
                    "Observation [read]: Result: "
                    f"{temp_dir / 'fortran' / 'index.html'}\n"
                    "Semantic verification preview: validated 12 toc links in index.html"
                ),
            ),
        ],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    context.session.current_task = (  # type: ignore[attr-defined]
        "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
        "for the structure and cadence of the guide. We are going to make an all "
        "new equally thorough guide on how to use the nginx tool. It will live in "
        "~/Loader/guides/nginx/index.html and ~/Loader/guides/nginx/chapters/."
    )
    controller = ToolBatchRecoveryController(context)
    tool_call = ToolCall(
        id="edit-nginx",
        name="edit",
        arguments={
            "file_path": str(nginx_index),
            "old_string": "",
            "new_string": "",
        },
    )
    outcome = tool_outcome(
        tool_call=tool_call,
        output=(
            "Tool execution error: EditTool.execute() missing 1 required positional "
            "argument: 'new_string'"
        ),
        is_error=True,
    )
    events: list[AgentEvent] = []
    async def emit(event: AgentEvent) -> None:
        events.append(event)
    follow_up = await controller.build_follow_up(
        tool_call=tool_call,
        outcome=outcome,
        emit=emit,
    )
    assert follow_up is not None
    # The fortran index from history must not be offered as the next step.
    assert (
        "Preferred next step: Update "
        f"`{temp_dir / 'fortran' / 'index.html'}`"
    ) not in follow_up.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_prioritizes_active_verification_repair_target(
    temp_dir: Path,
) -> None:
    """Check that a pending repair instruction takes priority in the recovery follow-up.

    The session holds a ``[DEFINITION OF DONE CHECK FAILED]`` user message that
    names a broken reference in the nginx ``index.html``. After an unrelated
    read of ``~/nginx-guide/chapter1.html`` fails, the follow-up must include
    the ``## ACTIVE REPAIR TARGET`` section, the index path, the broken
    reference and the failed path, and must not restate the original task
    text.

    NOTE(review): the ``write_text`` literal for the chapter file below is
    split across physical lines, which a double-quoted Python string cannot do;
    the index fixture also looks emptied. This appears to be markup lost in
    extraction -- confirm against version control.
    """
    # Neither reasoning hook is expected to be called on the recovery path.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")
    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run here")
    nginx_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters = nginx_root / "chapters"
    chapters.mkdir(parents=True)
    index = nginx_root / "index.html"
    index.write_text(
        "\n"
    )
    (chapters / "01-getting-started.html").write_text("Getting Started
\n")
    # Repair instruction that names the index file as the immediate next edit.
    repair_message = (
        "[DEFINITION OF DONE CHECK FAILED]\n"
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-introduction.html` in `{index}`.\n"
        f"- Immediate next step: edit `{index}`.\n"
        f"- If the broken reference should remain, create `{chapters / '01-introduction.html'}`; "
        "otherwise remove or replace `chapters/01-introduction.html`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this "
        "concrete repair target is unresolved.\n"
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[
            Message(role=Role.USER, content=repair_message),
            Message(
                role=Role.TOOL,
                content=(
                    "Observation [glob]: Result: "
                    f"{chapters / '01-getting-started.html'}"
                ),
            ),
        ],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    context.session.current_task = (  # type: ignore[attr-defined]
        "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
        "for the structure and cadence of the guide. We are going to make an all "
        "new equally thorough guide on how to use the nginx tool."
    )
    controller = ToolBatchRecoveryController(context)
    # NOTE(review): this call passes "path" where the other tests pass
    # "file_path" -- presumably deliberate for a bad-path scenario; confirm.
    tool_call = ToolCall(
        id="read-bad-path",
        name="read",
        arguments={"path": "~/nginx-guide/chapter1.html"},
    )
    outcome = tool_outcome(
        tool_call=tool_call,
        output="File not found: ~/nginx-guide/chapter1.html",
        is_error=True,
    )
    follow_up = await controller.build_follow_up(
        tool_call=tool_call,
        outcome=outcome,
        emit=lambda event: _noop_emit(event),
    )
    assert follow_up is not None
    assert "## ACTIVE REPAIR TARGET" in follow_up.content
    assert str(index) in follow_up.content
    assert "chapters/01-introduction.html" in follow_up.content
    assert "Do not go back to the original reference guide" in follow_up.content
    assert "Current task: Have a look at ~/Loader/guides/fortran" not in follow_up.content
    assert "~/nginx-guide/chapter1.html" in follow_up.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_reuses_context_for_related_missing_files(
    temp_dir: Path,
) -> None:
    """A second missing-file read keeps the same recovery context and counts as attempt 2/3."""

    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run here")

    first_missing = "~/Loader/guides/fortran/chapters/04-data-types.html"
    second_missing = "~/Loader/guides/fortran/chapters/02-basic-syntax.html"

    # Recovery state left over from an earlier failed read of the first path.
    prior_recovery = RecoveryContext(
        original_tool="read",
        original_args={"file_path": first_missing},
        max_retries=3,
    )
    prior_recovery.add_attempt(
        "read",
        {"file_path": first_missing},
        f"File not found: {first_missing}",
    )

    runtime = build_context(
        temp_dir=temp_dir,
        messages=[],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        recovery_context=prior_recovery,
    )
    recovery = ToolBatchRecoveryController(runtime)
    failed_call = ToolCall(
        id="read-missing-2",
        name="read",
        arguments={"file_path": second_missing},
    )
    failed_outcome = tool_outcome(
        tool_call=failed_call,
        output=f"File not found: {second_missing}",
        is_error=True,
    )

    follow_up = await recovery.build_follow_up(
        tool_call=failed_call,
        outcome=failed_outcome,
        emit=_noop_emit,
    )

    assert follow_up is not None
    assert runtime.recovery_context is prior_recovery
    assert len(prior_recovery.attempts) == 2
    for fragment in ("## Current attempt: 2/3", "02-basic-syntax.html"):
        assert fragment in follow_up.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_uses_generic_loop_guidance(
    temp_dir: Path,
) -> None:
    """Repeating the same failed read emits an ``error`` event carrying generic guidance."""

    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run here")

    missing_path = "~/Loader/guides/nginx/chapters/01-introduction.html"
    not_found = f"File not found: {missing_path}"

    # Recovery state that already records one identical failed attempt.
    prior_recovery = RecoveryContext(
        original_tool="read",
        original_args={"file_path": missing_path},
        max_retries=3,
    )
    prior_recovery.add_attempt("read", {"file_path": missing_path}, not_found)

    runtime = build_context(
        temp_dir=temp_dir,
        messages=[],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        recovery_context=prior_recovery,
    )
    recovery = ToolBatchRecoveryController(runtime)
    repeated_call = ToolCall(
        id="read-missing-repeat",
        name="read",
        arguments={"file_path": missing_path},
    )
    failed_outcome = tool_outcome(
        tool_call=repeated_call,
        output=not_found,
        is_error=True,
    )
    emitted: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        emitted.append(event)

    follow_up = await recovery.build_follow_up(
        tool_call=repeated_call,
        outcome=failed_outcome,
        emit=emit,
    )

    assert follow_up is not None
    error_events = [event for event in emitted if event.type == "error"]
    assert error_events
    first_error = error_events[0]
    assert "read a config file first" not in first_error.content
    assert "verify the current result" in first_error.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_surfaces_missing_write_payload_fix(
    temp_dir: Path,
) -> None:
    """A write call with size metadata but no ``content`` yields the payload-format section."""

    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run here")

    user_request = Message(
        role=Role.USER,
        content="Create ~/Loader/guides/nginx/index.html",
    )
    runtime = build_context(
        temp_dir=temp_dir,
        messages=[user_request],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    recovery = ToolBatchRecoveryController(runtime)

    # Arguments describe the content's size but omit the content itself.
    metadata_only_arguments = {
        "file_path": "~/Loader/guides/nginx/index.html",
        "content_chars": 1354,
        "content_lines": 30,
    }
    write_call = ToolCall(
        id="write-metadata-only",
        name="write",
        arguments=metadata_only_arguments,
    )
    failure_text = (
        "[Validation warning] Writing empty content to file\n"
        "Tool execution error: WriteTool.execute() missing 1 required "
        "positional argument: 'content'"
    )
    failed_outcome = tool_outcome(
        tool_call=write_call,
        output=failure_text,
        is_error=True,
    )

    follow_up = await recovery.build_follow_up(
        tool_call=write_call,
        outcome=failed_outcome,
        emit=_noop_emit,
    )

    assert follow_up is not None
    expected_fragments = (
        "## PAYLOAD FORMAT FIX",
        "content_chars",
        "write(file_path=..., content='...')",
        "index.html",
    )
    for fragment in expected_fragments:
        assert fragment in follow_up.content
@pytest.mark.asyncio
async def test_tool_batch_recovery_controller_resets_context_for_unrelated_failures(
    temp_dir: Path,
) -> None:
    """A bash failure after a read failure replaces the recovery context and restarts at 1/2."""

    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence should not run here")

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run here")

    missing_path = "~/Loader/guides/fortran/chapters/04-data-types.html"

    # Recovery state from an earlier, unrelated missing-file read.
    prior_recovery = RecoveryContext(
        original_tool="read",
        original_args={"file_path": missing_path},
        max_retries=3,
    )
    prior_recovery.add_attempt(
        "read",
        {"file_path": missing_path},
        f"File not found: {missing_path}",
    )

    runtime = build_context(
        temp_dir=temp_dir,
        messages=[],
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        recovery_context=prior_recovery,
    )
    recovery = ToolBatchRecoveryController(runtime)
    bash_call = ToolCall(id="bash-timeout", name="bash", arguments={"command": "pytest"})
    failed_outcome = tool_outcome(
        tool_call=bash_call,
        output="command failed",
        is_error=True,
    )

    follow_up = await recovery.build_follow_up(
        tool_call=bash_call,
        outcome=failed_outcome,
        emit=_noop_emit,
    )

    assert follow_up is not None
    fresh_recovery = runtime.recovery_context
    assert fresh_recovery is not None
    assert fresh_recovery is not prior_recovery
    assert len(fresh_recovery.attempts) == 1
    assert "## Current attempt: 1/2" in follow_up.content
async def _noop_emit(event: AgentEvent) -> None:
    """Accept an event and discard it; used where a test ignores emitted events."""