| 1 | """Tests for finalization helpers on RuntimeContext.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from pathlib import Path |
| 6 | from types import SimpleNamespace |
| 7 | |
| 8 | import pytest |
| 9 | |
| 10 | from loader.llm.base import Message, Role |
| 11 | from loader.runtime.context import RuntimeContext, RuntimeLegacyServices |
| 12 | from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done |
| 13 | from loader.runtime.events import TurnSummary |
| 14 | from loader.runtime.finalization import TurnFinalizer |
| 15 | from loader.runtime.permissions import ( |
| 16 | PermissionMode, |
| 17 | build_permission_policy, |
| 18 | load_permission_rules, |
| 19 | ) |
| 20 | from loader.runtime.tracing import RuntimeTracer |
| 21 | from loader.tools.base import create_default_registry |
| 22 | from tests.helpers.runtime_harness import ScriptedBackend |
| 23 | |
| 24 | |
class FakeSession:
    """In-memory session double that logs every usage report it receives."""

    def __init__(self) -> None:
        # Fixed identifier so tests can assert on the exact value.
        self.session_id = "session-test-123"
        self.messages: list[Message] = []
        self.recorded_calls: list[dict[str, object]] = []

    def append(self, message: Message) -> None:
        """Add ``message`` to the end of ``messages``."""
        self.messages.append(message)

    def record_turn_usage(
        self,
        usage: dict[str, int],
        *,
        tool_calls: int,
        iterations: int,
    ) -> dict[str, int]:
        """Log one usage report and return a totals dict.

        The logged entry holds a shallow copy of ``usage`` plus the two
        keyword counters. The returned dict always reports ``turns`` as 1
        and echoes ``tool_calls`` and ``iterations`` back unchanged.
        """
        counters = {"tool_calls": tool_calls, "iterations": iterations}
        # Copy ``usage`` so later mutation by the caller cannot alter the log.
        self.recorded_calls.append({"usage": dict(usage), **counters})
        return {"turns": 1, **counters}
| 48 | |
| 49 | |
class FakeCodeFilter:
    """Code-filter stub whose ``reset`` is a no-op."""

    def reset(self) -> None:
        """Accept the call and do nothing."""
| 53 | |
| 54 | |
class FakeSafeguards:
    """Safeguards stub: content passes through, nothing steers or loops."""

    def __init__(self) -> None:
        self.code_filter = FakeCodeFilter()
        # Opaque placeholders; this stub never calls into them.
        self.validator = object()
        self.action_tracker = object()

    # -- content filtering: both variants return the input untouched --------

    def filter_stream_chunk(self, content: str) -> str:
        """Return ``content`` unchanged."""
        return content

    def filter_complete_content(self, content: str) -> str:
        """Return ``content`` unchanged."""
        return content

    # -- steering: never requested ------------------------------------------

    def should_steer(self) -> bool:
        """Always report that no steering is needed."""
        return False

    def get_steering_message(self) -> str | None:
        """Always return ``None``."""
        return None

    # -- loop detection: never triggered ------------------------------------

    def record_response(self, content: str) -> None:
        """Ignore ``content``."""

    def detect_text_loop(self, content: str) -> tuple[bool, str]:
        """Return ``(False, "")`` regardless of ``content``."""
        return (False, "")

    def detect_loop(self) -> tuple[bool, str]:
        """Return ``(False, "")``."""
        return (False, "")
| 81 | |
| 82 | |
def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext:
    """Assemble a ``RuntimeContext`` rooted at ``temp_dir`` for tests.

    The tool registry and permission rules come from the real helpers,
    pointed at ``temp_dir``. The backend is a ``ScriptedBackend``, and the
    session and safeguards are the fakes from this module. Config and the
    capability profile are plain ``SimpleNamespace`` objects.

    Args:
        temp_dir: Directory used as project root and workspace root.
        session: Fake session whose ``messages`` list backs the legacy
            ``message_history`` callback.

    Returns:
        The constructed ``RuntimeContext``.
    """
    tool_registry = create_default_registry(temp_dir)
    tool_registry.configure_workspace_root(temp_dir)

    permission_status = load_permission_rules(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
        rules=permission_status.rules,
    )

    # Collaborators are built in the same order the constructor call
    # originally evaluated them.
    scripted_backend = ScriptedBackend()

    reasoning_settings = SimpleNamespace(
        rollback=False,
        show_rollback_plan=False,
        completion_check=True,
        use_quick_completion=True,
        max_continuation_prompts=5,
        self_critique=False,
        confidence_scoring=False,
        min_confidence_for_action=3,
        verification=False,
    )
    runtime_config = SimpleNamespace(
        force_react=False,
        verification_retry_budget=3,
        reasoning=reasoning_settings,
    )
    native_tools_profile = SimpleNamespace(supports_native_tools=True)
    fake_safeguards = FakeSafeguards()

    # Legacy hooks are inert, except ``message_history``, which returns the
    # fake session's message list.
    legacy_services = RuntimeLegacyServices(
        message_history=lambda: session.messages,
        drain_steering_queue=lambda: [],
        queue_steering_message=lambda message: None,
        set_workflow_mode=lambda mode: None,
        refresh_capability_profile=lambda: None,
    )

    return RuntimeContext(
        project_root=temp_dir,
        backend=scripted_backend,
        registry=tool_registry,
        session=session,  # type: ignore[arg-type]
        config=runtime_config,
        capability_profile=native_tools_profile,  # type: ignore[arg-type]
        project_context=None,
        permission_policy=permission_policy,
        permission_config_status=permission_status,
        workflow_mode="execute",
        safeguards=fake_safeguards,
        legacy=legacy_services,
    )
| 127 | |
| 128 | |
def test_turn_finalizer_finalize_summary_uses_runtime_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check ``finalize_summary`` against a context built from fakes.

    Asserts that the returned summary carries the fake session's id and
    totals, that the session received one usage report, that the patched
    ``MemoryStore.capture_definition_of_done`` was called, and that the
    summary has a non-empty trace.
    """
    fake_session = FakeSession()
    runtime_context = build_context(temp_dir, fake_session)

    tracer = RuntimeTracer()
    tracer.record("turn.completed", reason="done")
    finalizer = TurnFinalizer(
        runtime_context,
        tracer,
        DefinitionOfDoneStore(temp_dir),
        set_workflow_mode=lambda mode, dod, emit, summary, reason: None,  # type: ignore[arg-type]
    )

    done_definition = create_definition_of_done("Finish the task")
    done_definition.status = "done"
    turn_summary = TurnSummary(
        final_response="All set.",
        definition_of_done=done_definition,
        iterations=2,
        usage={"prompt_tokens": 10},
        tool_result_messages=[Message(role=Role.TOOL, content="tool output")],
    )

    # Patch the memory-store hook with a recorder that stores its argument.
    captured: dict[str, str] = {}

    def _record_summary(self, summary_text: str) -> None:
        captured["summary"] = summary_text

    monkeypatch.setattr(
        "loader.runtime.finalization.MemoryStore.capture_definition_of_done",
        _record_summary,
    )

    result = finalizer.finalize_summary(turn_summary)

    expected_totals = {"turns": 1, "tool_calls": 1, "iterations": 2}
    expected_report = {
        "usage": {"prompt_tokens": 10, "tool_calls": 1, "iterations": 2},
        "tool_calls": 1,
        "iterations": 2,
    }
    assert result.session_id == "session-test-123"
    assert result.cumulative_usage == expected_totals
    assert fake_session.recorded_calls == [expected_report]
    assert "summary" in captured
    assert result.trace