Python · 5547 bytes Raw Blame History
1 """Tests for finalization helpers on RuntimeContext."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6 from types import SimpleNamespace
7
8 import pytest
9
10 from loader.llm.base import Message, Role
11 from loader.runtime.context import RuntimeContext, RuntimeLegacyServices
12 from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
13 from loader.runtime.events import TurnSummary
14 from loader.runtime.finalization import TurnFinalizer
15 from loader.runtime.permissions import (
16 PermissionMode,
17 build_permission_policy,
18 load_permission_rules,
19 )
20 from loader.runtime.tracing import RuntimeTracer
21 from loader.tools.base import create_default_registry
22 from tests.helpers.runtime_harness import ScriptedBackend
23
24
class FakeSession:
    """In-memory session double that records what is done to it.

    Appended messages are kept in ``messages`` and every call to
    ``record_turn_usage`` is logged in ``recorded_calls`` so a test can
    inspect the exact arguments it received.
    """

    def __init__(self) -> None:
        self.messages: list[Message] = []
        # Fixed identifier so assertions can compare against a known value.
        self.session_id = "session-test-123"
        self.recorded_calls: list[dict[str, object]] = []

    def append(self, message: Message) -> None:
        """Add *message* to the end of ``messages``."""
        self.messages.append(message)

    def record_turn_usage(
        self,
        usage: dict[str, int],
        *,
        tool_calls: int,
        iterations: int,
    ) -> dict[str, int]:
        """Log the call and return a canned cumulative-usage mapping.

        Args:
            usage: Usage counters for the turn; a shallow copy is stored.
            tool_calls: Number of tool calls reported for the turn.
            iterations: Number of iterations reported for the turn.

        Returns:
            A dict with ``turns`` fixed at 1 and the given ``tool_calls``
            and ``iterations`` echoed back.
        """
        payload = {
            # Copy so later mutation of the caller's dict does not alter the log.
            "usage": dict(usage),
            "tool_calls": tool_calls,
            "iterations": iterations,
        }
        self.recorded_calls.append(payload)
        return {"turns": 1, "tool_calls": tool_calls, "iterations": iterations}
48
49
class FakeCodeFilter:
    """Code-filter double whose ``reset`` does nothing."""

    def reset(self) -> None:
        """Accept the call and return ``None``; there is no state to clear."""
        return None
53
54
class FakeSafeguards:
    """Safeguards double that never filters, steers, or reports a loop.

    Content passes through the filter methods unchanged and every detection
    method reports a negative result.
    """

    def __init__(self) -> None:
        # Opaque placeholders; nothing in this class reads them.
        self.action_tracker = object()
        self.validator = object()
        self.code_filter = FakeCodeFilter()

    def filter_stream_chunk(self, content: str) -> str:
        """Return *content* unchanged."""
        return content

    def filter_complete_content(self, content: str) -> str:
        """Return *content* unchanged."""
        return content

    def should_steer(self) -> bool:
        """Always report that no steering is needed."""
        return False

    def get_steering_message(self) -> str | None:
        """Always return ``None`` (there is never a steering message)."""
        return None

    def record_response(self, content: str) -> None:
        """Ignore *content*; nothing is recorded."""
        return None

    def detect_text_loop(self, content: str) -> tuple[bool, str]:
        """Always return ``(False, "")``: no loop and an empty description."""
        return False, ""

    def detect_loop(self) -> tuple[bool, str]:
        """Always return ``(False, "")``: no loop and an empty description."""
        return False, ""
81
82
def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext:
    """Assemble a ``RuntimeContext`` rooted at *temp_dir* for these tests.

    The tool registry, permission rules, and permission policy all use
    *temp_dir* as the workspace. The backend is a ``ScriptedBackend``, the
    safeguards are ``FakeSafeguards``, and the config is a ``SimpleNamespace``
    stub rather than a real config object.

    Args:
        temp_dir: Directory used as project root and workspace root.
        session: Session double; its ``messages`` list backs the legacy
            ``message_history`` callback.

    Returns:
        The constructed ``RuntimeContext``.
    """
    registry = create_default_registry(temp_dir)
    registry.configure_workspace_root(temp_dir)
    rule_status = load_permission_rules(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
        rules=rule_status.rules,
    )
    return RuntimeContext(
        project_root=temp_dir,
        backend=ScriptedBackend(),
        registry=registry,
        session=session,  # type: ignore[arg-type]
        config=SimpleNamespace(
            force_react=False,
            verification_retry_budget=3,
            reasoning=SimpleNamespace(
                rollback=False,
                show_rollback_plan=False,
                completion_check=True,
                use_quick_completion=True,
                max_continuation_prompts=5,
                self_critique=False,
                confidence_scoring=False,
                min_confidence_for_action=3,
                verification=False,
            ),
        ),
        capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
        project_context=None,
        permission_policy=policy,
        permission_config_status=rule_status,
        workflow_mode="execute",
        safeguards=FakeSafeguards(),
        # Legacy hooks are stubbed: only message_history returns real data.
        legacy=RuntimeLegacyServices(
            message_history=lambda: session.messages,
            drain_steering_queue=lambda: [],
            queue_steering_message=lambda message: None,
            set_workflow_mode=lambda mode: None,
            refresh_capability_profile=lambda: None,
        ),
    )
127
128
def test_turn_finalizer_finalize_summary_uses_runtime_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check ``finalize_summary`` against the session held by the context.

    Verifies that the returned summary carries the session id and the
    cumulative usage returned by the session, that the session received one
    ``record_turn_usage`` call with the expected payload, that the
    definition-of-done capture hook ran, and that a trace is attached.
    """
    session = FakeSession()
    context = build_context(temp_dir, session)
    tracer = RuntimeTracer()
    # Seed one event before finalizing; the last assertion needs a non-empty trace.
    tracer.record("turn.completed", reason="done")
    finalizer = TurnFinalizer(
        context,
        tracer,
        DefinitionOfDoneStore(temp_dir),
        set_workflow_mode=lambda mode, dod, emit, summary, reason: None,  # type: ignore[arg-type]
    )
    dod = create_definition_of_done("Finish the task")
    dod.status = "done"
    summary = TurnSummary(
        final_response="All set.",
        definition_of_done=dod,
        iterations=2,
        usage={"prompt_tokens": 10},
        tool_result_messages=[Message(role=Role.TOOL, content="tool output")],
    )
    captured: dict[str, str] = {}

    def capture_definition_of_done(self, summary_text: str) -> None:
        # Replacement for the MemoryStore method: just remember the text.
        captured["summary"] = summary_text

    monkeypatch.setattr(
        "loader.runtime.finalization.MemoryStore.capture_definition_of_done",
        capture_definition_of_done,
    )

    final_summary = finalizer.finalize_summary(summary)

    assert final_summary.session_id == "session-test-123"
    assert final_summary.cumulative_usage == {"turns": 1, "tool_calls": 1, "iterations": 2}
    # The usage payload is expected to include tool_calls and iterations
    # alongside the original counters (presumably tool_calls == 1 comes from
    # the single tool result message -- confirm against TurnFinalizer).
    assert session.recorded_calls == [
        {
            "usage": {"prompt_tokens": 10, "tool_calls": 1, "iterations": 2},
            "tool_calls": 1,
            "iterations": 2,
        }
    ]
    assert "summary" in captured
    assert final_summary.trace
174 assert final_summary.trace