tenseleyflow/loader / 2733fcf

Browse files

Add direct tests for runtime response policy helpers

Authored by espadonne
SHA
2733fcf0399ea3e37e0c88cd45b255290454e088
Parents
f2c9862
Tree
35e083a

2 changed files

Status | File | + | -
M tests/test_completion_policy.py 159 0
M tests/test_repair.py 53 1
tests/test_completion_policy.py (modified)
@@ -1,10 +1,107 @@
11
 """Tests for completion-policy helpers."""
22
 
3
+from __future__ import annotations
4
+
5
+from pathlib import Path
6
+from types import SimpleNamespace
7
+
8
+import pytest
9
+
10
+from loader.llm.base import Message, Role
311
 from loader.runtime.completion_policy import CompletionPolicy
12
+from loader.runtime.context import RuntimeContext
13
+from loader.runtime.events import TurnSummary
14
+from loader.runtime.permissions import (
15
+    PermissionMode,
16
+    build_permission_policy,
17
+    load_permission_rules,
18
+)
419
 from loader.runtime.task_completion import (
520
     detect_premature_completion,
621
     get_continuation_prompt,
722
 )
23
+from loader.tools.base import create_default_registry
24
+from tests.helpers.runtime_harness import ScriptedBackend
25
+
26
+
27
class FakeCodeFilter:
    """Stateless stand-in for the runtime code filter used by the safeguards fake."""

    def reset(self) -> None:
        """No state to clear — deliberately a no-op."""
        return
30
+
31
+
32
class FakeSafeguards:
    """Test double for the runtime safeguards facade.

    Content filters pass everything through untouched, steering is always
    off, and loop detection returns canned results so a test can force
    either the looping or the non-looping code path.
    """

    def __init__(self, *, text_loop: tuple[bool, str] = (False, "")) -> None:
        # Opaque sentinels stand in for collaborators the policy only holds,
        # never inspects.
        self.action_tracker = object()
        self.validator = object()
        self.code_filter = FakeCodeFilter()
        # Canned verdict handed back verbatim by detect_text_loop().
        self._text_loop = text_loop
        # Every string passed to record_response(), in call order.
        self.recorded: list[str] = []

    def filter_stream_chunk(self, content: str) -> str:
        """Pass a streaming chunk through unchanged."""
        return content

    def filter_complete_content(self, content: str) -> str:
        """Pass the finished response through unchanged."""
        return content

    def should_steer(self) -> bool:
        """Steering is never requested by this fake."""
        return False

    def get_steering_message(self) -> str | None:
        """No steering message is ever available."""
        return None

    def record_response(self, content: str) -> None:
        """Remember the response so tests can assert on it later."""
        self.recorded += [content]

    def detect_text_loop(self, content: str) -> tuple[bool, str]:
        """Return the canned text-loop verdict supplied at construction."""
        return self._text_loop

    def detect_loop(self) -> tuple[bool, str]:
        """Action-loop detection always reports no loop."""
        return (False, "")
60
+
61
+
62
class FakeSession:
    """In-memory transcript standing in for the real session object."""

    def __init__(self) -> None:
        # Messages accumulate in arrival order.
        self.messages: list[Message] = []

    def append(self, message: Message) -> None:
        """Record *message* at the end of the transcript."""
        self.messages += [message]
68
+
69
+
70
def build_context(
    temp_dir: Path,
    *,
    safeguards: FakeSafeguards,
    max_continuation_prompts: int = 5,
    use_quick_completion: bool = True,
) -> RuntimeContext:
    """Assemble a RuntimeContext wired with fakes for completion-policy tests.

    The tool registry and permission policy are real objects rooted at
    *temp_dir*; the backend, session, config, and capability profile are
    lightweight stand-ins.
    """
    tool_registry = create_default_registry(temp_dir)
    tool_registry.configure_workspace_root(temp_dir)

    rules_status = load_permission_rules(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
        rules=rules_status.rules,
    )

    # Only the reasoning knobs the policy reads are modelled on the config.
    reasoning_cfg = SimpleNamespace(
        max_continuation_prompts=max_continuation_prompts,
        use_quick_completion=use_quick_completion,
    )
    return RuntimeContext(
        project_root=temp_dir,
        backend=ScriptedBackend(),
        registry=tool_registry,
        session=FakeSession(),  # type: ignore[arg-type]
        config=SimpleNamespace(force_react=False, reasoning=reasoning_cfg),
        capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
        project_context=None,
        permission_policy=permission_policy,
        permission_config_status=rules_status,
        workflow_mode="execute",
        safeguards=safeguards,
    )
8105
 
9106
 
10107
 def test_completion_policy_finalize_response_text_keeps_original_response() -> None:
@@ -32,3 +129,65 @@ def test_get_continuation_prompt_surfaces_missing_verification_steps() -> None:
32129
     )
33130
 
34131
     assert "Run the tests" in prompt or "verify it works" in prompt
132
+
133
+
134
@pytest.mark.asyncio
async def test_completion_policy_stops_for_text_loop_using_runtime_context(
    temp_dir: Path,
) -> None:
    """A detected text loop halts the turn and surfaces a loop notice."""
    safeguards = FakeSafeguards(
        text_loop=(True, "assistant repeated the same summary"),
    )
    context = build_context(temp_dir, safeguards=safeguards)
    policy = CompletionPolicy(context)
    summary = TurnSummary(final_response="")

    emitted = []

    async def emit(event) -> None:
        emitted.append(event)

    decision = await policy.maybe_stop_for_text_loop(
        content="Same summary again.",
        emit=emit,
        summary=summary,
    )

    assert decision.should_stop is True
    assert summary.final_response.startswith("I seem to be repeating myself")
    assert summary.assistant_messages[-1].role == Role.ASSISTANT
    assert context.session.messages[-1].content == summary.final_response
    # An error event is emitted first, then the final response event.
    assert emitted[0].type == "error"
    assert emitted[1].type == "response"
161
+
162
+
163
@pytest.mark.asyncio
async def test_completion_policy_requests_continuation_using_runtime_context(
    temp_dir: Path,
) -> None:
    """An unfinished task appends a continuation prompt to the session."""
    context = build_context(temp_dir, safeguards=FakeSafeguards())
    policy = CompletionPolicy(context)

    emitted = []

    async def emit(event) -> None:
        emitted.append(event)

    decision = await policy.maybe_continue_for_completion(
        content="I can handle that.",
        response_content="I can handle that.",
        task="Create the file and verify it works.",
        actions_taken=[],
        continuation_count=0,
        emit=emit,
    )

    assert decision.should_continue is True
    # The assistant's premature reply is persisted before the nudge.
    expected_assistant = Message(role=Role.ASSISTANT, content="I can handle that.")
    assert context.session.messages[-2] == expected_assistant
    follow_up = context.session.messages[-1]
    assert follow_up.role == Role.USER
    assert "verify it works" in follow_up.content.lower()
    assert emitted[0].type == "completion_check"
tests/test_repair.py (modified)
@@ -2,6 +2,7 @@
22
 
33
 from __future__ import annotations
44
 
5
+import json
56
 from pathlib import Path
67
 from types import SimpleNamespace
78
 
@@ -88,7 +89,9 @@ def build_context(
8889
     )
8990
 
9091
 
91
-def test_response_repairer_uses_context_legacy_raw_fallback(temp_dir: Path) -> None:
92
+def test_response_repairer_uses_runtime_parser_for_bracket_tool_fallback(
93
+    temp_dir: Path,
94
+) -> None:
9295
     context = build_context(
9396
         temp_dir=temp_dir,
9497
         use_react=False,
@@ -112,3 +115,52 @@ def test_response_repairer_uses_context_legacy_raw_fallback(temp_dir: Path) -> N
112115
     ]
113116
     assert analysis.tool_source == "raw_text"
114117
     assert analysis.clear_stream is True
118
+
119
+
120
def test_response_repairer_recovers_todowrite_from_runtime_registry(
    temp_dir: Path,
) -> None:
    """A raw-text TodoWrite JSON payload is recovered as a real tool call."""
    context = build_context(temp_dir=temp_dir, use_react=False)
    repairer = ResponseRepairer(context)

    # Single source of truth for the arguments: used to build the raw
    # payload and to state the expected recovered call.
    todo_arguments = {
        "todos": [
            {
                "content": "Run tests",
                "active_form": "Running tests",
                "status": "in_progress",
            }
        ]
    }
    raw_payload = json.dumps({"name": "TodoWrite", "arguments": todo_arguments})

    analysis = repairer.analyze_response(
        content="I'll track the work first.",
        response_content=raw_payload,
        tool_calls=[],
        extracted_iterations=0,
        max_extracted_iterations=3,
    )

    assert analysis.tool_source == "raw_text"
    assert analysis.clear_stream is True
    assert analysis.tool_calls == [
        ToolCall(id="call_0", name="TodoWrite", arguments=todo_arguments)
    ]