Python · 89100 bytes Raw Blame History
1 """Tests for permission policy and tool lifecycle hooks."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.llm.base import Message, Role, ToolCall
10 from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
11 from loader.runtime.executor import ToolExecutionState, ToolExecutor
12 from loader.runtime.hooks import (
13 ActiveRepairMutationScopeHook,
14 ActiveRepairScopeHook,
15 BaseToolHook,
16 FilePathAliasHook,
17 HookContext,
18 HookDecision,
19 HookManager,
20 HookResult,
21 LateReferenceDriftHook,
22 MissingPlannedOutputReadHook,
23 RelativePathContextHook,
24 SearchPathAliasHook,
25 )
26 from loader.runtime.permissions import (
27 PermissionMode,
28 PermissionOverride,
29 PermissionRuleDisposition,
30 PermissionRuleSet,
31 build_permission_policy,
32 )
33 from loader.runtime.safeguard_services import ActionTracker
34 from loader.runtime.tracing import RuntimeTracer
35 from loader.tools.base import create_default_registry
36
37
class RecordingHook(BaseToolHook):
    """Test hook that logs the name of each lifecycle callback it receives.

    Every callback appends its own name to ``events`` and returns a
    ``HookResult()`` built with no arguments.
    """

    def __init__(self, events: list[str]) -> None:
        # The caller keeps a reference to this list and inspects it afterwards.
        self.events = events

    def _record(self, stage: str) -> HookResult:
        """Append ``stage`` to the event log and return an argument-less result."""
        self.events.append(stage)
        return HookResult()

    async def pre_tool_use(self, context) -> HookResult:
        return self._record("pre_tool_use")

    async def post_tool_use(self, context) -> HookResult:
        return self._record("post_tool_use")

    async def post_tool_use_failure(self, context) -> HookResult:
        return self._record("post_tool_use_failure")
55
56
class DenyInPreHook(BaseToolHook):
    """Test hook whose pre-tool callback always returns a DENY decision.

    Both callbacks append their own name to ``events`` so a test can check
    which lifecycle stages ran.
    """

    def __init__(self, events: list[str]) -> None:
        self.events = events

    async def pre_tool_use(self, context) -> HookResult:
        self.events.append("pre_tool_use")
        denial = HookResult(
            decision=HookDecision.DENY,
            message="[Blocked - denied by test hook]",
            terminal_state="blocked",
        )
        return denial

    async def post_tool_use_failure(self, context) -> HookResult:
        self.events.append("post_tool_use_failure")
        unchanged = HookResult()
        return unchanged
74
75
def test_permission_policy_honors_overrides(temp_dir: Path) -> None:
    """Check that per-call overrides change the outcome under READ_ONLY mode.

    The policy requires WORKSPACE_WRITE for ``write`` while the active mode is
    READ_ONLY. Without an override the call is expected to be denied; an ALLOW
    override is expected to be allowed and an ASK override to yield "ask".

    The test is synchronous: ``policy.authorize`` is called without ``await``
    and nothing else here is awaited, so no asyncio marker is needed.
    """
    policy = build_permission_policy(
        active_mode=PermissionMode.READ_ONLY,
        workspace_root=temp_dir,
        tool_requirements={"write": PermissionMode.WORKSPACE_WRITE},
    )

    denied = policy.authorize("write")
    allowed = policy.authorize("write", override=PermissionOverride.ALLOW)
    asked = policy.authorize("write", override=PermissionOverride.ASK)

    assert denied.decision.value == "deny"
    assert allowed.allowed
    assert asked.decision.value == "ask"
91
92
def test_permission_mode_parsing_supports_prompt_and_allow() -> None:
    """``PermissionMode.from_str`` maps "prompt" and "allow" to their members."""
    cases = (
        ("prompt", PermissionMode.PROMPT),
        ("allow", PermissionMode.ALLOW),
    )
    for raw, expected_mode in cases:
        assert PermissionMode.from_str(raw) == expected_mode
96
97
def test_permission_policy_honors_rule_precedence(temp_dir: Path) -> None:
    """Check which rule wins when several rules match the same write call.

    Every call carries content matching the allow rule. The path additionally
    matches the deny rule (``secrets``), the ask rule (``README``), or neither,
    and the expected decision/disposition is deny, ask, and allow respectively.
    """
    rule_set = PermissionRuleSet.from_dict(
        {
            "allow": [{"tool": "write", "contains": "safe change"}],
            "deny": [{"tool": "write", "path_contains": "secrets"}],
            "ask": [{"tool": "write", "path_contains": "README"}],
        }
    )
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements={"write": PermissionMode.WORKSPACE_WRITE},
        rules=rule_set,
    )

    def authorize_write(file_name: str):
        # Same content for every call; only the target path varies.
        return policy.authorize(
            "write",
            arguments={
                "file_path": str(temp_dir / file_name),
                "content": "safe change\n",
            },
        )

    denied = authorize_write("secrets.txt")
    asked = authorize_write("README.md")
    allowed = authorize_write("notes.txt")

    expectations = (
        (denied, "deny", PermissionRuleDisposition.DENY),
        (asked, "ask", PermissionRuleDisposition.ASK),
        (allowed, "allow", PermissionRuleDisposition.ALLOW),
    )
    for authorization, decision, disposition in expectations:
        assert authorization.decision.value == decision
        assert authorization.matched_disposition == disposition
140
141
@pytest.mark.asyncio
async def test_prompt_mode_executor_prompts_once_and_respects_denial(
    temp_dir: Path,
) -> None:
    """In PROMPT mode a write triggers one confirmation; declining blocks it.

    The confirmation callback records its arguments and returns ``False``.
    The outcome must be DECLINED, the file must not exist, and the recorded
    details must mention the active and required modes.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.PROMPT,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "prompted.txt"
    recorded: list[tuple[str, str, str]] = []

    async def deny(tool_name: str, message: str, details: str) -> bool:
        recorded.append((tool_name, message, details))
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "prompted\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=deny,
    )

    assert outcome.state == ToolExecutionState.DECLINED
    assert not target.exists()
    assert len(recorded) == 1
    _, _, details = recorded[0]
    assert "active_mode=prompt" in details
    assert "required_mode=workspace-write" in details
175
176
@pytest.mark.asyncio
async def test_allow_mode_executor_skips_prompt_for_destructive_write(
    temp_dir: Path,
) -> None:
    """In ALLOW mode a write executes without calling the confirmation callback.

    The callback would decline if invoked, so the test also checks that it
    recorded nothing and that the file was written.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "allowed.txt"
    prompted_tools: list[str] = []

    async def unexpected(tool_name: str, message: str, details: str) -> bool:
        prompted_tools.append(tool_name)
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "allowed\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=unexpected,
    )

    assert outcome.state == ToolExecutionState.EXECUTED
    assert target.read_text() == "allowed\n"
    assert prompted_tools == []
208
209
@pytest.mark.asyncio
async def test_executor_accepts_edit_content_alias_for_new_string(
    temp_dir: Path,
) -> None:
    """An ``edit`` call that passes ``content`` is treated as ``new_string``.

    The call omits ``new_string``; the test expects the edit to execute, the
    file to hold the replacement, and the outcome's tool call arguments to
    contain ``new_string``.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "guide.html"
    target.write_text("<h1>Old</h1>\n")

    edit_arguments = {
        "file_path": str(target),
        "old_string": "<h1>Old</h1>",
        "content": "<h1>New</h1>",
    }
    edit_call = ToolCall(id="edit-1", name="edit", arguments=edit_arguments)
    outcome = await executor.execute_tool_call(edit_call, source="native")

    assert outcome.state == ToolExecutionState.EXECUTED
    assert target.read_text() == "<h1>New</h1>\n"
    assert outcome.tool_call.arguments["new_string"] == "<h1>New</h1>"
240
241
@pytest.mark.asyncio
async def test_ask_rule_prompts_even_when_allow_mode(temp_dir: Path) -> None:
    """A matching ask rule forces a confirmation even under ALLOW mode.

    The callback declines, so the write must end DECLINED with no file
    created, and the single recorded prompt must name the matched ask rule.
    """
    registry = create_default_registry(temp_dir)
    ask_rules = PermissionRuleSet.from_dict(
        {"ask": [{"tool": "write", "path_contains": "README"}]}
    )
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
        rules=ask_rules,
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "README.md"
    prompt_details: list[str] = []

    async def deny(tool_name: str, message: str, details: str) -> bool:
        prompt_details.append(details)
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "no thanks\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=deny,
    )

    assert outcome.state == ToolExecutionState.DECLINED
    assert not target.exists()
    assert len(prompt_details) == 1
    (only_prompt,) = prompt_details
    assert "matched_ask_rule=tool=write, path_contains=README" in only_prompt
275
276
@pytest.mark.asyncio
async def test_hook_lifecycle_runs_in_order_for_success(temp_dir: Path) -> None:
    """A successful write fires ``pre_tool_use`` and then ``post_tool_use``."""
    lifecycle: list[str] = []
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook_manager = HookManager([RecordingHook(lifecycle)])
    executor = ToolExecutor(
        registry,
        RuntimeTracer(),
        policy,
        hooks=hook_manager,
    )
    target = temp_dir / "hook-success.txt"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "hook success\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        skip_confirmation=True,
    )

    assert outcome.state == ToolExecutionState.EXECUTED
    assert lifecycle == ["pre_tool_use", "post_tool_use"]
    assert target.read_text() == "hook success\n"
307
308
@pytest.mark.asyncio
async def test_pre_hook_deny_still_runs_failure_hook_once(temp_dir: Path) -> None:
    """A pre-hook DENY blocks the write and fires the failure hook exactly once.

    The outcome must be BLOCKED, the file must not be created, one tool
    result must be attached, and the hook's denial text must be surfaced.
    """
    lifecycle: list[str] = []
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook_manager = HookManager([DenyInPreHook(lifecycle)])
    executor = ToolExecutor(
        registry,
        RuntimeTracer(),
        policy,
        hooks=hook_manager,
    )
    target = temp_dir / "hook-denied.txt"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "should not exist\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        skip_confirmation=True,
    )

    assert outcome.state == ToolExecutionState.BLOCKED
    assert lifecycle == ["pre_tool_use", "post_tool_use_failure"]
    assert not target.exists()
    assert len(outcome.message.tool_results) == 1
    assert "denied by test hook" in outcome.event_content
341
342
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("tool_name", "arguments", "expected_path"),
    [
        ("read", {"file": "notes.txt"}, "notes.txt"),
        ("write", {"filepath": "notes.txt", "content": "hello\n"}, "notes.txt"),
        (
            "edit",
            {"filePath": "notes.txt", "old_string": "before", "new_string": "after"},
            "notes.txt",
        ),
        ("patch", {"path": "notes.txt", "hunks": []}, "notes.txt"),
    ],
)
async def test_file_path_alias_hook_canonicalizes_common_aliases(
    temp_dir: Path,
    tool_name: str,
    arguments: dict[str, object],
    expected_path: str,
) -> None:
    """``FilePathAliasHook`` rewrites path aliases to the ``file_path`` key.

    For each parametrized tool call the rewritten arguments must carry the
    path under ``file_path`` and none of the alias keys may remain.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    aliased_call = ToolCall(id=f"{tool_name}-1", name=tool_name, arguments=arguments)
    context = HookContext(
        tool_call=aliased_call,
        tool=registry.get(tool_name),
        registry=registry,
        permission_policy=policy,
        source="native",
    )

    result = await FilePathAliasHook().pre_tool_use(context)

    assert result.updated_arguments is not None
    assert result.updated_arguments["file_path"] == expected_path
    alias_keys = ("file", "filepath", "filePath", "filename", "path")
    assert all(key not in result.updated_arguments for key in alias_keys)
385
386
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("tool_name", "arguments", "expected_path"),
    [
        ("glob", {"pattern": "*.html", "directory": "chapters"}, "chapters"),
        ("grep", {"pattern": "alpha", "dir": "src"}, "src"),
    ],
)
async def test_search_path_alias_hook_canonicalizes_common_aliases(
    temp_dir: Path,
    tool_name: str,
    arguments: dict[str, object],
    expected_path: str,
) -> None:
    """``SearchPathAliasHook`` rewrites directory aliases to the ``path`` key.

    The rewritten arguments must carry the directory under ``path`` and must
    not contain ``directory``, ``dir`` or ``folder``.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    aliased_call = ToolCall(id=f"{tool_name}-1", name=tool_name, arguments=arguments)
    context = HookContext(
        tool_call=aliased_call,
        tool=registry.get(tool_name),
        registry=registry,
        permission_policy=policy,
        source="native",
    )

    result = await SearchPathAliasHook().pre_tool_use(context)

    assert result.updated_arguments is not None
    assert result.updated_arguments["path"] == expected_path
    alias_keys = ("directory", "dir", "folder")
    assert all(key not in result.updated_arguments for key in alias_keys)
423
424
@pytest.mark.asyncio
async def test_search_path_alias_hook_splits_full_glob_pattern(
    temp_dir: Path,
) -> None:
    """A glob pattern with a directory prefix is split into path and pattern.

    The pattern ``<temp_dir>/chapters/*.html`` is expected to become
    ``path=<temp_dir>/chapters`` and ``pattern="*.html"``.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    chapters = temp_dir / "chapters"
    glob_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={"pattern": f"{chapters}/*.html"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )

    result = await SearchPathAliasHook().pre_tool_use(context)

    assert result.updated_arguments is not None
    assert result.updated_arguments["path"] == str(chapters)
    assert result.updated_arguments["pattern"] == "*.html"
455
456
@pytest.mark.asyncio
async def test_search_path_alias_hook_splits_implicit_recursive_glob_parent(
    temp_dir: Path,
) -> None:
    """A ``**/``-prefixed pattern with a concrete parent is split.

    ``**/Loader/guides/nginx/chapters/*.html`` is expected to become
    ``path="Loader/guides/nginx/chapters"`` and ``pattern="*.html"``.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    glob_call = ToolCall(
        id="glob-implicit-1",
        name="glob",
        arguments={"pattern": "**/Loader/guides/nginx/chapters/*.html"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )

    result = await SearchPathAliasHook().pre_tool_use(context)

    assert result.updated_arguments is not None
    assert result.updated_arguments["path"] == "Loader/guides/nginx/chapters"
    assert result.updated_arguments["pattern"] == "*.html"
486
487
@pytest.mark.asyncio
async def test_search_path_alias_hook_leaves_fully_generic_recursive_glob_unchanged(
    temp_dir: Path,
) -> None:
    """The pattern ``**/*.html`` produces no argument rewrite."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    glob_call = ToolCall(
        id="glob-generic-1",
        name="glob",
        arguments={"pattern": "**/*.html"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )

    result = await SearchPathAliasHook().pre_tool_use(context)

    assert result.updated_arguments is None
515
516
@pytest.mark.asyncio
async def test_relative_path_context_hook_remaps_workspace_mirror_of_external_root(
    temp_dir: Path,
) -> None:
    """A workspace path mirroring an external root is remapped to that root.

    A prior ``read`` under ``external-home/Loader/guides/fortran`` is recorded
    in the action tracker. A later write to the same relative location under
    the workspace is expected to be redirected to the external tree, with one
    injected message explaining the correction.
    """
    workspace_root = temp_dir / "workspace"
    workspace_root.mkdir()
    external_root = temp_dir / "external-home"
    guides_dir = external_root / "Loader" / "guides"
    fortran_dir = guides_dir / "fortran"
    fortran_dir.mkdir(parents=True)
    earlier_read = fortran_dir / "index.html"
    earlier_read.write_text("<html></html>\n")
    guides_dir.mkdir(exist_ok=True)

    registry = create_default_registry(workspace_root)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=workspace_root,
        tool_requirements=registry.get_tool_requirements(),
    )
    tracker = ActionTracker()
    tracker.record_tool_call("read", {"file_path": str(earlier_read)})
    hook = RelativePathContextHook(tracker, workspace_root)

    # The same relative location, once under the workspace and once external.
    nginx_index = Path("Loader") / "guides" / "nginx" / "index.html"
    mirrored_workspace_path = workspace_root / nginx_index
    expected_external_path = external_root / nginx_index

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(mirrored_workspace_path),
            "content": "<html></html>\n",
        },
    )
    context = HookContext(
        tool_call=write_call,
        tool=registry.get("write"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.updated_arguments is not None
    remapped = Path(result.updated_arguments["file_path"]).resolve()
    assert remapped == expected_external_path.resolve()
    resolved_loader_root = (external_root / "Loader").resolve()
    expected_message = (
        "[Path anchor correction] A repo-local mirror path was remapped to the "
        f"established output root under `{resolved_loader_root}`. Keep future "
        "file/search tool calls on that external root and use `index.html` there "
        "instead of re-anchoring work to the workspace checkout."
    )
    assert result.injected_messages == [expected_message]
573
574
@pytest.mark.asyncio
async def test_relative_path_context_hook_prefers_external_search_ancestor_over_workspace_match(
    temp_dir: Path,
) -> None:
    """A relative search path resolves to the external tree, not the workspace.

    Both ``workspace/guides`` and ``external-home/Loader/guides`` exist. After
    a recorded read under the external tree, a glob on ``guides`` is expected
    to be rewritten to the external ``Loader/guides`` directory.
    """
    workspace_root = temp_dir / "workspace"
    (workspace_root / "guides").mkdir(parents=True)
    external_root = temp_dir / "external-home"
    external_guides = external_root / "Loader" / "guides"
    fortran_dir = external_guides / "fortran"
    fortran_dir.mkdir(parents=True)
    earlier_read = fortran_dir / "index.html"
    earlier_read.write_text("<html></html>\n")

    registry = create_default_registry(workspace_root)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=workspace_root,
        tool_requirements=registry.get_tool_requirements(),
    )
    tracker = ActionTracker()
    tracker.record_tool_call("read", {"file_path": str(earlier_read)})
    hook = RelativePathContextHook(tracker, workspace_root)

    glob_call = ToolCall(
        id="glob-ancestor-1",
        name="glob",
        arguments={"path": "guides", "pattern": "**"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.updated_arguments is not None
    remapped = Path(result.updated_arguments["path"]).resolve()
    assert remapped == external_guides.resolve()
617
618
class FakeSession:
    """Minimal session stand-in holding an active DoD path and a message list."""

    def __init__(self, *, active_dod_path: str, messages: list[Message]) -> None:
        # Both values are stored as given; no copying or validation is done.
        self.messages = messages
        self.active_dod_path = active_dod_path
623
624
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_reference_reads_while_fixing(
    temp_dir: Path,
) -> None:
    """Reading an unrelated reference file is denied during an active repair.

    The DoD status is ``fixing`` and the session's repair-focus message names
    ``guide/index.html``. A read of ``reference/index.html`` must be denied
    with a message that mentions the repair scope and the repair target.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    repair_target = temp_dir / "guide" / "index.html"
    missing_chapter = temp_dir / "guide" / "chapters" / "01-introduction.html"
    repair_note = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_note)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    for fragment in ("active repair scope", str(repair_target)):
        assert fragment in result.message
679
680
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_stale_memory_reads_while_fixing(
    temp_dir: Path,
) -> None:
    """A ``notepad_read`` call is denied while a concrete repair is pending.

    The repair focus arrives in a USER message reporting a failing DoD check.
    The denial message must warn that durable memory may be stale, point to
    the active verifier/DoD, and name the repair target.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    repair_target = temp_dir / "guide" / "chapters" / "05-load-balancing.html"
    failure_report = (
        "[DEFINITION OF DONE CHECK STILL FAILING]\n"
        "HTML guide content quality issues:\n"
        "Repair focus:\n"
        f"- {repair_target}: thin content (1500 text chars, expected at least 1758)\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.USER, content=failure_report)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    memory_call = ToolCall(
        id="memory-1",
        name="notepad_read",
        arguments={},
    )
    context = HookContext(
        tool_call=memory_call,
        tool=registry.get("notepad_read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    expected_fragments = (
        "durable memory may be stale",
        "trust the active verifier/DoD",
        str(repair_target),
    )
    for fragment in expected_fragments:
        assert fragment in result.message
737
738
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_reads_inside_active_artifact_set(
    temp_dir: Path,
) -> None:
    """Reading a file named in the repair focus is allowed to continue.

    The repair-focus message mentions both ``guide/index.html`` and a chapter
    file; a read of that chapter file must yield a CONTINUE decision.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    guide_root = temp_dir / "guide"
    repair_target = guide_root / "index.html"
    chapter_path = guide_root / "chapters" / "01-getting-started.html"
    repair_note = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n"
        f"- Fix the broken local reference `../styles.css` in `{chapter_path}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{chapter_path}`; otherwise remove or replace `chapters/01-getting-started.html`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_note)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(chapter_path)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.CONTINUE
790 assert result.decision == HookDecision.CONTINUE
791
792
793 @pytest.mark.asyncio
794 async def test_active_repair_scope_hook_allows_existing_sibling_reads_with_source_of_truth_hint(
795 temp_dir: Path,
796 ) -> None:
797 registry = create_default_registry(temp_dir)
798 policy = build_permission_policy(
799 active_mode=PermissionMode.WORKSPACE_WRITE,
800 workspace_root=temp_dir,
801 tool_requirements=registry.get_tool_requirements(),
802 )
803 dod_store = DefinitionOfDoneStore(temp_dir)
804 dod = create_definition_of_done("Repair the active artifact set")
805 dod.status = "fixing"
806 dod_path = dod_store.save(dod)
807 repair_target = temp_dir / "guide" / "index.html"
808 chapter_dir = temp_dir / "guide" / "chapters"
809 chapter_dir.mkdir(parents=True, exist_ok=True)
810 sibling = chapter_dir / "03-basic-usage.html"
811 sibling.write_text("<h1>Basic Usage</h1>\n")
812 session = FakeSession(
813 active_dod_path=str(dod_path),
814 messages=[
815 Message(
816 role=Role.ASSISTANT,
817 content=(
818 "Repair focus:\n"
819 f"- Fix the broken local reference `chapters/02-installation.html` in `{repair_target}`.\n"
820 f"- Immediate next step: edit `{repair_target}`.\n"
821 f"- If the broken reference should remain, create `{chapter_dir / '02-installation.html'}`; otherwise remove or replace `chapters/02-installation.html`.\n"
822 "- Use the existing artifact files as the source of truth while repairing this file: "
823 f"`{repair_target}`.\n"
824 "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
825 ),
826 )
827 ],
828 )
829 hook = ActiveRepairScopeHook(
830 dod_store=dod_store,
831 project_root=temp_dir,
832 session=session,
833 )
834
835 result = await hook.pre_tool_use(
836 HookContext(
837 tool_call=ToolCall(
838 id="read-1",
839 name="read",
840 arguments={"file_path": str(sibling)},
841 ),
842 tool=registry.get("read"),
843 registry=registry,
844 permission_policy=policy,
845 source="native",
846 )
847 )
848
849 assert result.decision == HookDecision.CONTINUE
850
851
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_verification_source_outside_repair_target(
    temp_dir: Path,
) -> None:
    """A read issued with ``source="verification"`` is allowed to continue.

    The read targets ``guide/index.html``, which is not the repair target
    named in the repair-focus message; the DoD status here is ``in_progress``.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)

    guide_root = temp_dir / "guide"
    repair_target = guide_root / "chapters" / "06-troubleshooting.html"
    repair_note = (
        "Repair focus:\n"
        f"- Fix the broken local reference `01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_note)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    verify_call = ToolCall(
        id="verify-1",
        name="read",
        arguments={"file_path": str(guide_root / "index.html")},
    )
    context = HookContext(
        tool_call=verify_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="verification",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.CONTINUE
902
903
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_local_rereads_outside_concrete_repair_files(
    temp_dir: Path,
) -> None:
    """Reading a chapter not named in the repair focus is denied.

    The repair focus names two chapter files and a stylesheet. A native read
    of a different chapter must be denied, and the denial message must mention
    the repair scope, the repair target and the stylesheet path.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    repair_target = chapters_dir / "05-advanced-configurations.html"
    second_target = chapters_dir / "06-troubleshooting.html"
    stylesheet = guide_root / "styles.css"
    other_chapter = chapters_dir / "01-getting-started.html"
    repair_note = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Fix the broken local reference `../styles.css` in `{second_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_note)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(other_chapter)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    expected_fragments = (
        "active repair scope",
        str(repair_target),
        str(stylesheet),
    )
    for fragment in expected_fragments:
        assert fragment in result.message
963
964
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_broad_glob_during_concrete_repair(
    temp_dir: Path,
) -> None:
    """A recursive glob over the guide root is denied during a concrete repair.

    The repair focus names only ``guide/index.html``. A ``**/*.html`` glob
    over ``guide`` must be denied with a message that mentions the repair
    scope and the repair target.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the generated guide")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    # Materialise the guide tree so the glob would have real files to match.
    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    repair_target = guide_root / "index.html"
    repair_target.write_text("<h1>Guide</h1>\n")
    (chapters / "01-introduction.html").write_text("<h1>Intro</h1>\n")

    repair_note = (
        "Repair focus:\n"
        f"- Improve `{repair_target}`: insufficient structured content.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_note)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    glob_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={"path": str(guide_root), "pattern": "**/*.html"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    for fragment in ("active repair scope", str(repair_target)):
        assert fragment in result.message
1024
1025
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_repair_audit_loop_after_repeated_source_reads(
    temp_dir: Path,
) -> None:
    """Deny the fifth consecutive in-scope read as a repair audit loop.

    Four reads alternating between the repair target and the intro chapter
    must each be allowed (and are reported back through ``post_tool_use``);
    the fifth read is expected to be blocked with a "repair audit loop"
    message.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    saved_dod = store.save(definition)

    # Artifact set on disk: the index page and two chapters.
    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    index_page = guide_root / "index.html"
    index_page.write_text("<h1>Guide</h1>\n")
    intro_page = chapter_dir / "01-introduction.html"
    install_page = chapter_dir / "02-installation.html"
    intro_page.write_text("<h1>Intro</h1>\n")
    install_page.write_text("<h1>Install</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/02-installation.html` in `{index_page}`.\n"
        f"- Immediate next step: edit `{index_page}`.\n"
        f"- If the broken reference should remain, create `{install_page}`; otherwise remove or replace `chapters/02-installation.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{index_page}`, `{intro_page}`, `{install_page}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    def read_context(attempt: int) -> HookContext:
        # Odd attempts read the repair target, even attempts the intro chapter.
        if attempt % 2:
            path = index_page
        else:
            path = intro_page
        read_call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(path)},
        )
        return HookContext(
            tool_call=read_call,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=permissions,
            source="native",
        )

    for attempt in (1, 2, 3, 4):
        context = read_context(attempt)
        allowed = await scope_hook.pre_tool_use(context)
        assert allowed.decision == HookDecision.CONTINUE
        await scope_hook.post_tool_use(context)

    blocked = await scope_hook.pre_tool_use(read_context(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.terminal_state == "blocked"
    denial = blocked.message
    assert denial is not None
    assert "repair audit loop" in denial
1098
1099
@pytest.mark.asyncio
async def test_active_repair_scope_audit_loop_names_next_missing_repair_target(
    temp_dir: Path,
) -> None:
    """The audit-loop denial should name the chapter that still has to be created.

    The repair target exists on disk, while the chapter it links to
    (``05-load-balancing.html``) is never written. After four allowed reads
    of the repair target, the fifth is expected to be denied with a message
    containing the missing chapter's path.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    saved_dod = store.save(definition)

    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    proxy_page = chapter_dir / "04-reverse-proxy.html"
    missing_page = chapter_dir / "05-load-balancing.html"  # intentionally not created
    proxy_page.write_text("<h1>Reverse Proxy</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `05-load-balancing.html` in `{proxy_page}`.\n"
        f"- Immediate next step: edit `{proxy_page}`.\n"
        f"- If the broken reference should remain, create `{missing_page}`; otherwise remove or replace `05-load-balancing.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{proxy_page}`, `{missing_page}`.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    def read_context(attempt: int) -> HookContext:
        # Every attempt re-reads the same repair target.
        read_call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(proxy_page)},
        )
        return HookContext(
            tool_call=read_call,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=permissions,
            source="native",
        )

    for attempt in (1, 2, 3, 4):
        context = read_context(attempt)
        allowed = await scope_hook.pre_tool_use(context)
        assert allowed.decision == HookDecision.CONTINUE
        await scope_hook.post_tool_use(context)

    blocked = await scope_hook.pre_tool_use(read_context(5))

    assert blocked.decision == HookDecision.DENY
    denial = blocked.message
    assert denial is not None
    assert "repair audit loop" in denial
    assert str(missing_page) in denial
1167
1168
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_scoped_glob_within_active_artifact_roots(
    temp_dir: Path,
) -> None:
    """Allow a glob whose pattern is confined to the guide's chapters directory.

    The glob is rooted at the workspace, but its pattern
    (``**/guide/chapters/*.html``) only targets the directory holding the
    chapter files named in the repair message, so the hook is expected to
    let it continue.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    # Only paths are computed here; nothing is written to disk in this test.
    guide_root = temp_dir / "guide"
    index_page = guide_root / "index.html"
    chapter_dir = guide_root / "chapters"
    troubleshooting = chapter_dir / "troubleshooting.html"
    introduction = chapter_dir / "introduction.html"
    installation = chapter_dir / "installation.html"
    configuration = chapter_dir / "configuration.html"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/troubleshooting.html` in `{index_page}`.\n"
        f"- Immediate next step: edit `{index_page}`.\n"
        f"- If the broken reference should remain, create `{troubleshooting}`; otherwise remove or replace `chapters/troubleshooting.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{introduction}`, `{installation}`, `{configuration}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    glob_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={
            "path": str(temp_dir),
            "pattern": "**/guide/chapters/*.html",
        },
    )
    context = HookContext(
        tool_call=glob_call,
        tool=tools.get("glob"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await scope_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.CONTINUE
1226
1227
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_declared_missing_sibling_reads(
    temp_dir: Path,
) -> None:
    """Allow reading a chapter that the index links to but that is not on disk yet.

    ``index.html`` links to both ``overview.html`` (written) and
    ``installation.html`` (never written). The repair message only names the
    overview chapter; a read of the installation chapter is still expected
    to continue.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True)
    index_page = guide_root / "index.html"
    overview_page = chapter_dir / "overview.html"
    installation_page = chapter_dir / "installation.html"  # linked, never written
    index_markup = [
        "<html>",
        '<a href="chapters/overview.html">Overview</a>',
        '<a href="chapters/installation.html">Installation</a>',
        "</html>",
    ]
    index_page.write_text("\n".join(index_markup) + "\n")
    overview_page.write_text("<h1>Overview</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/overview.html` in `{index_page}`.\n"
        f"- Immediate next step: edit `{index_page}`.\n"
        f"- If the broken reference should remain, create `{overview_page}`; otherwise remove or replace `chapters/overview.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{overview_page}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    read_call = ToolCall(
        id="read-allowed-sibling",
        name="read",
        arguments={"file_path": str(installation_page)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await scope_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.CONTINUE
1299
1300
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_reference_reads_during_in_progress_repair(
    temp_dir: Path,
) -> None:
    """Deny a read of ``reference/index.html`` while an ``in_progress`` repair is named.

    The repair message names a chapter and a stylesheet under ``guide/``; the
    attempted read is outside those paths, so the hook is expected to block it
    with an "active repair scope" message.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    # Paths only; this test writes no guide files to disk.
    guide_root = temp_dir / "guide"
    chapter_page = guide_root / "chapters" / "05-advanced-configurations.html"
    stylesheet = guide_root / "styles.css"
    reference_page = temp_dir / "reference" / "index.html"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{chapter_page}`.\n"
        f"- Immediate next step: edit `{chapter_page}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(reference_page)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await scope_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    denial = outcome.message
    assert denial is not None
    assert "active repair scope" in denial
1355
1356
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_blocks_writes_outside_named_repair_files(
    temp_dir: Path,
) -> None:
    """Deny an edit to a chapter that the repair message does not name.

    The repair message names chapter 05 and the stylesheet; the edit targets
    chapter 01. The hook is expected to block it and mention the named repair
    target in its message.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    # Paths only; this test writes no guide files to disk.
    guide_root = temp_dir / "guide"
    named_chapter = guide_root / "chapters" / "05-advanced-configurations.html"
    other_chapter = guide_root / "chapters" / "01-getting-started.html"
    stylesheet = guide_root / "styles.css"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{named_chapter}`.\n"
        f"- Immediate next step: edit `{named_chapter}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    edit_call = ToolCall(
        id="edit-1",
        name="edit",
        arguments={
            "file_path": str(other_chapter),
            "old_string": "old",
            "new_string": "new",
        },
    )
    context = HookContext(
        tool_call=edit_call,
        tool=tools.get("edit"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    denial = outcome.message
    assert denial is not None
    assert "active repair mutation scope" in denial
    assert str(named_chapter) in denial
1413
1414
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_allows_expected_repair_file_writes(
    temp_dir: Path,
) -> None:
    """Allow writing the stylesheet that the repair message says to create."""
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    # Paths only; this test writes no guide files to disk.
    guide_root = temp_dir / "guide"
    named_chapter = guide_root / "chapters" / "05-advanced-configurations.html"
    css_file = guide_root / "styles.css"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{named_chapter}`.\n"
        f"- Immediate next step: edit `{named_chapter}`.\n"
        f"- If the broken reference should remain, create `{css_file}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(css_file), "content": "body { color: #222; }\n"},
    )
    context = HookContext(
        tool_call=write_call,
        tool=tools.get("write"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.CONTINUE
1467
1468
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_allows_declared_missing_sibling_outputs(
    temp_dir: Path,
) -> None:
    """Allow creating a chapter that the index links to but that is not on disk yet.

    ``index.html`` links to chapters 01 (written) and 02 (never written). The
    repair message only names chapter 01; a write that creates chapter 02 is
    still expected to continue.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True)
    index_page = guide_root / "index.html"
    intro_page = chapter_dir / "01-introduction.html"
    install_page = chapter_dir / "02-installation.html"  # linked, never written
    index_markup = [
        "<html>",
        '<a href="chapters/01-introduction.html">Introduction</a>',
        '<a href="chapters/02-installation.html">Installation</a>',
        "</html>",
    ]
    index_page.write_text("\n".join(index_markup) + "\n")
    intro_page.write_text("<h1>Introduction</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-introduction.html` in `{index_page}`.\n"
        f"- Immediate next step: edit `{index_page}`.\n"
        f"- If the broken reference should remain, create `{intro_page}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{intro_page}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    write_call = ToolCall(
        id="write-2",
        name="write",
        arguments={"file_path": str(install_page), "content": "<h1>Installation</h1>\n"},
    )
    context = HookContext(
        tool_call=write_call,
        tool=tools.get("write"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.CONTINUE
1540
1541
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_blocks_broad_mutating_bash(
    temp_dir: Path,
) -> None:
    """Deny a ``mkdir -p`` bash command targeting a directory the repair does not name.

    The command would create ``guide/assets``, which the repair message never
    mentions. The hook is expected to block it and mention the named repair
    target in its message.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    # Paths only; this test writes no guide files to disk.
    guide_root = temp_dir / "guide"
    named_chapter = guide_root / "chapters" / "05-advanced-configurations.html"
    stylesheet = guide_root / "styles.css"
    assets_dir = guide_root / "assets"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{named_chapter}`.\n"
        f"- Immediate next step: edit `{named_chapter}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    bash_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": f"mkdir -p {assets_dir}"},
    )
    context = HookContext(
        tool_call=bash_call,
        tool=tools.get("bash"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    denial = outcome.message
    assert denial is not None
    assert "active repair mutation scope" in denial
    assert str(named_chapter) in denial
1597
1598
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_out_of_scope_reference_reads(
    temp_dir: Path,
) -> None:
    """Deny a reference read while one planned output is still missing.

    The plan lists four files; three exist on disk and
    ``03-first-website.html`` does not. Reading ``reference/index.html`` is
    expected to be blocked with a "late reference drift" message that names
    the missing file.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    plan_text = (
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
        "- `guide/chapters/03-first-website.html`\n"
    )
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(plan_text)
    definition.implementation_plan = str(plan_file)
    saved_dod = store.save(definition)

    # Create every planned output except the third chapter.
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    (chapter_dir / "01-getting-started.html").write_text("one")
    (chapter_dir / "02-installation.html").write_text("two")

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    denial = outcome.message
    assert denial is not None
    assert "late reference drift" in denial
    assert "03-first-website.html" in denial
1653
1654
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_reads_inside_planned_artifact_set(
    temp_dir: Path,
) -> None:
    """Allow reading a file that is itself listed in the implementation plan.

    One planned chapter is still missing, but the read targets
    ``02-installation.html``, which is both planned and present, so the hook
    is expected to let it continue.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    plan_text = (
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
        "- `guide/chapters/03-first-website.html`\n"
    )
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(plan_text)
    definition.implementation_plan = str(plan_file)
    saved_dod = store.save(definition)

    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    install_page = chapter_dir / "02-installation.html"
    (temp_dir / "guide" / "index.html").write_text("index")
    (chapter_dir / "01-getting-started.html").write_text("one")
    install_page.write_text("two")

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(install_page)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.CONTINUE
1706
1707
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reopen_after_study_and_first_output(
    temp_dir: Path,
) -> None:
    """Deny reopening the reference once it was studied and the first output exists.

    The DoD records the reference-study item as completed and only
    ``guide/index.html`` has been written. The denial is expected to name
    ``01-getting-started.html``.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"
    definition.completed_items = [
        "First, examine the existing reference guide structure to understand the format and cadence",
    ]

    plan_text = (
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
    )
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(plan_text)
    definition.implementation_plan = str(plan_file)

    # Only the index page exists; both planned chapters are still missing.
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")

    saved_dod = store.save(definition)
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    read_call = ToolCall(
        id="read-reference",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    denial = outcome.message
    assert denial is not None
    assert "late reference drift" in denial
    assert "01-getting-started.html" in denial
1762
1763
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """Deny a native reference read once every planned file is on disk.

    The plan lists the guide directories and three files using absolute
    paths, and all of them exist. The denial is expected to mention
    "completed artifact set scope" and the guide root.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"

    # One bullet per planned path, in the order the plan lists them.
    planned_paths = [guide_root, chapter_dir, index_page, first_chapter, second_chapter]
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        *(f"- `{planned}`" for planned in planned_paths),
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapter_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text("index")
    first_chapter.write_text("one")
    second_chapter.write_text("two")

    saved_dod = store.save(definition)
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    denial = outcome.message
    assert denial is not None
    assert "completed artifact set scope" in denial
    assert str(guide_root) in denial
1826
1827
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reads_when_outputs_exist_but_need_quality(
    temp_dir: Path,
) -> None:
    """Deny a reference read when outputs exist but a quality item is still pending.

    Both planned files are on disk (with thin content) and the DoD carries a
    pending "improve depth" item. The denial is expected to mention
    "completed artifact set scope" and the guide root.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create an equally thorough multi-page HTML guide.")
    definition.status = "in_progress"
    definition.pending_items.append("Improve generated guide depth and formatting")

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        # The chapters directory is listed with a trailing slash.
        f"- `{chapter_dir}/`",
        f"- `{first_chapter}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapter_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text(
        '<h1>Guide</h1><a href="chapters/01-getting-started.html">One</a>\n'
    )
    first_chapter.write_text("<h1>One</h1><p>thin</p>\n")

    saved_dod = store.save(definition)
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    read_call = ToolCall(
        id="read-reference",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.DENY
    denial = outcome.message
    assert denial is not None
    assert "completed artifact set scope" in denial
    assert str(guide_root) in denial
1889
1890
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_verification_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """Allow a reference read whose context source is ``"verification"``.

    Every planned file exists on disk, and the read is issued with
    ``source="verification"`` rather than ``"native"``; the hook is expected
    to let it continue.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"

    # One bullet per planned path, in the order the plan lists them.
    planned_paths = [guide_root, chapter_dir, index_page, first_chapter, second_chapter]
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        *(f"- `{planned}`" for planned in planned_paths),
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapter_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text("index")
    first_chapter.write_text("one")
    second_chapter.write_text("two")

    saved_dod = store.save(definition)
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    read_call = ToolCall(
        id="read-verify-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    context = HookContext(
        tool_call=read_call,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=permissions,
        source="verification",
    )
    outcome = await drift_hook.pre_tool_use(context)

    assert outcome.decision == HookDecision.CONTINUE
1949
1950
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_excessive_post_build_self_audits(
    temp_dir: Path,
) -> None:
    """Expect the fifth native re-read of a finished artifact to be denied.

    All three planned outputs exist on disk. Four read/post-read cycles on the
    same chapter must pass; the fifth pre-check must report an audit loop.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_file = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"
    audited_file = chapters_dir / "02-installation.html"

    # The plan declares exactly the three files that are created below.
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_file}`",
        f"- `{first_chapter}`",
        f"- `{audited_file}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_file.write_text("<h1>Nginx Guide</h1>\n")
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    audited_file.write_text("<h1>Installation</h1>\n")

    saved_dod = dod_store.save(dod)
    hook = LateReferenceDriftHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    def read_context(attempt: int) -> HookContext:
        # Each attempt is a fresh native read of the same finished chapter.
        call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(audited_file)},
        )
        return HookContext(
            tool_call=call,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )

    for attempt in (1, 2, 3, 4):
        allowed_context = read_context(attempt)
        verdict = await hook.pre_tool_use(allowed_context)
        assert verdict.decision == HookDecision.CONTINUE
        await hook.post_tool_use(allowed_context)

    blocked = await hook.pre_tool_use(read_context(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.terminal_state == "blocked"
    assert blocked.message is not None
    assert "post-build audit loop" in blocked.message
2018
2019
@pytest.mark.asyncio
async def test_late_reference_drift_hook_requires_edit_during_active_repair_audit_loop(
    temp_dir: Path,
) -> None:
    """Expect the audit-loop denial to demand a concrete edit during repair.

    The session carries a "Repair focus" user message naming the index page as
    the next file to edit. After four allowed reads of that page, the fifth
    must be denied with a message that asks for an edit and names the file.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True, exist_ok=True)
    index_path = guide_root / "index.html"
    intro_path = chapters / "01-introduction.html"
    config_path = chapters / "03-basic-configuration.html"
    for artifact, markup in (
        (index_path, "<h1>Nginx Guide</h1>\n"),
        (intro_path, "<h1>Introduction</h1>\n"),
        (config_path, "<h1>Configuration</h1>\n"),
    ):
        artifact.write_text(markup)

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_path}`",
        f"- `{chapters}/`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_file)
    saved_dod = dod_store.save(dod)

    # Every line of the repair brief, including the last, ends with a newline.
    repair_lines = [
        "Repair focus:",
        f"- Improve `{index_path}`: insufficient structured content.",
        f"- Improve `{intro_path}`: insufficient structured content.",
        f"- Improve `{config_path}`: thin content.",
        f"- Immediate next step: edit `{index_path}` with a substantial expansion.",
    ]
    repair_brief = Message(
        role=Role.USER,
        content="\n".join(repair_lines) + "\n",
    )
    hook = LateReferenceDriftHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[repair_brief],
        ),
    )

    def read_context(attempt: int) -> HookContext:
        call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(index_path)},
        )
        return HookContext(
            tool_call=call,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )

    for attempt in (1, 2, 3, 4):
        allowed_context = read_context(attempt)
        verdict = await hook.pre_tool_use(allowed_context)
        assert verdict.decision == HookDecision.CONTINUE
        await hook.post_tool_use(allowed_context)

    blocked = await hook.pre_tool_use(read_context(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.message is not None
    assert "post-build audit loop" in blocked.message
    assert "make one concrete edit, patch, or write" in blocked.message
    assert "Do not finish with a final response" in blocked.message
    assert str(index_path.resolve(strict=False)) in blocked.message
2105
2106
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_post_build_self_audits_during_verification(
    temp_dir: Path,
) -> None:
    """Expect verification-sourced re-reads of a finished artifact to pass.

    Same setup as the native audit-loop case, but every context is tagged
    ``source="verification"``; the fifth read must still continue.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_file = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"
    audited_file = chapters_dir / "02-installation.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_file}`",
        f"- `{first_chapter}`",
        f"- `{audited_file}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_file.write_text("<h1>Nginx Guide</h1>\n")
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    audited_file.write_text("<h1>Installation</h1>\n")

    saved_dod = dod_store.save(dod)
    hook = LateReferenceDriftHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    def verification_read(attempt: int) -> HookContext:
        call = ToolCall(
            id=f"read-verify-{attempt}",
            name="read",
            arguments={"file_path": str(audited_file)},
        )
        return HookContext(
            tool_call=call,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="verification",
        )

    for attempt in (1, 2, 3, 4):
        current = verification_read(attempt)
        verdict = await hook.pre_tool_use(current)
        assert verdict.decision == HookDecision.CONTINUE
        await hook.post_tool_use(current)

    # The fifth read is the one a native source would have had denied.
    fifth = await hook.pre_tool_use(verification_read(5))

    assert fifth.decision == HookDecision.CONTINUE
2171
2172
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_relative_bash_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """Expect a native bash listing of a relative ``reference/`` path to be denied.

    The plan names the guide directories and three files, all of which exist.
    The command first ``cd``s into the workspace and then lists ``reference/``.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_file = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"
    second_chapter = chapters_dir / "02-installation.html"

    # Directories are listed in the plan alongside the concrete files.
    planned_paths = (
        guide_root,
        chapters_dir,
        index_file,
        first_chapter,
        second_chapter,
    )
    plan_lines = ["# Implementation Plan", "", "## File Changes"]
    plan_lines.extend(f"- `{planned}`" for planned in planned_paths)
    plan_lines.append("")
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_file.write_text("index")
    first_chapter.write_text("one")
    second_chapter.write_text("two")

    saved_dod = dod_store.save(dod)
    hook = LateReferenceDriftHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    listing_call = ToolCall(
        id="bash-relative-reference-1",
        name="bash",
        arguments={"command": f"cd {temp_dir} && ls -la reference/"},
    )
    outcome = await hook.pre_tool_use(
        HookContext(
            tool_call=listing_call,
            tool=registry.get("bash"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "completed artifact set scope" in outcome.message
2236
2237
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_relative_bash_post_build_audit_loop_during_verification(
    temp_dir: Path,
) -> None:
    """Expect repeated verification-sourced bash listings of outputs to pass.

    The same ``cd <workspace> && ls -la guide/chapters/`` command is issued
    five times with ``source="verification"``; every pre-check must continue.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_file = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"
    second_chapter = chapters_dir / "02-installation.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_file}`",
        f"- `{first_chapter}`",
        f"- `{second_chapter}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_file.write_text("<h1>Guide</h1>\n")
    first_chapter.write_text("<h1>One</h1>\n")
    second_chapter.write_text("<h1>Two</h1>\n")

    saved_dod = dod_store.save(dod)
    hook = LateReferenceDriftHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    def verification_listing(attempt: int) -> HookContext:
        call = ToolCall(
            id=f"bash-relative-audit-{attempt}",
            name="bash",
            arguments={"command": f"cd {temp_dir} && ls -la guide/chapters/"},
        )
        return HookContext(
            tool_call=call,
            tool=registry.get("bash"),
            registry=registry,
            permission_policy=policy,
            source="verification",
        )

    for attempt in (1, 2, 3, 4):
        current = verification_listing(attempt)
        verdict = await hook.pre_tool_use(current)
        assert verdict.decision == HookDecision.CONTINUE
        await hook.post_tool_use(current)

    fifth = await hook.pre_tool_use(verification_listing(5))

    assert fifth.decision == HookDecision.CONTINUE
2303
2304
@pytest.mark.asyncio
async def test_late_reference_drift_hook_does_not_treat_empty_output_dir_as_complete_artifact_set(
    temp_dir: Path,
) -> None:
    """Expect a reference read to pass while the chapters directory is empty.

    The plan lists ``index.html`` and a ``chapters/`` directory. Only the index
    is written, so the directory exists but holds no chapter files.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"
    dod.completed_items = ["Create chapter files with appropriate content"]

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_file = guide_root / "index.html"

    # No trailing empty entry here: the plan text ends without a newline.
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_file}`",
        f"- `{chapters_dir}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_file.write_text("index")

    saved_dod = dod_store.save(dod)
    hook = LateReferenceDriftHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    outcome = await hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
2361
2362
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_when_html_outputs_declare_missing_files(
    temp_dir: Path,
) -> None:
    """Expect a reference read to be denied when index.html links a missing chapter.

    The index links to two chapters but only the first exists on disk. The
    denial message must mention late reference drift and the absent file name.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"
    dod.completed_items = ["Create chapter files with appropriate content"]

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_file = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_file}`",
        f"- `{chapters_dir}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    # The second link targets a chapter that is deliberately never written.
    index_markup = (
        '<a href="chapters/01-getting-started.html">One</a>\n'
        '<a href="chapters/02-installation.html">Two</a>\n'
    )
    index_file.write_text(index_markup)
    first_chapter.write_text("one")
    dod.touched_files = [str(index_file), str(first_chapter)]

    saved_dod = dod_store.save(dod)
    hook = LateReferenceDriftHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    outcome = await hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "late reference drift" in outcome.message
    assert "02-installation.html" in outcome.message
2430
2431
@pytest.mark.asyncio
async def test_missing_planned_output_read_hook_blocks_reads_of_declared_missing_output(
    temp_dir: Path,
) -> None:
    """Expect a read of a linked-but-unwritten chapter to be denied.

    ``index.html`` links to three chapters while only the first two exist.
    Reading the third must be blocked with a message that points at writing
    the file and mentions its link label and the preceding chapter.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"

    plan_path = temp_dir / "implementation.md"
    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root / 'index.html'}`",
        f"- `{chapters}/`",
        "",
    ]
    plan_path.write_text("\n".join(plan_lines))
    dod.implementation_plan = str(plan_path)

    chapters.mkdir(parents=True, exist_ok=True)
    index_lines = [
        "<html>",
        '<a href="chapters/01-introduction.html">Chapter 1: Introduction</a>',
        '<a href="chapters/02-installation.html">Chapter 2: Installation</a>',
        '<a href="chapters/03-configuration-basics.html">Chapter 3: Configuration Basics</a>',
        "</html>",
    ]
    (guide_root / "index.html").write_text("\n".join(index_lines) + "\n")
    # Only chapters 1 and 2 are created; chapter 3 stays missing on purpose.
    (chapters / "01-introduction.html").write_text("<h1>Introduction</h1>\n")
    (chapters / "02-installation.html").write_text("<h1>Installation</h1>\n")

    dod_path = dod_store.save(dod)
    session = FakeSession(active_dod_path=str(dod_path), messages=[])
    hook = MissingPlannedOutputReadHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )
    missing_target = chapters / "03-configuration-basics.html"

    missing_read = ToolCall(
        id="read-missing-output",
        name="read",
        arguments={"file_path": str(missing_target)},
    )
    result = await hook.pre_tool_use(
        HookContext(
            tool_call=missing_read,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    assert "missing planned output artifact" in result.message
    assert 'write(file_path="' in result.message
    assert "03-configuration-basics.html" in result.message
    assert "Chapter 3: Configuration Basics" in result.message
    assert "02-installation.html" in result.message