Python · 81889 bytes Raw Blame History
1 """Tests for permission policy and tool lifecycle hooks."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.llm.base import Message, Role, ToolCall
10 from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
11 from loader.runtime.executor import ToolExecutionState, ToolExecutor
12 from loader.runtime.hooks import (
13 ActiveRepairMutationScopeHook,
14 ActiveRepairScopeHook,
15 BaseToolHook,
16 FilePathAliasHook,
17 HookContext,
18 HookDecision,
19 HookManager,
20 HookResult,
21 LateReferenceDriftHook,
22 MissingPlannedOutputReadHook,
23 RelativePathContextHook,
24 SearchPathAliasHook,
25 )
26 from loader.runtime.permissions import (
27 PermissionMode,
28 PermissionOverride,
29 PermissionRuleDisposition,
30 PermissionRuleSet,
31 build_permission_policy,
32 )
33 from loader.runtime.safeguard_services import ActionTracker
34 from loader.runtime.tracing import RuntimeTracer
35 from loader.tools.base import create_default_registry
36
37
class RecordingHook(BaseToolHook):
    """Test double that logs which lifecycle callbacks were invoked, in order."""

    def __init__(self, events: list[str]) -> None:
        # The list is shared with the test body, which asserts on it afterwards.
        self.events = events

    async def pre_tool_use(self, context) -> HookResult:
        """Note the pre-execution callback and return a default-constructed result."""
        stage = "pre_tool_use"
        self.events.append(stage)
        return HookResult()

    async def post_tool_use(self, context) -> HookResult:
        """Note the post-execution callback and return a default-constructed result."""
        stage = "post_tool_use"
        self.events.append(stage)
        return HookResult()

    async def post_tool_use_failure(self, context) -> HookResult:
        """Note the failure callback and return a default-constructed result."""
        stage = "post_tool_use_failure"
        self.events.append(stage)
        return HookResult()
55
56
class DenyInPreHook(BaseToolHook):
    """Test double whose pre-execution callback always refuses the tool call."""

    def __init__(self, events: list[str]) -> None:
        # The list is shared with the test body, which asserts on it afterwards.
        self.events = events

    async def pre_tool_use(self, context) -> HookResult:
        """Record the callback, then return a DENY result with a blocked terminal state."""
        self.events.append("pre_tool_use")
        denial = HookResult(
            decision=HookDecision.DENY,
            message="[Blocked - denied by test hook]",
            terminal_state="blocked",
        )
        return denial

    async def post_tool_use_failure(self, context) -> HookResult:
        """Record the failure callback and return a default-constructed result."""
        self.events.append("post_tool_use_failure")
        follow_up = HookResult()
        return follow_up
74
75
def test_permission_policy_honors_overrides(temp_dir: Path) -> None:
    """Per-call overrides change the outcome for ``write`` under read-only mode.

    The policy is built in READ_ONLY mode while ``write`` requires
    WORKSPACE_WRITE. Without an override the call is denied; an ALLOW
    override permits it and an ASK override yields an "ask" decision.

    This test is synchronous: ``policy.authorize`` is called without
    ``await`` and nothing else here is awaited, so no asyncio marker or
    event loop is needed (matching the other policy-only tests in this file).
    """
    policy = build_permission_policy(
        active_mode=PermissionMode.READ_ONLY,
        workspace_root=temp_dir,
        tool_requirements={"write": PermissionMode.WORKSPACE_WRITE},
    )

    denied = policy.authorize("write")
    allowed = policy.authorize("write", override=PermissionOverride.ALLOW)
    asked = policy.authorize("write", override=PermissionOverride.ASK)

    assert denied.decision.value == "deny"
    assert allowed.allowed
    assert asked.decision.value == "ask"
91
92
def test_permission_mode_parsing_supports_prompt_and_allow() -> None:
    """``PermissionMode.from_str`` maps "prompt" and "allow" to their enum members."""
    expectations = (
        ("prompt", PermissionMode.PROMPT),
        ("allow", PermissionMode.ALLOW),
    )
    for raw_value, expected_mode in expectations:
        assert PermissionMode.from_str(raw_value) == expected_mode
96
97
def test_permission_policy_honors_rule_precedence(temp_dir: Path) -> None:
    """Deny and ask rules win over an allow rule that also matches.

    Every call writes content containing "safe change" (matching the allow
    rule). The path decides the outcome: ``secrets.txt`` hits the deny rule,
    ``README.md`` hits the ask rule, and ``notes.txt`` matches only the allow
    rule.
    """
    rule_set = PermissionRuleSet.from_dict(
        {
            "allow": [{"tool": "write", "contains": "safe change"}],
            "deny": [{"tool": "write", "path_contains": "secrets"}],
            "ask": [{"tool": "write", "path_contains": "README"}],
        }
    )
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements={"write": PermissionMode.WORKSPACE_WRITE},
        rules=rule_set,
    )

    def authorize_write(file_name: str):
        # Same content for every call; only the target path varies.
        payload = {
            "file_path": str(temp_dir / file_name),
            "content": "safe change\n",
        }
        return policy.authorize("write", arguments=payload)

    denied = authorize_write("secrets.txt")
    asked = authorize_write("README.md")
    allowed = authorize_write("notes.txt")

    expectations = (
        (denied, "deny", PermissionRuleDisposition.DENY),
        (asked, "ask", PermissionRuleDisposition.ASK),
        (allowed, "allow", PermissionRuleDisposition.ALLOW),
    )
    for authorization, decision_value, disposition in expectations:
        assert authorization.decision.value == decision_value
        assert authorization.matched_disposition == disposition
140
141
@pytest.mark.asyncio
async def test_prompt_mode_executor_prompts_once_and_respects_denial(
    temp_dir: Path,
) -> None:
    """A declined confirmation in PROMPT mode stops the write from happening.

    The confirmation callback records each prompt and answers ``False``. The
    test checks that exactly one prompt was issued, that its details mention
    the active and required modes, and that the target file was not created.
    """
    prompts: list[tuple[str, str, str]] = []
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.PROMPT,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    executor = ToolExecutor(tools, RuntimeTracer(), permissions)
    target = temp_dir / "prompted.txt"

    async def deny(tool_name: str, message: str, details: str) -> bool:
        # Capture the full prompt so the assertions can inspect its details.
        prompts.append((tool_name, message, details))
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "prompted\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=deny,
    )

    assert outcome.state == ToolExecutionState.DECLINED
    assert not target.exists()
    assert len(prompts) == 1
    prompt_details = prompts[0][2]
    assert "active_mode=prompt" in prompt_details
    assert "required_mode=workspace-write" in prompt_details
175
176
@pytest.mark.asyncio
async def test_allow_mode_executor_skips_prompt_for_destructive_write(
    temp_dir: Path,
) -> None:
    """In ALLOW mode a write executes without consulting the confirmation callback.

    The callback would decline if asked, so a successful write plus an empty
    prompt log shows it was never invoked.
    """
    prompts: list[str] = []
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    executor = ToolExecutor(tools, RuntimeTracer(), permissions)
    target = temp_dir / "allowed.txt"

    async def unexpected(tool_name: str, message: str, details: str) -> bool:
        # Should never run; records the tool name and declines if it does.
        prompts.append(tool_name)
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "allowed\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=unexpected,
    )

    assert outcome.state == ToolExecutionState.EXECUTED
    assert target.read_text() == "allowed\n"
    assert prompts == []
208
209
@pytest.mark.asyncio
async def test_executor_accepts_edit_content_alias_for_new_string(
    temp_dir: Path,
) -> None:
    """An ``edit`` call that passes ``content`` instead of ``new_string`` still applies.

    After execution the file holds the replacement text and the outcome's
    tool call exposes the value under the ``new_string`` argument.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    executor = ToolExecutor(tools, RuntimeTracer(), permissions)
    target = temp_dir / "guide.html"
    target.write_text("<h1>Old</h1>\n")

    edit_arguments = {
        "file_path": str(target),
        "old_string": "<h1>Old</h1>",
        "content": "<h1>New</h1>",
    }
    edit_call = ToolCall(id="edit-1", name="edit", arguments=edit_arguments)
    outcome = await executor.execute_tool_call(edit_call, source="native")

    assert outcome.state == ToolExecutionState.EXECUTED
    assert target.read_text() == "<h1>New</h1>\n"
    assert outcome.tool_call.arguments["new_string"] == "<h1>New</h1>"
240
241
@pytest.mark.asyncio
async def test_ask_rule_prompts_even_when_allow_mode(temp_dir: Path) -> None:
    """A matching ``ask`` rule triggers a confirmation prompt despite ALLOW mode.

    The callback declines, so the write must end up DECLINED with no file
    created, and the single recorded prompt must describe the matched rule.
    """
    prompts: list[str] = []
    tools = create_default_registry(temp_dir)
    ask_rules = PermissionRuleSet.from_dict(
        {"ask": [{"tool": "write", "path_contains": "README"}]}
    )
    permissions = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
        rules=ask_rules,
    )
    executor = ToolExecutor(tools, RuntimeTracer(), permissions)
    target = temp_dir / "README.md"

    async def deny(tool_name: str, message: str, details: str) -> bool:
        # Only the details string is needed for the assertions below.
        prompts.append(details)
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "no thanks\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=deny,
    )

    assert outcome.state == ToolExecutionState.DECLINED
    assert not target.exists()
    assert len(prompts) == 1
    (only_prompt,) = prompts
    assert "matched_ask_rule=tool=write, path_contains=README" in only_prompt
275
276
@pytest.mark.asyncio
async def test_hook_lifecycle_runs_in_order_for_success(temp_dir: Path) -> None:
    """A successful write fires ``pre_tool_use`` then ``post_tool_use``, and nothing else."""
    events: list[str] = []
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    tracer = RuntimeTracer()
    hook_manager = HookManager([RecordingHook(events)])
    executor = ToolExecutor(tools, tracer, permissions, hooks=hook_manager)
    target = temp_dir / "hook-success.txt"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "hook success\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        skip_confirmation=True,
    )

    assert outcome.state == ToolExecutionState.EXECUTED
    assert events == ["pre_tool_use", "post_tool_use"]
    assert target.read_text() == "hook success\n"
307
308
@pytest.mark.asyncio
async def test_pre_hook_deny_still_runs_failure_hook_once(temp_dir: Path) -> None:
    """A DENY from the pre hook blocks the write and fires the failure hook exactly once.

    The outcome must be BLOCKED, carry a single tool result, surface the
    hook's message in its event content, and leave the target file absent.
    """
    events: list[str] = []
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    tracer = RuntimeTracer()
    hook_manager = HookManager([DenyInPreHook(events)])
    executor = ToolExecutor(tools, tracer, permissions, hooks=hook_manager)
    target = temp_dir / "hook-denied.txt"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "should not exist\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        skip_confirmation=True,
    )

    assert outcome.state == ToolExecutionState.BLOCKED
    assert events == ["pre_tool_use", "post_tool_use_failure"]
    assert not target.exists()
    assert len(outcome.message.tool_results) == 1
    assert "denied by test hook" in outcome.event_content
341
342
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("tool_name", "arguments", "expected_path"),
    [
        ("read", {"file": "notes.txt"}, "notes.txt"),
        ("write", {"filepath": "notes.txt", "content": "hello\n"}, "notes.txt"),
        (
            "edit",
            {"filePath": "notes.txt", "old_string": "before", "new_string": "after"},
            "notes.txt",
        ),
        ("patch", {"path": "notes.txt", "hunks": []}, "notes.txt"),
    ],
)
async def test_file_path_alias_hook_canonicalizes_common_aliases(
    temp_dir: Path,
    tool_name: str,
    arguments: dict[str, object],
    expected_path: str,
) -> None:
    """``FilePathAliasHook`` rewrites path aliases to the ``file_path`` key.

    Each parametrized case supplies the path under a different alias; the
    rewritten arguments must contain ``file_path`` and none of the aliases.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    hook = FilePathAliasHook()

    aliased_call = ToolCall(id=f"{tool_name}-1", name=tool_name, arguments=arguments)
    context = HookContext(
        tool_call=aliased_call,
        tool=tools.get(tool_name),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["file_path"] == expected_path
    for alias in ("file", "filepath", "filePath", "filename", "path"):
        assert alias not in rewritten
385
386
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("tool_name", "arguments", "expected_path"),
    [
        ("glob", {"pattern": "*.html", "directory": "chapters"}, "chapters"),
        ("grep", {"pattern": "alpha", "dir": "src"}, "src"),
    ],
)
async def test_search_path_alias_hook_canonicalizes_common_aliases(
    temp_dir: Path,
    tool_name: str,
    arguments: dict[str, object],
    expected_path: str,
) -> None:
    """``SearchPathAliasHook`` rewrites directory aliases to the ``path`` key.

    The rewritten arguments must carry ``path`` and drop every alias key.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()

    aliased_call = ToolCall(id=f"{tool_name}-1", name=tool_name, arguments=arguments)
    context = HookContext(
        tool_call=aliased_call,
        tool=tools.get(tool_name),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["path"] == expected_path
    for alias in ("directory", "dir", "folder"):
        assert alias not in rewritten
423
424
@pytest.mark.asyncio
async def test_search_path_alias_hook_splits_full_glob_pattern(
    temp_dir: Path,
) -> None:
    """A glob pattern with a directory prefix is split into ``path`` and a bare pattern."""
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()
    chapters = temp_dir / "chapters"

    glob_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={"pattern": f"{chapters}/*.html"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=tools.get("glob"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["path"] == str(chapters)
    assert rewritten["pattern"] == "*.html"
455
456
@pytest.mark.asyncio
async def test_search_path_alias_hook_splits_implicit_recursive_glob_parent(
    temp_dir: Path,
) -> None:
    """A ``**/``-prefixed pattern with a concrete directory tail is split.

    The directory segments after ``**/`` become ``path`` and the final
    wildcard component becomes ``pattern``.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()

    glob_call = ToolCall(
        id="glob-implicit-1",
        name="glob",
        arguments={"pattern": "**/Loader/guides/nginx/chapters/*.html"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=tools.get("glob"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["path"] == "Loader/guides/nginx/chapters"
    assert rewritten["pattern"] == "*.html"
486
487
@pytest.mark.asyncio
async def test_search_path_alias_hook_leaves_fully_generic_recursive_glob_unchanged(
    temp_dir: Path,
) -> None:
    """A bare ``**/*.html`` pattern has no directory to extract, so nothing is rewritten."""
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()

    glob_call = ToolCall(
        id="glob-generic-1",
        name="glob",
        arguments={"pattern": "**/*.html"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=tools.get("glob"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.updated_arguments is None
515
516
@pytest.mark.asyncio
async def test_relative_path_context_hook_remaps_workspace_mirror_of_external_root(
    temp_dir: Path,
) -> None:
    """A write to a workspace mirror path is redirected to the external root.

    The action tracker has previously seen a read under
    ``external-home/Loader/guides/fortran``. A later write aimed at
    ``workspace/Loader/guides/nginx/index.html`` must be rewritten to the
    same relative location under ``external-home`` and must inject a single
    path-anchor correction message.
    """
    workspace_root = temp_dir / "workspace"
    workspace_root.mkdir()
    external_root = temp_dir / "external-home"
    external_fortran = external_root / "Loader" / "guides" / "fortran"
    # parents=True also creates external_root/Loader/guides, so no separate
    # mkdir for the guides directory is required.
    external_fortran.mkdir(parents=True)
    (external_fortran / "index.html").write_text("<html></html>\n")

    registry = create_default_registry(workspace_root)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=workspace_root,
        tool_requirements=registry.get_tool_requirements(),
    )
    action_tracker = ActionTracker()
    # Seed the tracker with a read under the external root before the hook runs.
    action_tracker.record_tool_call(
        "read",
        {"file_path": str(external_fortran / "index.html")},
    )
    hook = RelativePathContextHook(action_tracker, workspace_root)

    mirrored_workspace_path = workspace_root / "Loader" / "guides" / "nginx" / "index.html"
    expected_external_path = external_root / "Loader" / "guides" / "nginx" / "index.html"

    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="write-1",
                name="write",
                arguments={
                    "file_path": str(mirrored_workspace_path),
                    "content": "<html></html>\n",
                },
            ),
            tool=registry.get("write"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.updated_arguments is not None
    # Compare resolved paths so symlinked temp directories do not cause mismatches.
    assert Path(result.updated_arguments["file_path"]).resolve() == expected_external_path.resolve()
    resolved_loader_root = (external_root / "Loader").resolve()
    assert result.injected_messages == [
        (
            "[Path anchor correction] A repo-local mirror path was remapped to the "
            f"established output root under `{resolved_loader_root}`. Keep future "
            "file/search tool calls on that external root and use `index.html` there "
            "instead of re-anchoring work to the workspace checkout."
        )
    ]
573
574
@pytest.mark.asyncio
async def test_relative_path_context_hook_prefers_external_search_ancestor_over_workspace_match(
    temp_dir: Path,
) -> None:
    """A relative ``guides`` search resolves to the external root, not the workspace.

    Both ``workspace/guides`` and ``external-home/Loader/guides`` exist. The
    tracker has seen a read under the external tree, and the glob's ``path``
    must be rewritten to the external ``guides`` directory.
    """
    workspace_root = temp_dir / "workspace"
    (workspace_root / "guides").mkdir(parents=True)
    external_root = temp_dir / "external-home"
    external_fortran = external_root / "Loader" / "guides" / "fortran"
    external_fortran.mkdir(parents=True)
    (external_fortran / "index.html").write_text("<html></html>\n")

    tools = create_default_registry(workspace_root)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=workspace_root,
        tool_requirements=tools.get_tool_requirements(),
    )
    tracker = ActionTracker()
    prior_read = {"file_path": str(external_fortran / "index.html")}
    tracker.record_tool_call("read", prior_read)
    hook = RelativePathContextHook(tracker, workspace_root)

    glob_call = ToolCall(
        id="glob-ancestor-1",
        name="glob",
        arguments={"path": "guides", "pattern": "**"},
    )
    context = HookContext(
        tool_call=glob_call,
        tool=tools.get("glob"),
        registry=tools,
        permission_policy=permissions,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.updated_arguments is not None
    remapped = Path(result.updated_arguments["path"]).resolve()
    assert remapped == (external_root / "Loader" / "guides").resolve()
617
618
class FakeSession:
    """Minimal session stand-in holding an active DoD path and a message list."""

    def __init__(self, *, active_dod_path: str, messages: list[Message]) -> None:
        # Both values are stored as given; the message list is not copied.
        self.active_dod_path, self.messages = active_dod_path, messages
623
624
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_reference_reads_while_fixing(
    temp_dir: Path,
) -> None:
    """While the DoD is "fixing", a read outside the repair focus is denied.

    The session's latest assistant message names ``guide/index.html`` as the
    repair target; reading ``reference/index.html`` must be blocked with a
    message that mentions the active repair scope and the repair target.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    dod_path = dod_store.save(definition)
    repair_target = temp_dir / "guide" / "index.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'chapters' / '01-introduction.html'}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    # Target a file that the repair focus never mentions.
    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    assert "active repair scope" in result.message
    assert str(repair_target) in result.message
679
680
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_reads_inside_active_artifact_set(
    temp_dir: Path,
) -> None:
    """A read of a file named in the repair focus is allowed to continue.

    The repair focus lists both ``guide/index.html`` and a chapter file; the
    read targets that chapter file.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    dod_path = dod_store.save(definition)
    repair_target = temp_dir / "guide" / "index.html"
    chapter_path = temp_dir / "guide" / "chapters" / "01-getting-started.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n"
        f"- Fix the broken local reference `../styles.css` in `{chapter_path}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{chapter_path}`; otherwise remove or replace `chapters/01-getting-started.html`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(chapter_path)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.CONTINUE
733
734
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_existing_sibling_reads_with_source_of_truth_hint(
    temp_dir: Path,
) -> None:
    """An existing sibling chapter may be read when the focus carries a source-of-truth hint.

    The sibling ``03-basic-usage.html`` exists on disk next to the missing
    ``02-installation.html`` but is not itself listed in the repair focus.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    dod_path = dod_store.save(definition)
    repair_target = temp_dir / "guide" / "index.html"
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    sibling = chapter_dir / "03-basic-usage.html"
    sibling.write_text("<h1>Basic Usage</h1>\n")
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/02-installation.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{chapter_dir / '02-installation.html'}`; otherwise remove or replace `chapters/02-installation.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{repair_target}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(sibling)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.CONTINUE
792
793
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_verification_source_outside_repair_target(
    temp_dir: Path,
) -> None:
    """A read issued with ``source="verification"`` is allowed outside the repair target.

    The repair focus names only ``06-troubleshooting.html``, yet the
    verification read of ``guide/index.html`` must continue.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    dod_path = dod_store.save(definition)
    repair_target = temp_dir / "guide" / "chapters" / "06-troubleshooting.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    verification_call = ToolCall(
        id="verify-1",
        name="read",
        arguments={"file_path": str(temp_dir / "guide" / "index.html")},
    )
    context = HookContext(
        tool_call=verification_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="verification",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.CONTINUE
844
845
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_local_rereads_outside_concrete_repair_files(
    temp_dir: Path,
) -> None:
    """A read of a chapter not named in the repair focus is denied.

    The focus names two chapter files and a stylesheet; the read targets
    ``01-getting-started.html``, which is not among them. The denial message
    must mention the repair target and the stylesheet.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    dod_path = dod_store.save(definition)
    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
    stylesheet = temp_dir / "guide" / "styles.css"
    other_chapter = temp_dir / "guide" / "chapters" / "01-getting-started.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Fix the broken local reference `../styles.css` in `{temp_dir / 'guide' / 'chapters' / '06-troubleshooting.html'}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(other_chapter)},
    )
    context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    denial_text = result.message
    assert "active repair scope" in denial_text
    assert str(repair_target) in denial_text
    assert str(stylesheet) in denial_text
905
906
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_repair_audit_loop_after_repeated_source_reads(
    temp_dir: Path,
) -> None:
    """The fifth consecutive in-scope read is denied as a repair audit loop.

    Four reads alternating between the repair target and the intro chapter
    are allowed (each followed by ``post_tool_use``); the fifth read must be
    blocked with a message mentioning the repair audit loop.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    dod_path = dod_store.save(definition)
    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    repair_target = guide_root / "index.html"
    repair_target.write_text("<h1>Guide</h1>\n")
    intro = chapter_dir / "01-introduction.html"
    install = chapter_dir / "02-installation.html"
    intro.write_text("<h1>Intro</h1>\n")
    install.write_text("<h1>Install</h1>\n")
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/02-installation.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{install}`; otherwise remove or replace `chapters/02-installation.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{repair_target}`, `{intro}`, `{install}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    def make_context(index: int) -> HookContext:
        # Odd indices read the repair target; even indices read the intro chapter.
        if index % 2:
            target = repair_target
        else:
            target = intro
        read_call = ToolCall(
            id=f"read-{index}",
            name="read",
            arguments={"file_path": str(target)},
        )
        return HookContext(
            tool_call=read_call,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )

    # map() is lazy, so each context is built right before its pre/post calls.
    for context in map(make_context, range(1, 5)):
        allowed = await hook.pre_tool_use(context)
        assert allowed.decision == HookDecision.CONTINUE
        await hook.post_tool_use(context)

    blocked = await hook.pre_tool_use(make_context(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.terminal_state == "blocked"
    assert blocked.message is not None
    assert "repair audit loop" in blocked.message
979
980
@pytest.mark.asyncio
async def test_active_repair_scope_audit_loop_names_next_missing_repair_target(
    temp_dir: Path,
) -> None:
    """The audit-loop denial names the repair file that does not exist yet.

    Only ``04-reverse-proxy.html`` is written to disk; ``05-load-balancing.html``
    is referenced in the repair focus but never created. After four allowed
    reads of the existing file, the fifth is denied and the message must
    include the missing file's path.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    dod_path = dod_store.save(definition)
    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    repair_target = chapter_dir / "04-reverse-proxy.html"
    next_missing = chapter_dir / "05-load-balancing.html"
    repair_target.write_text("<h1>Reverse Proxy</h1>\n")
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `05-load-balancing.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{next_missing}`; otherwise remove or replace `05-load-balancing.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{repair_target}`, `{next_missing}`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    def make_context(index: int) -> HookContext:
        # Every read targets the same existing repair file; only the id varies.
        read_call = ToolCall(
            id=f"read-{index}",
            name="read",
            arguments={"file_path": str(repair_target)},
        )
        return HookContext(
            tool_call=read_call,
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )

    # map() is lazy, so each context is built right before its pre/post calls.
    for context in map(make_context, range(1, 5)):
        allowed = await hook.pre_tool_use(context)
        assert allowed.decision == HookDecision.CONTINUE
        await hook.post_tool_use(context)

    blocked = await hook.pre_tool_use(make_context(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.message is not None
    assert "repair audit loop" in blocked.message
    assert str(next_missing) in blocked.message
1048
1049
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_scoped_glob_within_active_artifact_roots(
    temp_dir: Path,
) -> None:
    """A glob whose pattern targets the guide chapters should continue.

    The glob is rooted at the project directory, but its pattern is
    ``**/guide/chapters/*.html``; the repair focus names files under that
    same ``guide`` tree.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    # Paths are only referenced in the repair focus; nothing is written to disk.
    guide_root = temp_dir / "guide"
    repair_target = temp_dir / "guide" / "index.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/troubleshooting.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{guide_root / 'chapters' / 'troubleshooting.html'}`; otherwise remove or replace `chapters/troubleshooting.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{guide_root / 'chapters' / 'introduction.html'}`, `{guide_root / 'chapters' / 'installation.html'}`, `{guide_root / 'chapters' / 'configuration.html'}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    fake_session = FakeSession(
        active_dod_path=str(saved_dod),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    glob_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={
            "path": str(temp_dir),
            "pattern": "**/guide/chapters/*.html",
        },
    )
    glob_context = HookContext(
        tool_call=glob_call,
        tool=tools.get("glob"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await scope_hook.pre_tool_use(glob_context)

    assert outcome.decision == HookDecision.CONTINUE
1107
1108
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_declared_missing_sibling_reads(
    temp_dir: Path,
) -> None:
    """Reading a sibling that the repair target links to should continue.

    The index page links to both ``overview.html`` and ``installation.html``.
    Only the overview chapter exists on disk and only it is named in the
    repair focus, yet a read of the missing installation chapter is expected
    to pass the scope hook.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    repair_target = guide_root / "index.html"
    existing_chapter = chapters / "overview.html"
    next_chapter = chapters / "installation.html"

    # The index declares both chapters; only the first one is created below.
    index_lines = [
        "<html>",
        '<a href="chapters/overview.html">Overview</a>',
        '<a href="chapters/installation.html">Installation</a>',
        "</html>",
    ]
    repair_target.write_text("\n".join(index_lines) + "\n")
    existing_chapter.write_text("<h1>Overview</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/overview.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/overview.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{existing_chapter}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    fake_session = FakeSession(
        active_dod_path=str(saved_dod),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    sibling_read = ToolCall(
        id="read-allowed-sibling",
        name="read",
        arguments={"file_path": str(next_chapter)},
    )
    sibling_context = HookContext(
        tool_call=sibling_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await scope_hook.pre_tool_use(sibling_context)

    assert outcome.decision == HookDecision.CONTINUE
1180
1181
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_reference_reads_during_in_progress_repair(
    temp_dir: Path,
) -> None:
    """A read under ``reference/`` must be denied while a repair is in progress.

    The repair focus names a chapter file and a stylesheet under ``guide``;
    the attempted read targets ``reference/index.html`` instead.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    fake_session = FakeSession(
        active_dod_path=str(saved_dod),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    reference_context = HookContext(
        tool_call=reference_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await scope_hook.pre_tool_use(reference_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "active repair scope" in outcome.message
1236
1237
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_blocks_writes_outside_named_repair_files(
    temp_dir: Path,
) -> None:
    """Editing a chapter that the repair focus does not name must be denied.

    The denial message is expected to mention the active repair mutation
    scope and the path of the actual repair target.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
    # A different chapter, absent from the repair focus text.
    chapter_path = temp_dir / "guide" / "chapters" / "01-getting-started.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    fake_session = FakeSession(
        active_dod_path=str(saved_dod),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    stray_edit = ToolCall(
        id="edit-1",
        name="edit",
        arguments={"file_path": str(chapter_path), "old_string": "old", "new_string": "new"},
    )
    edit_context = HookContext(
        tool_call=stray_edit,
        tool=tools.get("edit"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(edit_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "active repair mutation scope" in outcome.message
    assert str(repair_target) in outcome.message
1294
1295
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_allows_expected_repair_file_writes(
    temp_dir: Path,
) -> None:
    """Writing the stylesheet that the repair focus says to create should continue."""
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
    stylesheet = temp_dir / "guide" / "styles.css"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    fake_session = FakeSession(
        active_dod_path=str(saved_dod),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    # The write targets exactly the path named in the "create" instruction.
    stylesheet_write = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(stylesheet), "content": "body { color: #222; }\n"},
    )
    write_context = HookContext(
        tool_call=stylesheet_write,
        tool=tools.get("write"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(write_context)

    assert outcome.decision == HookDecision.CONTINUE
1348
1349
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_allows_declared_missing_sibling_outputs(
    temp_dir: Path,
) -> None:
    """Writing a missing chapter that the repair target links to should continue.

    The index page links to chapters 01 and 02. Only chapter 01 exists and
    only it is named in the repair focus; a write that creates chapter 02 is
    expected to pass the mutation scope hook.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    repair_target = guide_root / "index.html"
    existing_chapter = chapters / "01-introduction.html"
    next_chapter = chapters / "02-installation.html"

    # The index declares both chapters; only the first one is created below.
    index_lines = [
        "<html>",
        '<a href="chapters/01-introduction.html">Introduction</a>',
        '<a href="chapters/02-installation.html">Installation</a>',
        "</html>",
    ]
    repair_target.write_text("\n".join(index_lines) + "\n")
    existing_chapter.write_text("<h1>Introduction</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{existing_chapter}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    fake_session = FakeSession(
        active_dod_path=str(saved_dod),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    sibling_write = ToolCall(
        id="write-2",
        name="write",
        arguments={"file_path": str(next_chapter), "content": "<h1>Installation</h1>\n"},
    )
    write_context = HookContext(
        tool_call=sibling_write,
        tool=tools.get("write"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(write_context)

    assert outcome.decision == HookDecision.CONTINUE
1421
1422
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_blocks_broad_mutating_bash(
    temp_dir: Path,
) -> None:
    """A ``mkdir -p`` bash command for an unnamed directory must be denied.

    The command creates ``guide/assets``, which the repair focus never
    mentions. The denial message is expected to mention the active repair
    mutation scope and the path of the repair target.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    fake_session = FakeSession(
        active_dod_path=str(saved_dod),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    mkdir_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": f"mkdir -p {temp_dir / 'guide' / 'assets'}"},
    )
    bash_context = HookContext(
        tool_call=mkdir_call,
        tool=tools.get("bash"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await mutation_hook.pre_tool_use(bash_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "active repair mutation scope" in outcome.message
    assert str(repair_target) in outcome.message
1478
1479
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_out_of_scope_reference_reads(
    temp_dir: Path,
) -> None:
    """A reference read must be denied while a planned output is still missing.

    The plan lists four outputs; the index and the first two chapters are
    written, ``03-first-website.html`` is not. The denial message is expected
    to mention late reference drift and the missing chapter's file name.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    # The plan uses project-relative paths under a "File Changes" heading.
    plan_path = temp_dir / "implementation.md"
    plan_text = (
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
        "- `guide/chapters/03-first-website.html`\n"
    )
    plan_path.write_text(plan_text)
    definition.implementation_plan = str(plan_path)
    saved_dod = store.save(definition)

    # Create every planned output except the third chapter.
    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    (guide_dir / "01-getting-started.html").write_text("one")
    (guide_dir / "02-installation.html").write_text("two")

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    reference_context = HookContext(
        tool_call=reference_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(reference_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "late reference drift" in outcome.message
    assert "03-first-website.html" in outcome.message
1534
1535
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_reads_inside_planned_artifact_set(
    temp_dir: Path,
) -> None:
    """Reading an existing planned output should continue.

    The read targets ``02-installation.html``, which is both listed in the
    implementation plan and present on disk.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    plan_path = temp_dir / "implementation.md"
    plan_text = (
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
        "- `guide/chapters/03-first-website.html`\n"
    )
    plan_path.write_text(plan_text)
    definition.implementation_plan = str(plan_path)
    saved_dod = store.save(definition)

    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    target = guide_dir / "02-installation.html"
    (temp_dir / "guide" / "index.html").write_text("index")
    (guide_dir / "01-getting-started.html").write_text("one")
    target.write_text("two")

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    planned_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(target)},
    )
    planned_context = HookContext(
        tool_call=planned_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(planned_context)

    assert outcome.decision == HookDecision.CONTINUE
1587
1588
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reopen_after_study_and_first_output(
    temp_dir: Path,
) -> None:
    """Reopening the reference after the first output exists must be denied.

    The definition of done records a completed reference-study item and only
    ``guide/index.html`` has been written. The denial message is expected to
    mention late reference drift and ``01-getting-started.html``.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"
    definition.completed_items = [
        "First, examine the existing reference guide structure to understand the format and cadence",
    ]

    plan_path = temp_dir / "implementation.md"
    plan_text = (
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
    )
    plan_path.write_text(plan_text)
    definition.implementation_plan = str(plan_path)

    # Only the index is produced; both planned chapters remain unwritten.
    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    saved_dod = store.save(definition)

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    reference_read = ToolCall(
        id="read-reference",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    reference_context = HookContext(
        tool_call=reference_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(reference_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "late reference drift" in outcome.message
    assert "01-getting-started.html" in outcome.message
1643
1644
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """A native reference read must be denied once every planned file exists.

    The plan lists absolute paths, including the ``guide`` and
    ``guide/chapters`` directories. The denial message is expected to mention
    the completed artifact set scope and the ``guide`` directory path.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    plan_path = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide'}`",
        f"- `{temp_dir / 'guide' / 'chapters'}`",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
        "",
    ]
    plan_path.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_path)

    # Materialize every planned file before saving the definition of done.
    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    (guide_dir / "01-getting-started.html").write_text("one")
    (guide_dir / "02-installation.html").write_text("two")
    saved_dod = store.save(definition)

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    reference_context = HookContext(
        tool_call=reference_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(reference_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "completed artifact set scope" in outcome.message
    assert str(temp_dir / "guide") in outcome.message
1707
1708
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reads_when_outputs_exist_but_need_quality(
    temp_dir: Path,
) -> None:
    """A reference read must be denied even while a quality item is pending.

    Both planned files exist and the definition of done still carries a
    pending improvement item. The denial message is expected to mention the
    completed artifact set scope and the ``guide`` directory path.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create an equally thorough multi-page HTML guide.")
    definition.status = "in_progress"
    definition.pending_items.append("Improve generated guide depth and formatting")

    plan_path = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        # The chapters directory entry deliberately carries a trailing slash.
        f"- `{temp_dir / 'guide' / 'chapters'}/`",
        f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
        "",
    ]
    plan_path.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_path)

    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    index_markup = '<h1>Guide</h1><a href="chapters/01-getting-started.html">One</a>\n'
    (temp_dir / "guide" / "index.html").write_text(index_markup)
    (guide_dir / "01-getting-started.html").write_text("<h1>One</h1><p>thin</p>\n")
    saved_dod = store.save(definition)

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    reference_read = ToolCall(
        id="read-reference",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    reference_context = HookContext(
        tool_call=reference_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(reference_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.message is not None
    assert "completed artifact set scope" in outcome.message
    assert str(temp_dir / "guide") in outcome.message
1770
1771
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_verification_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """A reference read with ``source="verification"`` should continue.

    Every planned file exists, and the read targets ``reference/index.html``;
    the only difference from a native read is the context source.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    plan_path = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide'}`",
        f"- `{temp_dir / 'guide' / 'chapters'}`",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
        "",
    ]
    plan_path.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_path)

    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    (guide_dir / "01-getting-started.html").write_text("one")
    (guide_dir / "02-installation.html").write_text("two")
    saved_dod = store.save(definition)

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    verification_read = ToolCall(
        id="read-verify-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    verification_context = HookContext(
        tool_call=verification_read,
        tool=tools.get("read"),
        registry=tools,
        permission_policy=write_policy,
        source="verification",
    )
    outcome = await drift_hook.pre_tool_use(verification_context)

    assert outcome.decision == HookDecision.CONTINUE
1830
1831
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_excessive_post_build_self_audits(
    temp_dir: Path,
) -> None:
    """The fifth native re-read of a finished output must be denied.

    All three planned files exist. Four read cycles on the same planned
    chapter are expected to continue; the fifth pre-tool check must be denied
    with a message that mentions the post-build audit loop.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    plan_path = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
        "",
    ]
    plan_path.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_path)

    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    target = guide_dir / "02-installation.html"
    (temp_dir / "guide" / "index.html").write_text("<h1>Nginx Guide</h1>\n")
    (guide_dir / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n")
    target.write_text("<h1>Installation</h1>\n")
    saved_dod = store.save(definition)

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    def audit_context(call_number: int) -> HookContext:
        """Build a native read of the planned chapter with a unique call id."""
        audit_read = ToolCall(
            id=f"read-{call_number}",
            name="read",
            arguments={"file_path": str(target)},
        )
        return HookContext(
            tool_call=audit_read,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=write_policy,
            source="native",
        )

    for call_number in (1, 2, 3, 4):
        allowed_context = audit_context(call_number)
        allowed = await drift_hook.pre_tool_use(allowed_context)
        assert allowed.decision == HookDecision.CONTINUE
        await drift_hook.post_tool_use(allowed_context)

    blocked = await drift_hook.pre_tool_use(audit_context(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.terminal_state == "blocked"
    assert blocked.message is not None
    assert "post-build audit loop" in blocked.message
1899
1900
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_post_build_self_audits_during_verification(
    temp_dir: Path,
) -> None:
    """Verification-sourced re-reads of a finished output are never denied here.

    All three planned files exist. Four read cycles with
    ``source="verification"`` are expected to continue, and so is the fifth
    pre-tool check on the same chapter.
    """
    tools = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    plan_path = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
        "",
    ]
    plan_path.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_path)

    guide_dir = temp_dir / "guide" / "chapters"
    guide_dir.mkdir(parents=True, exist_ok=True)
    target = guide_dir / "02-installation.html"
    (temp_dir / "guide" / "index.html").write_text("<h1>Nginx Guide</h1>\n")
    (guide_dir / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n")
    target.write_text("<h1>Installation</h1>\n")
    saved_dod = store.save(definition)

    fake_session = FakeSession(active_dod_path=str(saved_dod), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    def verification_context(call_number: int) -> HookContext:
        """Build a verification read of the planned chapter with a unique call id."""
        verify_read = ToolCall(
            id=f"read-verify-{call_number}",
            name="read",
            arguments={"file_path": str(target)},
        )
        return HookContext(
            tool_call=verify_read,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=write_policy,
            source="verification",
        )

    for call_number in (1, 2, 3, 4):
        allowed_context = verification_context(call_number)
        allowed = await drift_hook.pre_tool_use(allowed_context)
        assert allowed.decision == HookDecision.CONTINUE
        await drift_hook.post_tool_use(allowed_context)

    outcome = await drift_hook.pre_tool_use(verification_context(5))

    assert outcome.decision == HookDecision.CONTINUE
1965
1966
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_relative_bash_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """A ``cd <root> && ls reference/`` bash call is denied once planned files exist.

    The plan lists the guide directories and three files, all of which are
    created before the hook is consulted with a ``source="native"`` bash call.
    """
    tool_registry = create_default_registry(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide'}`",
        f"- `{temp_dir / 'guide' / 'chapters'}`",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    # Create each planned file so the artifact set is present on disk.
    chapters_dir = temp_dir / "guide" / "chapters"
    chapters_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    (chapters_dir / "01-getting-started.html").write_text("one")
    (chapters_dir / "02-installation.html").write_text("two")

    saved_path = store.save(definition)
    fake_session = FakeSession(active_dod_path=str(saved_path), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    # Relative path into ``reference/`` reached through a leading ``cd``.
    bash_call = ToolCall(
        id="bash-relative-reference-1",
        name="bash",
        arguments={
            "command": f"cd {temp_dir} && ls -la reference/"
        },
    )
    bash_context = HookContext(
        tool_call=bash_call,
        tool=tool_registry.get("bash"),
        registry=tool_registry,
        permission_policy=permission_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(bash_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "completed artifact set scope" in outcome.message
2030
2031
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_relative_bash_post_build_audit_loop_during_verification(
    temp_dir: Path,
) -> None:
    """Repeated verification-sourced bash listings of built output stay allowed.

    The same ``cd <root> && ls guide/chapters/`` command goes through four
    pre/post cycles with ``source="verification"``; the fifth
    ``pre_tool_use`` must still return CONTINUE.
    """
    tool_registry = create_default_registry(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    # All three planned files exist before the hook is exercised.
    chapters_dir = temp_dir / "guide" / "chapters"
    chapters_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("<h1>Guide</h1>\n")
    (chapters_dir / "01-getting-started.html").write_text("<h1>One</h1>\n")
    (chapters_dir / "02-installation.html").write_text("<h1>Two</h1>\n")

    saved_path = store.save(definition)
    fake_session = FakeSession(active_dod_path=str(saved_path), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    def verification_listing(sequence: int) -> HookContext:
        """Build a verification-sourced bash listing of the chapters directory."""
        call = ToolCall(
            id=f"bash-relative-audit-{sequence}",
            name="bash",
            arguments={
                "command": f"cd {temp_dir} && ls -la guide/chapters/"
            },
        )
        return HookContext(
            tool_call=call,
            tool=tool_registry.get("bash"),
            registry=tool_registry,
            permission_policy=permission_policy,
            source="verification",
        )

    for attempt in range(1, 5):
        ctx = verification_listing(attempt)
        outcome = await drift_hook.pre_tool_use(ctx)
        assert outcome.decision == HookDecision.CONTINUE
        await drift_hook.post_tool_use(ctx)

    fifth_outcome = await drift_hook.pre_tool_use(verification_listing(5))

    assert fifth_outcome.decision == HookDecision.CONTINUE
2097
2098
@pytest.mark.asyncio
async def test_late_reference_drift_hook_does_not_treat_empty_output_dir_as_complete_artifact_set(
    temp_dir: Path,
) -> None:
    """A planned chapters directory that is still empty must not trigger a block.

    Only ``guide/index.html`` is written; ``guide/chapters`` exists but holds
    no files, so a native read under ``reference/`` is expected to continue.
    """
    tool_registry = create_default_registry(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"
    definition.completed_items = ["Create chapter files with appropriate content"]
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters'}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    # The chapters directory is created but deliberately left without files.
    chapters_dir = temp_dir / "guide" / "chapters"
    chapters_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")

    saved_path = store.save(definition)
    fake_session = FakeSession(active_dod_path=str(saved_path), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    read_context = HookContext(
        tool_call=reference_read,
        tool=tool_registry.get("read"),
        registry=tool_registry,
        permission_policy=permission_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(read_context)

    assert outcome.decision == HookDecision.CONTINUE
2155
2156
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_when_html_outputs_declare_missing_files(
    temp_dir: Path,
) -> None:
    """A reference read is denied when the built index links to an absent chapter.

    ``guide/index.html`` links to two chapters but only the first one is
    written; the denial message must name the missing ``02-installation.html``.
    """
    tool_registry = create_default_registry(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"
    definition.completed_items = ["Create chapter files with appropriate content"]
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{temp_dir / 'guide' / 'index.html'}`",
        f"- `{temp_dir / 'guide' / 'chapters'}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    # Index links to two chapters; only the first is actually written.
    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_page = guide_root / "index.html"
    index_page.write_text(
        '<a href="chapters/01-getting-started.html">One</a>\n'
        '<a href="chapters/02-installation.html">Two</a>\n'
    )
    first_chapter = chapters_dir / "01-getting-started.html"
    first_chapter.write_text("one")
    definition.touched_files = [str(index_page), str(chapters_dir / "01-getting-started.html")]

    saved_path = store.save(definition)
    fake_session = FakeSession(active_dod_path=str(saved_path), messages=[])
    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=fake_session,
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    read_context = HookContext(
        tool_call=reference_read,
        tool=tool_registry.get("read"),
        registry=tool_registry,
        permission_policy=permission_policy,
        source="native",
    )
    outcome = await drift_hook.pre_tool_use(read_context)

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "late reference drift" in outcome.message
    assert "02-installation.html" in outcome.message
2224
2225
@pytest.mark.asyncio
async def test_missing_planned_output_read_hook_blocks_reads_of_declared_missing_output(
    temp_dir: Path,
) -> None:
    # Scenario: the plan declares ``guide/index.html`` and the ``guide/chapters/``
    # directory; the written index links to three chapters, but only the first
    # two exist on disk. Reading the third (unwritten) chapter through
    # ``MissingPlannedOutputReadHook.pre_tool_use`` is expected to be denied.
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.status = "in_progress"
    plan_path = temp_dir / "implementation.md"
    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    # The plan names the index file and the chapters directory, not each chapter.
    plan_path.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root / 'index.html'}`",
                f"- `{chapters}/`",
                "",
            ]
        )
    )
    dod.implementation_plan = str(plan_path)
    chapters.mkdir(parents=True, exist_ok=True)
    # The index links to three chapter files with human-readable labels.
    (guide_root / "index.html").write_text(
        "\n".join(
            [
                "<html>",
                '<a href="chapters/01-introduction.html">Chapter 1: Introduction</a>',
                '<a href="chapters/02-installation.html">Chapter 2: Installation</a>',
                '<a href="chapters/03-configuration-basics.html">Chapter 3: Configuration Basics</a>',
                "</html>",
            ]
        )
        + "\n"
    )
    # Only chapters 1 and 2 are written; chapter 3 is intentionally absent.
    (chapters / "01-introduction.html").write_text("<h1>Introduction</h1>\n")
    (chapters / "02-installation.html").write_text("<h1>Installation</h1>\n")
    dod_path = dod_store.save(dod)
    session = FakeSession(active_dod_path=str(dod_path), messages=[])
    hook = MissingPlannedOutputReadHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )
    missing_target = chapters / "03-configuration-basics.html"

    # Native read aimed at the chapter that the index links to but that was never created.
    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="read-missing-output",
                name="read",
                arguments={"file_path": str(missing_target)},
            ),
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    # The message must mention a ``write(file_path="...`` call, the missing file
    # name, and the link label taken from index.html.
    assert "missing planned output artifact" in result.message
    assert 'write(file_path="' in result.message
    assert "03-configuration-basics.html" in result.message
    assert "Chapter 3: Configuration Basics" in result.message
    # NOTE(review): presumably the existing sibling chapter is cited as context
    # in the message — confirm against the hook implementation.
    assert "02-installation.html" in result.message