Python · 90204 bytes Raw Blame History
1 """Tests for permission policy and tool lifecycle hooks."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.llm.base import Message, Role, ToolCall
10 from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
11 from loader.runtime.executor import ToolExecutionState, ToolExecutor
12 from loader.runtime.hooks import (
13 ActiveRepairMutationScopeHook,
14 ActiveRepairScopeHook,
15 BaseToolHook,
16 FilePathAliasHook,
17 HookContext,
18 HookDecision,
19 HookManager,
20 HookResult,
21 LateReferenceDriftHook,
22 MissingPlannedOutputReadHook,
23 RelativePathContextHook,
24 SearchPathAliasHook,
25 )
26 from loader.runtime.permissions import (
27 PermissionMode,
28 PermissionOverride,
29 PermissionRuleDisposition,
30 PermissionRuleSet,
31 build_permission_policy,
32 )
33 from loader.runtime.safeguard_services import ActionTracker
34 from loader.runtime.tracing import RuntimeTracer
35 from loader.tools.base import create_default_registry
36
37
class RecordingHook(BaseToolHook):
    """Test hook that logs the name of each lifecycle callback it receives.

    Every callback appends its own name to the shared ``events`` list and
    returns a default-constructed ``HookResult``.
    """

    def __init__(self, events: list[str]) -> None:
        # The list is owned by the test so it can assert on call order.
        self.events = events

    def _record(self, stage: str) -> HookResult:
        """Append ``stage`` to the event log and return a default result."""
        self.events.append(stage)
        return HookResult()

    async def pre_tool_use(self, context) -> HookResult:
        return self._record("pre_tool_use")

    async def post_tool_use(self, context) -> HookResult:
        return self._record("post_tool_use")

    async def post_tool_use_failure(self, context) -> HookResult:
        return self._record("post_tool_use_failure")
55
56
class DenyInPreHook(BaseToolHook):
    """Test hook whose pre-tool callback returns a DENY decision.

    Both callbacks also append their name to the shared ``events`` list so a
    test can check which lifecycle stages ran.
    """

    def __init__(self, events: list[str]) -> None:
        self.events = events

    async def pre_tool_use(self, context) -> HookResult:
        self.events.append("pre_tool_use")
        denial = HookResult(
            decision=HookDecision.DENY,
            message="[Blocked - denied by test hook]",
            terminal_state="blocked",
        )
        return denial

    async def post_tool_use_failure(self, context) -> HookResult:
        self.events.append("post_tool_use_failure")
        default_result = HookResult()
        return default_result
74
75
def test_permission_policy_honors_overrides(temp_dir: Path) -> None:
    """Per-call overrides change the decision made under a read-only mode.

    The policy runs in READ_ONLY while ``write`` requires WORKSPACE_WRITE, so:
    no override is denied, an ALLOW override is allowed, and an ASK override
    yields an "ask" decision.

    This is a plain synchronous test: ``policy.authorize`` is called without
    ``await`` here, so no asyncio marker or coroutine wrapper is needed.
    """
    policy = build_permission_policy(
        active_mode=PermissionMode.READ_ONLY,
        workspace_root=temp_dir,
        tool_requirements={"write": PermissionMode.WORKSPACE_WRITE},
    )

    denied = policy.authorize("write")
    allowed = policy.authorize("write", override=PermissionOverride.ALLOW)
    asked = policy.authorize("write", override=PermissionOverride.ASK)

    assert denied.decision.value == "deny"
    assert allowed.allowed
    assert asked.decision.value == "ask"
91
92
def test_permission_mode_parsing_supports_prompt_and_allow() -> None:
    """``PermissionMode.from_str`` maps "prompt" and "allow" to their members."""
    parsed_prompt = PermissionMode.from_str("prompt")
    assert parsed_prompt == PermissionMode.PROMPT

    parsed_allow = PermissionMode.from_str("allow")
    assert parsed_allow == PermissionMode.ALLOW
96
97
def test_permission_policy_honors_rule_precedence(temp_dir: Path) -> None:
    """Deny and ask rules win over an allow rule that also matches.

    Every write carries the content ``"safe change\\n"``, which matches the
    allow rule; the target path decides whether a deny or ask rule matches too.
    """
    rule_set = PermissionRuleSet.from_dict(
        {
            "allow": [{"tool": "write", "contains": "safe change"}],
            "deny": [{"tool": "write", "path_contains": "secrets"}],
            "ask": [{"tool": "write", "path_contains": "README"}],
        }
    )
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements={"write": PermissionMode.WORKSPACE_WRITE},
        rules=rule_set,
    )

    def authorize_write(filename: str):
        """Authorize a write of the shared content to ``temp_dir/filename``."""
        write_arguments = {
            "file_path": str(temp_dir / filename),
            "content": "safe change\n",
        }
        return policy.authorize("write", arguments=write_arguments)

    denied = authorize_write("secrets.txt")
    asked = authorize_write("README.md")
    allowed = authorize_write("notes.txt")

    assert denied.decision.value == "deny"
    assert denied.matched_disposition == PermissionRuleDisposition.DENY
    assert asked.decision.value == "ask"
    assert asked.matched_disposition == PermissionRuleDisposition.ASK
    assert allowed.decision.value == "allow"
    assert allowed.matched_disposition == PermissionRuleDisposition.ALLOW
140
141
@pytest.mark.asyncio
async def test_prompt_mode_executor_prompts_once_and_respects_denial(
    temp_dir: Path,
) -> None:
    """In PROMPT mode a declined confirmation leaves the write unexecuted.

    The confirmation callback must be invoked exactly once, and its details
    text must name both the active and the required permission mode.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.PROMPT,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "prompted.txt"
    seen_prompts: list[tuple[str, str, str]] = []

    async def decline(tool_name: str, message: str, details: str) -> bool:
        # Record the full prompt, then refuse the request.
        seen_prompts.append((tool_name, message, details))
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "prompted\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=decline,
    )

    assert outcome.state == ToolExecutionState.DECLINED
    assert not target.exists()
    assert len(seen_prompts) == 1
    _, _, prompt_details = seen_prompts[0]
    assert "active_mode=prompt" in prompt_details
    assert "required_mode=workspace-write" in prompt_details
175
176
@pytest.mark.asyncio
async def test_allow_mode_executor_skips_prompt_for_destructive_write(
    temp_dir: Path,
) -> None:
    """In ALLOW mode a write executes without calling the confirmation hook.

    The callback would refuse if asked, so a successful write plus an empty
    prompt log shows it was never consulted.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "allowed.txt"
    prompted_tools: list[str] = []

    async def unexpected(tool_name: str, message: str, details: str) -> bool:
        prompted_tools.append(tool_name)
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "allowed\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=unexpected,
    )

    assert outcome.state == ToolExecutionState.EXECUTED
    assert target.read_text() == "allowed\n"
    assert prompted_tools == []
208
209
@pytest.mark.asyncio
async def test_executor_accepts_edit_content_alias_for_new_string(
    temp_dir: Path,
) -> None:
    """An ``edit`` call that passes ``content`` is treated as ``new_string``.

    The file must be rewritten and the outcome's tool call must expose the
    replacement text under the ``new_string`` key.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "guide.html"
    target.write_text("<h1>Old</h1>\n")

    edit_arguments = {
        "file_path": str(target),
        "old_string": "<h1>Old</h1>",
        "content": "<h1>New</h1>",
    }
    edit_call = ToolCall(id="edit-1", name="edit", arguments=edit_arguments)
    outcome = await executor.execute_tool_call(edit_call, source="native")

    assert outcome.state == ToolExecutionState.EXECUTED
    assert target.read_text() == "<h1>New</h1>\n"
    assert outcome.tool_call.arguments["new_string"] == "<h1>New</h1>"
240
241
@pytest.mark.asyncio
async def test_executor_maps_native_ls_alias_to_read_only_bash(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A native ``ls`` call is executed as a read-only ``bash`` ``ls`` command.

    ``HOME`` is pointed at the temp directory so that ``~/Loader`` refers to
    the directory created by this test.
    """
    monkeypatch.setenv("HOME", str(temp_dir))
    target_dir = temp_dir / "Loader"
    target_dir.mkdir()
    listed_file = target_dir / "notes.txt"
    listed_file.write_text("details\n")

    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)

    ls_call = ToolCall(id="ls-1", name="ls", arguments={"path": "~/Loader"})
    outcome = await executor.execute_tool_call(ls_call, source="native")

    assert outcome.state == ToolExecutionState.EXECUTED
    executed_call = outcome.tool_call
    assert executed_call.name == "bash"
    assert executed_call.arguments["command"] == f"ls {target_dir}"
    assert outcome.required_permission == PermissionMode.READ_ONLY
    assert "notes.txt" in outcome.result_output
273
274
@pytest.mark.asyncio
async def test_ask_rule_prompts_even_when_allow_mode(temp_dir: Path) -> None:
    """A matching ask rule triggers one confirmation prompt in ALLOW mode.

    Declining the prompt must leave the README unwritten, and the prompt
    details must describe the ask rule that matched.
    """
    registry = create_default_registry(temp_dir)
    ask_rules = PermissionRuleSet.from_dict(
        {"ask": [{"tool": "write", "path_contains": "README"}]}
    )
    policy = build_permission_policy(
        active_mode=PermissionMode.ALLOW,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
        rules=ask_rules,
    )
    executor = ToolExecutor(registry, RuntimeTracer(), policy)
    target = temp_dir / "README.md"
    prompt_details: list[str] = []

    async def decline(tool_name: str, message: str, details: str) -> bool:
        prompt_details.append(details)
        return False

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "no thanks\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        on_confirmation=decline,
    )

    assert outcome.state == ToolExecutionState.DECLINED
    assert not target.exists()
    assert len(prompt_details) == 1
    expected_rule_text = "matched_ask_rule=tool=write, path_contains=README"
    assert expected_rule_text in prompt_details[0]
308
309
@pytest.mark.asyncio
async def test_hook_lifecycle_runs_in_order_for_success(temp_dir: Path) -> None:
    """A successful write fires ``pre_tool_use`` then ``post_tool_use`` only."""
    lifecycle_log: list[str] = []
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook_manager = HookManager([RecordingHook(lifecycle_log)])
    executor = ToolExecutor(
        registry,
        RuntimeTracer(),
        policy,
        hooks=hook_manager,
    )
    target = temp_dir / "hook-success.txt"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "hook success\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        skip_confirmation=True,
    )

    assert outcome.state == ToolExecutionState.EXECUTED
    assert lifecycle_log == ["pre_tool_use", "post_tool_use"]
    assert target.read_text() == "hook success\n"
340
341
@pytest.mark.asyncio
async def test_pre_hook_deny_still_runs_failure_hook_once(temp_dir: Path) -> None:
    """A pre-hook denial blocks the write and fires the failure hook once.

    The target file must not be created, the outcome message must carry one
    tool result, and the hook's denial text must reach the event content.
    """
    lifecycle_log: list[str] = []
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook_manager = HookManager([DenyInPreHook(lifecycle_log)])
    executor = ToolExecutor(
        registry,
        RuntimeTracer(),
        policy,
        hooks=hook_manager,
    )
    target = temp_dir / "hook-denied.txt"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(target), "content": "should not exist\n"},
    )
    outcome = await executor.execute_tool_call(
        write_call,
        source="native",
        skip_confirmation=True,
    )

    assert outcome.state == ToolExecutionState.BLOCKED
    assert lifecycle_log == ["pre_tool_use", "post_tool_use_failure"]
    assert not target.exists()
    assert len(outcome.message.tool_results) == 1
    assert "denied by test hook" in outcome.event_content
374
375
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("tool_name", "arguments", "expected_path"),
    [
        ("read", {"file": "notes.txt"}, "notes.txt"),
        ("write", {"filepath": "notes.txt", "content": "hello\n"}, "notes.txt"),
        (
            "edit",
            {"filePath": "notes.txt", "old_string": "before", "new_string": "after"},
            "notes.txt",
        ),
        ("patch", {"path": "notes.txt", "hunks": []}, "notes.txt"),
    ],
)
async def test_file_path_alias_hook_canonicalizes_common_aliases(
    temp_dir: Path,
    tool_name: str,
    arguments: dict[str, object],
    expected_path: str,
) -> None:
    """``FilePathAliasHook`` rewrites path aliases to the ``file_path`` key.

    After the hook runs, the updated arguments must hold the path under
    ``file_path`` and contain none of the alias keys.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook = FilePathAliasHook()

    hook_context = HookContext(
        tool_call=ToolCall(id=f"{tool_name}-1", name=tool_name, arguments=arguments),
        tool=registry.get(tool_name),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["file_path"] == expected_path
    leftover_aliases = [
        alias
        for alias in ("file", "filepath", "filePath", "filename", "path")
        if alias in rewritten
    ]
    assert not leftover_aliases
418
419
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("tool_name", "arguments", "expected_path"),
    [
        ("glob", {"pattern": "*.html", "directory": "chapters"}, "chapters"),
        ("grep", {"pattern": "alpha", "dir": "src"}, "src"),
    ],
)
async def test_search_path_alias_hook_canonicalizes_common_aliases(
    temp_dir: Path,
    tool_name: str,
    arguments: dict[str, object],
    expected_path: str,
) -> None:
    """``SearchPathAliasHook`` rewrites directory aliases to the ``path`` key.

    After the hook runs, the updated arguments must hold the directory under
    ``path`` and contain none of the alias keys.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()

    hook_context = HookContext(
        tool_call=ToolCall(id=f"{tool_name}-1", name=tool_name, arguments=arguments),
        tool=registry.get(tool_name),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["path"] == expected_path
    leftover_aliases = [
        alias for alias in ("directory", "dir", "folder") if alias in rewritten
    ]
    assert not leftover_aliases
456
457
@pytest.mark.asyncio
async def test_search_path_alias_hook_splits_full_glob_pattern(
    temp_dir: Path,
) -> None:
    """A glob pattern with a directory prefix is split into path and pattern."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()
    chapters = temp_dir / "chapters"

    glob_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={"pattern": f"{chapters}/*.html"},
    )
    hook_context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["path"] == str(chapters)
    assert rewritten["pattern"] == "*.html"
488
489
@pytest.mark.asyncio
async def test_search_path_alias_hook_splits_implicit_recursive_glob_parent(
    temp_dir: Path,
) -> None:
    """A ``**/<dirs>/*.html`` pattern is split into the dirs and ``*.html``."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()

    glob_call = ToolCall(
        id="glob-implicit-1",
        name="glob",
        arguments={"pattern": "**/Loader/guides/nginx/chapters/*.html"},
    )
    hook_context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    rewritten = result.updated_arguments
    assert rewritten is not None
    assert rewritten["path"] == "Loader/guides/nginx/chapters"
    assert rewritten["pattern"] == "*.html"
519
520
@pytest.mark.asyncio
async def test_search_path_alias_hook_leaves_fully_generic_recursive_glob_unchanged(
    temp_dir: Path,
) -> None:
    """A bare ``**/*.html`` pattern produces no argument rewrite."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    hook = SearchPathAliasHook()

    glob_call = ToolCall(
        id="glob-generic-1",
        name="glob",
        arguments={"pattern": "**/*.html"},
    )
    hook_context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.updated_arguments is None
548
549
@pytest.mark.asyncio
async def test_relative_path_context_hook_remaps_workspace_mirror_of_external_root(
    temp_dir: Path,
) -> None:
    """A workspace path mirroring an external root is remapped to that root.

    The action tracker has recorded a read under ``external-home/Loader``.
    A later write aimed at ``workspace/Loader/guides/nginx/index.html`` must
    be rewritten to the same relative location under the external root, and
    the hook must inject one path-anchor correction message.
    """
    workspace_root = temp_dir / "workspace"
    workspace_root.mkdir()
    external_root = temp_dir / "external-home"
    external_fortran = external_root / "Loader" / "guides" / "fortran"
    # parents=True also creates external_root/Loader/guides, so no separate
    # mkdir for the guides directory is needed.
    external_fortran.mkdir(parents=True)
    (external_fortran / "index.html").write_text("<html></html>\n")

    registry = create_default_registry(workspace_root)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=workspace_root,
        tool_requirements=registry.get_tool_requirements(),
    )
    action_tracker = ActionTracker()
    action_tracker.record_tool_call(
        "read",
        {"file_path": str(external_fortran / "index.html")},
    )
    hook = RelativePathContextHook(action_tracker, workspace_root)

    mirrored_workspace_path = workspace_root / "Loader" / "guides" / "nginx" / "index.html"
    expected_external_path = external_root / "Loader" / "guides" / "nginx" / "index.html"

    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="write-1",
                name="write",
                arguments={
                    "file_path": str(mirrored_workspace_path),
                    "content": "<html></html>\n",
                },
            ),
            tool=registry.get("write"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.updated_arguments is not None
    # Compare resolved paths so symlinked temp directories do not matter.
    assert Path(result.updated_arguments["file_path"]).resolve() == expected_external_path.resolve()
    resolved_loader_root = (external_root / "Loader").resolve()
    assert result.injected_messages == [
        (
            "[Path anchor correction] A repo-local mirror path was remapped to the "
            f"established output root under `{resolved_loader_root}`. Keep future "
            "file/search tool calls on that external root and use `index.html` there "
            "instead of re-anchoring work to the workspace checkout."
        )
    ]
606
607
@pytest.mark.asyncio
async def test_relative_path_context_hook_prefers_external_search_ancestor_over_workspace_match(
    temp_dir: Path,
) -> None:
    """A relative search path resolves to the external root, not the workspace.

    Both the workspace and the external root contain a ``guides`` directory;
    after a recorded read under the external root, a glob on ``guides`` must
    be rewritten to ``external-home/Loader/guides``.
    """
    workspace_root = temp_dir / "workspace"
    workspace_guides = workspace_root / "guides"
    workspace_guides.mkdir(parents=True)
    external_root = temp_dir / "external-home"
    external_fortran = external_root / "Loader" / "guides" / "fortran"
    external_fortran.mkdir(parents=True)
    external_index = external_fortran / "index.html"
    external_index.write_text("<html></html>\n")

    registry = create_default_registry(workspace_root)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=workspace_root,
        tool_requirements=registry.get_tool_requirements(),
    )
    action_tracker = ActionTracker()
    action_tracker.record_tool_call("read", {"file_path": str(external_index)})
    hook = RelativePathContextHook(action_tracker, workspace_root)

    glob_call = ToolCall(
        id="glob-ancestor-1",
        name="glob",
        arguments={"path": "guides", "pattern": "**"},
    )
    hook_context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.updated_arguments is not None
    remapped_path = Path(result.updated_arguments["path"]).resolve()
    expected_path = (external_root / "Loader" / "guides").resolve()
    assert remapped_path == expected_path
650
651
class FakeSession:
    """Minimal session stand-in exposing the two attributes the tests set."""

    def __init__(self, *, active_dod_path: str, messages: list[Message]) -> None:
        # Both values are stored as given; no copying or validation happens.
        self.messages = messages
        self.active_dod_path = active_dod_path
656
657
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_reference_reads_while_fixing(
    temp_dir: Path,
) -> None:
    """A native read of a reference file is denied during an active repair.

    The DoD status is "fixing" and the session's repair-focus message names
    ``guide/index.html``; reading ``reference/index.html`` must be blocked
    with a message that mentions the repair target.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    repair_target = temp_dir / "guide" / "index.html"
    missing_chapter = temp_dir / "guide" / "chapters" / "01-introduction.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    hook_context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    denial_text = result.message
    assert denial_text is not None
    assert "active repair scope" in denial_text
    assert str(repair_target) in denial_text
712
713
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_stale_memory_reads_while_fixing(
    temp_dir: Path,
) -> None:
    """A ``notepad_read`` call is denied while a repair is being fixed.

    The denial message must warn that durable memory may be stale, point at
    the active verifier/DoD, and mention the repair target.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    repair_target = temp_dir / "guide" / "chapters" / "05-load-balancing.html"
    failing_check = (
        "[DEFINITION OF DONE CHECK STILL FAILING]\n"
        "HTML guide content quality issues:\n"
        "Repair focus:\n"
        f"- {repair_target}: thin content (1500 text chars, expected at least 1758)\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.USER, content=failing_check)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    memory_call = ToolCall(id="memory-1", name="notepad_read", arguments={})
    hook_context = HookContext(
        tool_call=memory_call,
        tool=registry.get("notepad_read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    denial_text = result.message
    assert denial_text is not None
    assert "durable memory may be stale" in denial_text
    assert "trust the active verifier/DoD" in denial_text
    assert str(repair_target) in denial_text
770
771
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_reads_inside_active_artifact_set(
    temp_dir: Path,
) -> None:
    """Reading a file named in the repair-focus message is allowed.

    The chapter file appears in the repair focus alongside the primary
    repair target, so a native read of it must continue.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    repair_target = temp_dir / "guide" / "index.html"
    chapter_path = temp_dir / "guide" / "chapters" / "01-getting-started.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n"
        f"- Fix the broken local reference `../styles.css` in `{chapter_path}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{chapter_path}`; otherwise remove or replace `chapters/01-getting-started.html`.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(chapter_path)},
    )
    hook_context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.decision == HookDecision.CONTINUE
824
825
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_existing_sibling_reads_with_source_of_truth_hint(
    temp_dir: Path,
) -> None:
    """Reading an existing sibling chapter is allowed during the repair.

    The repair focus carries a "source of truth" hint for the artifact files;
    the sibling chapter exists on disk next to the missing one, and a native
    read of it must continue.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    repair_target = temp_dir / "guide" / "index.html"
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    sibling = chapter_dir / "03-basic-usage.html"
    sibling.write_text("<h1>Basic Usage</h1>\n")
    missing_chapter = chapter_dir / "02-installation.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/02-installation.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `chapters/02-installation.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{repair_target}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(sibling)},
    )
    hook_context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.decision == HookDecision.CONTINUE
883
884
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_verification_source_outside_repair_target(
    temp_dir: Path,
) -> None:
    """A read issued with ``source="verification"`` is allowed.

    The file read (``guide/index.html``) is not the repair target named in
    the repair focus, yet the hook must still return CONTINUE.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)

    repair_target = temp_dir / "guide" / "chapters" / "06-troubleshooting.html"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    verify_call = ToolCall(
        id="verify-1",
        name="read",
        arguments={"file_path": str(temp_dir / "guide" / "index.html")},
    )
    hook_context = HookContext(
        tool_call=verify_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="verification",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.decision == HookDecision.CONTINUE
935
936
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_local_rereads_outside_concrete_repair_files(
    temp_dir: Path,
) -> None:
    """A native read of a chapter not named in the repair focus is denied.

    The denial message must mention both the repair target and the
    stylesheet that the repair focus says may need to be created.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)

    chapters_root = temp_dir / "guide" / "chapters"
    repair_target = chapters_root / "05-advanced-configurations.html"
    second_target = chapters_root / "06-troubleshooting.html"
    other_chapter = chapters_root / "01-getting-started.html"
    stylesheet = temp_dir / "guide" / "styles.css"
    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Fix the broken local reference `../styles.css` in `{second_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(other_chapter)},
    )
    hook_context = HookContext(
        tool_call=read_call,
        tool=registry.get("read"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    denial_text = result.message
    assert denial_text is not None
    assert "active repair scope" in denial_text
    assert str(repair_target) in denial_text
    assert str(stylesheet) in denial_text
996
997
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_broad_glob_during_concrete_repair(
    temp_dir: Path,
) -> None:
    """A recursive glob over the guide root is denied during a repair.

    The repair focus names a single file to edit; globbing ``**/*.html``
    across the whole guide must be blocked with a message that mentions the
    repair target.
    """
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the generated guide")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    repair_target = guide_root / "index.html"
    repair_target.write_text("<h1>Guide</h1>\n")
    intro_chapter = chapters / "01-introduction.html"
    intro_chapter.write_text("<h1>Intro</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Improve `{repair_target}`: insufficient structured content.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    glob_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={"path": str(guide_root), "pattern": "**/*.html"},
    )
    hook_context = HookContext(
        tool_call=glob_call,
        tool=registry.get("glob"),
        registry=registry,
        permission_policy=policy,
        source="native",
    )
    result = await hook.pre_tool_use(hook_context)

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    denial_text = result.message
    assert denial_text is not None
    assert "active repair scope" in denial_text
    assert str(repair_target) in denial_text
1057
1058
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_repair_audit_loop_after_repeated_source_reads(
    temp_dir: Path,
) -> None:
    """Four in-scope reads are allowed; the fifth is denied as a repair audit loop."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    saved_dod = store.save(definition)

    # Index page plus two chapters, all present on disk.
    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    repair_target = guide_root / "index.html"
    repair_target.write_text("<h1>Guide</h1>\n")
    intro_chapter = chapter_dir / "01-introduction.html"
    install_chapter = chapter_dir / "02-installation.html"
    intro_chapter.write_text("<h1>Intro</h1>\n")
    install_chapter.write_text("<h1>Install</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/02-installation.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{install_chapter}`; otherwise remove or replace `chapters/02-installation.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{repair_target}`, `{intro_chapter}`, `{install_chapter}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    def read_context(attempt: int) -> HookContext:
        """Build a read call: odd attempts hit the repair target, even ones the intro chapter."""
        path_to_read = intro_chapter if attempt % 2 == 0 else repair_target
        read_call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(path_to_read)},
        )
        return HookContext(
            tool_call=read_call,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )

    # Attempts 1 through 4 pass the pre-hook and are recorded via the post-hook.
    for attempt in range(1, 5):
        ctx = read_context(attempt)
        allowed = await scope_hook.pre_tool_use(ctx)
        assert allowed.decision == HookDecision.CONTINUE
        await scope_hook.post_tool_use(ctx)

    blocked = await scope_hook.pre_tool_use(read_context(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.terminal_state == "blocked"
    assert blocked.message is not None
    assert "repair audit loop" in blocked.message
1131
1132
@pytest.mark.asyncio
async def test_active_repair_scope_audit_loop_names_next_missing_repair_target(
    temp_dir: Path,
) -> None:
    """The audit-loop denial message includes the path of the not-yet-created sibling."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "fixing"
    saved_dod = store.save(definition)

    # Only the repair target is written; the chapter it links to is left missing.
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    repair_target = chapter_dir / "04-reverse-proxy.html"
    next_missing = chapter_dir / "05-load-balancing.html"
    repair_target.write_text("<h1>Reverse Proxy</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `05-load-balancing.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{next_missing}`; otherwise remove or replace `05-load-balancing.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{repair_target}`, `{next_missing}`.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )

    def reread_target(attempt: int) -> HookContext:
        """Build another read of the same repair target, tagged with the attempt number."""
        read_call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(repair_target)},
        )
        return HookContext(
            tool_call=read_call,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )

    # The first four rereads are allowed and recorded.
    for attempt in range(1, 5):
        ctx = reread_target(attempt)
        allowed = await scope_hook.pre_tool_use(ctx)
        assert allowed.decision == HookDecision.CONTINUE
        await scope_hook.post_tool_use(ctx)

    blocked = await scope_hook.pre_tool_use(reread_target(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.message is not None
    assert "repair audit loop" in blocked.message
    assert str(next_missing) in blocked.message
1200
1201
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_scoped_glob_within_active_artifact_roots(
    temp_dir: Path,
) -> None:
    """A glob whose pattern targets ``guide/chapters`` is allowed during an in-progress repair."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    # Paths are only named in the repair message; nothing is created on disk here.
    guide_root = temp_dir / "guide"
    repair_target = guide_root / "index.html"
    chapter_dir = guide_root / "chapters"
    troubleshooting = chapter_dir / "troubleshooting.html"
    introduction = chapter_dir / "introduction.html"
    installation = chapter_dir / "installation.html"
    configuration = chapter_dir / "configuration.html"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/troubleshooting.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{troubleshooting}`; otherwise remove or replace `chapters/troubleshooting.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{introduction}`, `{installation}`, `{configuration}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )
    scoped_glob = ToolCall(
        id="glob-1",
        name="glob",
        arguments={
            "path": str(temp_dir),
            "pattern": "**/guide/chapters/*.html",
        },
    )

    outcome = await scope_hook.pre_tool_use(
        HookContext(
            tool_call=scoped_glob,
            tool=tool_registry.get("glob"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
1259
1260
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_declared_missing_sibling_reads(
    temp_dir: Path,
) -> None:
    """Reading a chapter that the index links to but that does not exist yet is allowed."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True)
    repair_target = guide_root / "index.html"
    existing_chapter = chapter_dir / "overview.html"
    next_chapter = chapter_dir / "installation.html"

    # The index links to both chapters, but only the overview is written to disk.
    index_lines = (
        "<html>",
        '<a href="chapters/overview.html">Overview</a>',
        '<a href="chapters/installation.html">Installation</a>',
        "</html>",
    )
    repair_target.write_text("".join(f"{line}\n" for line in index_lines))
    existing_chapter.write_text("<h1>Overview</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/overview.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/overview.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{existing_chapter}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )
    sibling_read = ToolCall(
        id="read-allowed-sibling",
        name="read",
        arguments={"file_path": str(next_chapter)},
    )

    outcome = await scope_hook.pre_tool_use(
        HookContext(
            tool_call=sibling_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
1332
1333
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_reference_reads_during_in_progress_repair(
    temp_dir: Path,
) -> None:
    """A read under ``reference/`` is denied while an in-progress DoD has a concrete repair target."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    repair_target = guide_root / "chapters" / "05-advanced-configurations.html"
    stylesheet = guide_root / "styles.css"
    reference_page = temp_dir / "reference" / "index.html"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    scope_hook = ActiveRepairScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )
    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(reference_page)},
    )

    outcome = await scope_hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "active repair scope" in outcome.message
1388
1389
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_blocks_writes_outside_named_repair_files(
    temp_dir: Path,
) -> None:
    """An edit to a chapter that the repair message does not name is denied."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    repair_target = chapter_dir / "05-advanced-configurations.html"
    unrelated_chapter = chapter_dir / "01-getting-started.html"
    stylesheet = guide_root / "styles.css"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )
    stray_edit = ToolCall(
        id="edit-1",
        name="edit",
        arguments={
            "file_path": str(unrelated_chapter),
            "old_string": "old",
            "new_string": "new",
        },
    )

    outcome = await mutation_hook.pre_tool_use(
        HookContext(
            tool_call=stray_edit,
            tool=tool_registry.get("edit"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    # The denial message must name the file that should be edited instead.
    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "active repair mutation scope" in outcome.message
    assert str(repair_target) in outcome.message
1446
1447
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_allows_expected_repair_file_writes(
    temp_dir: Path,
) -> None:
    """Writing the stylesheet that the repair message says to create is allowed."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    repair_target = guide_root / "chapters" / "05-advanced-configurations.html"
    stylesheet = guide_root / "styles.css"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )
    stylesheet_write = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(stylesheet),
            "content": "body { color: #222; }\n",
        },
    )

    outcome = await mutation_hook.pre_tool_use(
        HookContext(
            tool_call=stylesheet_write,
            tool=tool_registry.get("write"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
1500
1501
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_allows_declared_missing_sibling_outputs(
    temp_dir: Path,
) -> None:
    """Writing a chapter that the index links to but that is absent on disk is allowed."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    chapter_dir.mkdir(parents=True)
    repair_target = guide_root / "index.html"
    existing_chapter = chapter_dir / "01-introduction.html"
    next_chapter = chapter_dir / "02-installation.html"

    # The index links to both chapters, but only the introduction is written to disk.
    index_lines = (
        "<html>",
        '<a href="chapters/01-introduction.html">Introduction</a>',
        '<a href="chapters/02-installation.html">Installation</a>',
        "</html>",
    )
    repair_target.write_text("".join(f"{line}\n" for line in index_lines))
    existing_chapter.write_text("<h1>Introduction</h1>\n")

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
        "- Use the existing artifact files as the source of truth while repairing this file: "
        f"`{existing_chapter}`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )
    sibling_write = ToolCall(
        id="write-2",
        name="write",
        arguments={
            "file_path": str(next_chapter),
            "content": "<h1>Installation</h1>\n",
        },
    )

    outcome = await mutation_hook.pre_tool_use(
        HookContext(
            tool_call=sibling_write,
            tool=tool_registry.get("write"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
1573
1574
@pytest.mark.asyncio
async def test_active_repair_mutation_scope_hook_blocks_broad_mutating_bash(
    temp_dir: Path,
) -> None:
    """A ``mkdir -p`` bash command for a path the repair message does not name is denied."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Repair the active artifact set")
    definition.status = "in_progress"
    saved_dod = store.save(definition)

    guide_root = temp_dir / "guide"
    repair_target = guide_root / "chapters" / "05-advanced-configurations.html"
    stylesheet = guide_root / "styles.css"
    assets_dir = guide_root / "assets"

    repair_focus = (
        "Repair focus:\n"
        f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
        f"- Immediate next step: edit `{repair_target}`.\n"
        f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
    )
    mutation_hook = ActiveRepairMutationScopeHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(
            active_dod_path=str(saved_dod),
            messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        ),
    )
    mkdir_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": f"mkdir -p {assets_dir}"},
    )

    outcome = await mutation_hook.pre_tool_use(
        HookContext(
            tool_call=mkdir_call,
            tool=tool_registry.get("bash"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "active repair mutation scope" in outcome.message
    assert str(repair_target) in outcome.message
1630
1631
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_out_of_scope_reference_reads(
    temp_dir: Path,
) -> None:
    """With three of four planned files written, a reference read is denied and the missing file is named."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    # The plan lists four outputs as project-relative paths.
    planned_outputs = (
        "guide/index.html",
        "guide/chapters/01-getting-started.html",
        "guide/chapters/02-installation.html",
        "guide/chapters/03-first-website.html",
    )
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# File Changes\n" + "".join(f"- `{entry}`\n" for entry in planned_outputs)
    )
    definition.implementation_plan = str(plan_file)
    saved_dod = store.save(definition)

    # Only the first three planned files exist; 03-first-website.html is never written.
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    (chapter_dir / "01-getting-started.html").write_text("one")
    (chapter_dir / "02-installation.html").write_text("two")

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )
    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )

    outcome = await drift_hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "late reference drift" in outcome.message
    assert "03-first-website.html" in outcome.message
1686
1687
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_reads_inside_planned_artifact_set(
    temp_dir: Path,
) -> None:
    """Reading a file that is itself listed in the implementation plan is allowed."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    planned_outputs = (
        "guide/index.html",
        "guide/chapters/01-getting-started.html",
        "guide/chapters/02-installation.html",
        "guide/chapters/03-first-website.html",
    )
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# File Changes\n" + "".join(f"- `{entry}`\n" for entry in planned_outputs)
    )
    definition.implementation_plan = str(plan_file)
    saved_dod = store.save(definition)

    # Write three of the planned files; the one being read is among them.
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    planned_chapter = chapter_dir / "02-installation.html"
    (temp_dir / "guide" / "index.html").write_text("index")
    (chapter_dir / "01-getting-started.html").write_text("one")
    planned_chapter.write_text("two")

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )
    planned_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(planned_chapter)},
    )

    outcome = await drift_hook.pre_tool_use(
        HookContext(
            tool_call=planned_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
1739
1740
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reopen_after_study_and_first_output(
    temp_dir: Path,
) -> None:
    """With the study item completed and only the index written, a reference read is denied."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"
    definition.completed_items = [
        "First, examine the existing reference guide structure to understand the format and cadence",
    ]

    planned_outputs = (
        "guide/index.html",
        "guide/chapters/01-getting-started.html",
        "guide/chapters/02-installation.html",
    )
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# File Changes\n" + "".join(f"- `{entry}`\n" for entry in planned_outputs)
    )
    definition.implementation_plan = str(plan_file)

    # Only the index page is written before the DoD is saved; no chapter exists yet.
    chapter_dir = temp_dir / "guide" / "chapters"
    chapter_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    saved_dod = store.save(definition)

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )
    reference_read = ToolCall(
        id="read-reference",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )

    outcome = await drift_hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    # The denial message is expected to name the first chapter.
    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "late reference drift" in outcome.message
    assert "01-getting-started.html" in outcome.message
1795
1796
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """Once every planned file exists, a native reference read is denied with the completed-set message."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"

    # The plan lists the two directories and the three files as absolute paths.
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# Implementation Plan\n"
        "\n"
        "## File Changes\n"
        f"- `{guide_root}`\n"
        f"- `{chapter_dir}`\n"
        f"- `{index_page}`\n"
        f"- `{first_chapter}`\n"
        f"- `{second_chapter}`\n"
    )
    definition.implementation_plan = str(plan_file)

    # All three planned files are written before the DoD is saved.
    chapter_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text("index")
    first_chapter.write_text("one")
    second_chapter.write_text("two")
    saved_dod = store.save(definition)

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )
    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )

    outcome = await drift_hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "completed artifact set scope" in outcome.message
    assert str(temp_dir / "guide") in outcome.message
1859
1860
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_reference_reads_when_outputs_exist_but_need_quality(
    temp_dir: Path,
) -> None:
    """A reference read is denied when planned files exist even though a pending item remains."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create an equally thorough multi-page HTML guide.")
    definition.status = "in_progress"
    definition.pending_items.append("Improve generated guide depth and formatting")

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"

    # The chapters directory entry in the plan carries a trailing slash.
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# Implementation Plan\n"
        "\n"
        "## File Changes\n"
        f"- `{index_page}`\n"
        f"- `{chapter_dir}/`\n"
        f"- `{first_chapter}`\n"
    )
    definition.implementation_plan = str(plan_file)

    # Both planned files exist on disk before the DoD is saved.
    chapter_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text(
        '<h1>Guide</h1><a href="chapters/01-getting-started.html">One</a>\n'
    )
    first_chapter.write_text("<h1>One</h1><p>thin</p>\n")
    saved_dod = store.save(definition)

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )
    reference_read = ToolCall(
        id="read-reference",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )

    outcome = await drift_hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.message is not None
    assert "completed artifact set scope" in outcome.message
    assert str(temp_dir / "guide") in outcome.message
1922
1923
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_verification_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """A reference read issued with ``source="verification"`` is allowed once all planned files exist."""
    tool_registry = create_default_registry(temp_dir)
    write_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
    )

    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done("Create a multi-file guide from a reference")
    definition.status = "in_progress"

    guide_root = temp_dir / "guide"
    chapter_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# Implementation Plan\n"
        "\n"
        "## File Changes\n"
        f"- `{guide_root}`\n"
        f"- `{chapter_dir}`\n"
        f"- `{index_page}`\n"
        f"- `{first_chapter}`\n"
        f"- `{second_chapter}`\n"
    )
    definition.implementation_plan = str(plan_file)

    # All three planned files are written before the DoD is saved.
    chapter_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text("index")
    first_chapter.write_text("one")
    second_chapter.write_text("two")
    saved_dod = store.save(definition)

    drift_hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_dod), messages=[]),
    )
    verification_read = ToolCall(
        id="read-verify-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )

    # The only difference from the blocked scenario is the "verification" source.
    outcome = await drift_hook.pre_tool_use(
        HookContext(
            tool_call=verification_read,
            tool=tool_registry.get("read"),
            registry=tool_registry,
            permission_policy=write_policy,
            source="verification",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
1982
1983
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_excessive_post_build_self_audits(
    temp_dir: Path,
) -> None:
    """The fifth native re-read of an already-built planned file is denied.

    Four pre/post read cycles on the same planned output pass with CONTINUE;
    the next pre_tool_use must come back DENY with a "blocked" terminal state
    and a message mentioning the post-build audit loop.
    """
    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"

    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done(
        "Create a multi-file guide from a reference"
    )
    definition.status = "in_progress"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{first_chapter}`",
        f"- `{chapters_dir / '02-installation.html'}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    audited_file = chapters_dir / "02-installation.html"
    index_page.write_text("<h1>Nginx Guide</h1>\n")
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    audited_file.write_text("<h1>Installation</h1>\n")

    saved_definition = store.save(definition)
    hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_definition), messages=[]),
    )

    def audit_read(attempt: int) -> HookContext:
        """Build a native read of the audited file with a per-attempt call id."""
        call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(audited_file)},
        )
        return HookContext(
            tool_call=call,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=permissions,
            source="native",
        )

    # The first four audits are tolerated and recorded via post_tool_use.
    for attempt in (1, 2, 3, 4):
        ctx = audit_read(attempt)
        allowed = await hook.pre_tool_use(ctx)
        assert allowed.decision == HookDecision.CONTINUE
        await hook.post_tool_use(ctx)

    blocked = await hook.pre_tool_use(audit_read(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.terminal_state == "blocked"
    assert blocked.message is not None
    assert "post-build audit loop" in blocked.message
2051
2052
@pytest.mark.asyncio
async def test_late_reference_drift_hook_requires_edit_during_active_repair_audit_loop(
    temp_dir: Path,
) -> None:
    """With a repair-focus message in the session, the fifth re-read demands an edit.

    The session carries a user message listing repair targets. After four
    allowed native reads of the index page, the fifth is denied and the
    message must tell the model to make a concrete edit, not to finish, and
    must name the resolved index path.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done(
        "Create a multi-file guide from a reference"
    )
    definition.status = "in_progress"

    guide_dir = temp_dir / "guide"
    chapters_dir = guide_dir / "chapters"
    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_page = guide_dir / "index.html"
    intro_page = chapters_dir / "01-introduction.html"
    config_page = chapters_dir / "03-basic-configuration.html"
    index_page.write_text("<h1>Nginx Guide</h1>\n")
    intro_page.write_text("<h1>Introduction</h1>\n")
    config_page.write_text("<h1>Configuration</h1>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{chapters_dir}/`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)
    saved_definition = store.save(definition)

    # User message that puts the session into an active repair focus.
    repair_prompt = "".join(
        [
            "Repair focus:\n",
            f"- Improve `{index_page}`: insufficient structured content.\n",
            f"- Improve `{intro_page}`: insufficient structured content.\n",
            f"- Improve `{config_page}`: thin content.\n",
            f"- Immediate next step: edit `{index_page}` with a substantial expansion.\n",
        ]
    )
    session = FakeSession(
        active_dod_path=str(saved_definition),
        messages=[Message(role=Role.USER, content=repair_prompt)],
    )
    hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=session,
    )

    def index_read(attempt: int) -> HookContext:
        """Build a native read of the index page with a per-attempt call id."""
        call = ToolCall(
            id=f"read-{attempt}",
            name="read",
            arguments={"file_path": str(index_page)},
        )
        return HookContext(
            tool_call=call,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=permissions,
            source="native",
        )

    for attempt in (1, 2, 3, 4):
        ctx = index_read(attempt)
        allowed = await hook.pre_tool_use(ctx)
        assert allowed.decision == HookDecision.CONTINUE
        await hook.post_tool_use(ctx)

    blocked = await hook.pre_tool_use(index_read(5))

    assert blocked.decision == HookDecision.DENY
    assert blocked.message is not None
    assert "post-build audit loop" in blocked.message
    assert "make one concrete edit, patch, or write" in blocked.message
    assert "Do not finish with a final response" in blocked.message
    assert str(index_page.resolve(strict=False)) in blocked.message
2138
2139
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_post_build_self_audits_during_verification(
    temp_dir: Path,
) -> None:
    """Repeated verification-sourced reads of a built file are never blocked.

    Five consecutive reads of the same planned output with
    source="verification" must all yield CONTINUE.
    """
    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"

    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done(
        "Create a multi-file guide from a reference"
    )
    definition.status = "in_progress"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{first_chapter}`",
        f"- `{chapters_dir / '02-installation.html'}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    audited_file = chapters_dir / "02-installation.html"
    index_page.write_text("<h1>Nginx Guide</h1>\n")
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    audited_file.write_text("<h1>Installation</h1>\n")

    saved_definition = store.save(definition)
    hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_definition), messages=[]),
    )

    def verification_read(attempt: int) -> HookContext:
        """Build a verification-sourced read of the audited file."""
        call = ToolCall(
            id=f"read-verify-{attempt}",
            name="read",
            arguments={"file_path": str(audited_file)},
        )
        return HookContext(
            tool_call=call,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=permissions,
            source="verification",
        )

    for attempt in (1, 2, 3, 4):
        ctx = verification_read(attempt)
        allowed = await hook.pre_tool_use(ctx)
        assert allowed.decision == HookDecision.CONTINUE
        await hook.post_tool_use(ctx)

    # Unlike the native case, the fifth attempt is still allowed.
    fifth = await hook.pre_tool_use(verification_read(5))

    assert fifth.decision == HookDecision.CONTINUE
2204
2205
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_relative_bash_reference_reads_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """A native bash listing of ``reference/`` via a relative path is denied.

    All planned guide files exist; the command first changes into the temp
    directory and then lists ``reference/`` relatively. The hook must answer
    DENY with a "blocked" terminal state and the completed-artifact-set
    scope message.
    """
    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"
    second_chapter = chapters_dir / "02-installation.html"

    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done(
        "Create a multi-file guide from a reference"
    )
    definition.status = "in_progress"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}`",
        f"- `{chapters_dir}`",
        f"- `{index_page}`",
        f"- `{first_chapter}`",
        f"- `{second_chapter}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text("index")
    first_chapter.write_text("one")
    second_chapter.write_text("two")

    saved_definition = store.save(definition)
    hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_definition), messages=[]),
    )

    listing_call = ToolCall(
        id="bash-relative-reference-1",
        name="bash",
        arguments={"command": f"cd {temp_dir} && ls -la reference/"},
    )
    outcome = await hook.pre_tool_use(
        HookContext(
            tool_call=listing_call,
            tool=tools.get("bash"),
            registry=tools,
            permission_policy=permissions,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "completed artifact set scope" in outcome.message
2269
2270
@pytest.mark.asyncio
async def test_late_reference_drift_hook_allows_relative_bash_post_build_audit_loop_during_verification(
    temp_dir: Path,
) -> None:
    """Repeated verification-sourced bash listings of the output dir are allowed.

    The same ``cd <temp_dir> && ls -la guide/chapters/`` command is issued
    five times with source="verification"; every pre_tool_use must return
    CONTINUE.
    """
    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"
    second_chapter = chapters_dir / "02-installation.html"

    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done(
        "Create a multi-file guide from a reference"
    )
    definition.status = "in_progress"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{first_chapter}`",
        f"- `{second_chapter}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text("<h1>Guide</h1>\n")
    first_chapter.write_text("<h1>One</h1>\n")
    second_chapter.write_text("<h1>Two</h1>\n")

    saved_definition = store.save(definition)
    hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_definition), messages=[]),
    )

    def verification_listing(attempt: int) -> HookContext:
        """Build a verification-sourced bash listing of the chapters directory."""
        call = ToolCall(
            id=f"bash-relative-audit-{attempt}",
            name="bash",
            arguments={"command": f"cd {temp_dir} && ls -la guide/chapters/"},
        )
        return HookContext(
            tool_call=call,
            tool=tools.get("bash"),
            registry=tools,
            permission_policy=permissions,
            source="verification",
        )

    for attempt in (1, 2, 3, 4):
        ctx = verification_listing(attempt)
        allowed = await hook.pre_tool_use(ctx)
        assert allowed.decision == HookDecision.CONTINUE
        await hook.post_tool_use(ctx)

    fifth = await hook.pre_tool_use(verification_listing(5))

    assert fifth.decision == HookDecision.CONTINUE
2336
2337
@pytest.mark.asyncio
async def test_late_reference_drift_hook_does_not_treat_empty_output_dir_as_complete_artifact_set(
    temp_dir: Path,
) -> None:
    """An empty planned chapters directory does not trigger a block.

    The plan lists the index page and a chapters directory. Only the index
    page is written; the directory exists but holds no files. A native read
    of the reference file must still return CONTINUE.
    """
    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"

    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done(
        "Create a multi-file guide from a reference"
    )
    definition.status = "in_progress"
    definition.completed_items = ["Create chapter files with appropriate content"]

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{chapters_dir}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    # Create the directory but leave it empty; only the index page exists.
    chapters_dir.mkdir(parents=True, exist_ok=True)
    index_page.write_text("index")

    saved_definition = store.save(definition)
    hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_definition), messages=[]),
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    outcome = await hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=permissions,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.CONTINUE
2394
2395
@pytest.mark.asyncio
async def test_late_reference_drift_hook_blocks_when_html_outputs_declare_missing_files(
    temp_dir: Path,
) -> None:
    """A reference read is denied when the index links to an unwritten chapter.

    The index page links to two chapter files but only the first is written.
    A native read of the reference file must be denied with a "blocked"
    terminal state, and the message must mention late reference drift and
    the missing ``02-installation.html``.
    """
    tools = create_default_registry(temp_dir)
    permissions = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tools.get_tool_requirements(),
    )
    store = DefinitionOfDoneStore(temp_dir)
    definition = create_definition_of_done(
        "Create a multi-file guide from a reference"
    )
    definition.status = "in_progress"
    definition.completed_items = ["Create chapter files with appropriate content"]

    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{chapters_dir}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))
    definition.implementation_plan = str(plan_file)

    chapters_dir.mkdir(parents=True, exist_ok=True)
    # The index links to two chapters, but only the first one is written.
    index_links = [
        '<a href="chapters/01-getting-started.html">One</a>\n',
        '<a href="chapters/02-installation.html">Two</a>\n',
    ]
    index_page.write_text("".join(index_links))
    first_chapter.write_text("one")
    definition.touched_files = [str(index_page), str(first_chapter)]

    saved_definition = store.save(definition)
    hook = LateReferenceDriftHook(
        dod_store=store,
        project_root=temp_dir,
        session=FakeSession(active_dod_path=str(saved_definition), messages=[]),
    )

    reference_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(temp_dir / "reference" / "index.html")},
    )
    outcome = await hook.pre_tool_use(
        HookContext(
            tool_call=reference_read,
            tool=tools.get("read"),
            registry=tools,
            permission_policy=permissions,
            source="native",
        )
    )

    assert outcome.decision == HookDecision.DENY
    assert outcome.terminal_state == "blocked"
    assert outcome.message is not None
    assert "late reference drift" in outcome.message
    assert "02-installation.html" in outcome.message
2463
2464
2465 @pytest.mark.asyncio
2466 async def test_missing_planned_output_read_hook_blocks_reads_of_declared_missing_output(
2467 temp_dir: Path,
2468 ) -> None:
2469 registry = create_default_registry(temp_dir)
2470 policy = build_permission_policy(
2471 active_mode=PermissionMode.WORKSPACE_WRITE,
2472 workspace_root=temp_dir,
2473 tool_requirements=registry.get_tool_requirements(),
2474 )
2475 dod_store = DefinitionOfDoneStore(temp_dir)
2476 dod = create_definition_of_done("Create a multi-file guide from a reference")
2477 dod.status = "in_progress"
2478 plan_path = temp_dir / "implementation.md"
2479 guide_root = temp_dir / "guide"
2480 chapters = guide_root / "chapters"
2481 plan_path.write_text(
2482 "\n".join(
2483 [
2484 "# Implementation Plan",
2485 "",
2486 "## File Changes",
2487 f"- `{guide_root / 'index.html'}`",
2488 f"- `{chapters}/`",
2489 "",
2490 ]
2491 )
2492 )
2493 dod.implementation_plan = str(plan_path)
2494 chapters.mkdir(parents=True, exist_ok=True)
2495 (guide_root / "index.html").write_text(
2496 "\n".join(
2497 [
2498 "<html>",
2499 '<a href="chapters/01-introduction.html">Chapter 1: Introduction</a>',
2500 '<a href="chapters/02-installation.html">Chapter 2: Installation</a>',
2501 '<a href="chapters/03-configuration-basics.html">Chapter 3: Configuration Basics</a>',
2502 "</html>",
2503 ]
2504 )
2505 + "\n"
2506 )
2507 (chapters / "01-introduction.html").write_text("<h1>Introduction</h1>\n")
2508 (chapters / "02-installation.html").write_text("<h1>Installation</h1>\n")
2509 dod_path = dod_store.save(dod)
2510 session = FakeSession(active_dod_path=str(dod_path), messages=[])
2511 hook = MissingPlannedOutputReadHook(
2512 dod_store=dod_store,
2513 project_root=temp_dir,
2514 session=session,
2515 )
2516 missing_target = chapters / "03-configuration-basics.html"
2517
2518 result = await hook.pre_tool_use(
2519 HookContext(
2520 tool_call=ToolCall(
2521 id="read-missing-output",
2522 name="read",
2523 arguments={"file_path": str(missing_target)},
2524 ),
2525 tool=registry.get("read"),
2526 registry=registry,
2527 permission_policy=policy,
2528 source="native",
2529 )
2530 )
2531
2532 assert result.decision == HookDecision.DENY
2533 assert result.terminal_state == "blocked"
2534 assert result.message is not None
2535 assert "missing planned output artifact" in result.message
2536 assert 'write(file_path="' in result.message
2537 assert "03-configuration-basics.html" in result.message
2538 assert "Chapter 3: Configuration Basics" in result.message
2539 assert "02-installation.html" in result.message