tenseleyflow/loader / 5b0f1ee

Browse files

Tighten post-build guide convergence

Authored by espadonne
SHA
5b0f1ee901534e254c02ad9640b9ff17dae4311d
Parents
df5639d
Tree
3089c60

4 changed files

StatusFile+-
M src/loader/runtime/dod.py 108 0
M src/loader/runtime/hooks.py 58 0
M tests/test_dod.py 38 0
M tests/test_permissions.py 69 0
src/loader/runtime/dod.pymodified
@@ -266,6 +266,13 @@ def derive_verification_commands(
266266
 
267267
     if html_link_command:
268268
         _append_unique(commands, html_link_command)
269
+    html_quality_command = _derive_multi_page_html_quality_command(
270
+        dod,
271
+        project_root=project_root,
272
+        task_statement=task_statement,
273
+    )
274
+    if html_quality_command:
275
+        _append_unique(commands, html_quality_command)
269276
     for command in _build_planned_artifact_verification_commands(planned_artifact_targets):
270277
         _append_unique(commands, command)
271278
 
@@ -570,6 +577,62 @@ def _derive_local_html_link_verification_command(
570577
     return _build_local_html_link_verification_command(resolved_paths)
571578
 
572579
 
580
+def _derive_multi_page_html_quality_command(
581
+    dod: DefinitionOfDone,
582
+    *,
583
+    project_root: Path,
584
+    task_statement: str,
585
+) -> str | None:
586
+    html_paths = _multi_page_html_quality_paths(dod, project_root=project_root)
587
+    if len(html_paths) < 4:
588
+        return None
589
+    if not _task_requires_substantive_html_guide_quality(task_statement):
590
+        return None
591
+
592
+    path_literals = ", ".join(repr(str(path)) for path in html_paths)
593
+    return "\n".join(
594
+        [
595
+            "python3 - <<'PY'",
596
+            "from pathlib import Path",
597
+            "import re",
598
+            "",
599
+            f"paths = [{path_literals}]",
600
+            "tag_pattern = re.compile(r'<[^>]+>')",
601
+            "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)",
602
+            "issues = []",
603
+            "checked = 0",
604
+            "for raw_path in paths:",
605
+            "    path = Path(raw_path)",
606
+            "    if not path.exists():",
607
+            "        continue",
608
+            "    checked += 1",
609
+            "    text = path.read_text()",
610
+            "    plain = tag_pattern.sub(' ', text)",
611
+            "    plain = re.sub(r'\\s+', ' ', plain).strip()",
612
+            "    content_blocks = len(content_block_pattern.findall(text))",
613
+            "    has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))",
614
+            "    minimum_chars = 180 if path.name.lower() == 'index.html' else 220",
615
+            "    minimum_blocks = 2 if path.name.lower() == 'index.html' else 3",
616
+            "    if not has_h1:",
617
+            "        issues.append(f'{path}: missing <h1>')",
618
+            "    if len(plain) < minimum_chars:",
619
+            "        issues.append(",
620
+            "            f'{path}: thin content ({len(plain)} text chars, expected at least {minimum_chars})'",
621
+            "        )",
622
+            "    if content_blocks < minimum_blocks:",
623
+            "        issues.append(",
624
+            "            f'{path}: insufficient structured content ({content_blocks} blocks, expected at least {minimum_blocks})'",
625
+            "        )",
626
+            "if issues:",
627
+            "    print('HTML guide content quality issues:')",
628
+            "    print('\\n'.join(issues))",
629
+            "    raise SystemExit(1)",
630
+            "print(f'Checked HTML guide content quality across {checked} file(s).')",
631
+            "PY",
632
+        ]
633
+    )
634
+
635
+
573636
 def collect_planned_artifact_targets(
574637
     dod: DefinitionOfDone,
575638
     *,
@@ -735,6 +798,51 @@ def _build_planned_artifact_verification_commands(
735798
     return commands
736799
 
737800
 
801
+def _multi_page_html_quality_paths(
802
+    dod: DefinitionOfDone,
803
+    *,
804
+    project_root: Path,
805
+) -> list[Path]:
806
+    planned_targets = collect_planned_artifact_targets(
807
+        dod,
808
+        project_root=project_root,
809
+        max_paths=24,
810
+    )
811
+    planned_html = [
812
+        target
813
+        for target, expect_directory in planned_targets
814
+        if not expect_directory and target.suffix.lower() in {".html", ".htm"}
815
+    ]
816
+    if planned_html:
817
+        return planned_html
818
+
819
+    touched_html = []
820
+    for path_str in dod.touched_files:
821
+        path = Path(path_str)
822
+        effective_path = path if path.is_absolute() else (project_root / path)
823
+        if effective_path.suffix.lower() in {".html", ".htm"}:
824
+            touched_html.append(effective_path)
825
+    return list(dict.fromkeys(touched_html))
826
+
827
+
828
+def _task_requires_substantive_html_guide_quality(task_statement: str) -> bool:
829
+    lowered = task_statement.lower()
830
+    if not any(token in lowered for token in ("guide", "tutorial", "documentation", "docs")):
831
+        return False
832
+    return any(
833
+        token in lowered
834
+        for token in (
835
+            "thorough",
836
+            "comprehensive",
837
+            "detailed",
838
+            "equally",
839
+            "cadence",
840
+            "chapter",
841
+            "chapters",
842
+        )
843
+    )
844
+
845
+
738846
 def _extract_markdown_section_lines(markdown: str, heading: str) -> list[str]:
739847
     current_heading: str | None = None
740848
     collected: list[str] = []
src/loader/runtime/hooks.pymodified
@@ -682,11 +682,14 @@ class LateReferenceDriftHook(BaseToolHook):
682682
     """Block reopening old reference paths once planned artifacts are well underway."""
683683
 
684684
     _MIN_COMPLETED_FILES = 3
685
+    _MAX_COMPLETED_SCOPE_OBSERVATIONS = 4
685686
 
686687
     def __init__(self, *, dod_store: DefinitionOfDoneStore, project_root: Path, session: Any) -> None:
687688
         self.dod_store = dod_store
688689
         self.project_root = project_root
689690
         self.session = session
691
+        self._completed_scope_key: tuple[str, ...] | None = None
692
+        self._completed_scope_observation_count = 0
690693
 
691694
     async def pre_tool_use(self, context: HookContext) -> HookResult:
692695
         if context.tool_call.name not in _OBSERVATION_TOOLS:
@@ -698,6 +701,26 @@ class LateReferenceDriftHook(BaseToolHook):
698701
             if not observed_paths:
699702
                 return HookResult()
700703
             if all(path_within_allowed_roots(path, completed_scope) for path in observed_paths):
704
+                self._sync_completed_scope_state(completed_scope)
705
+                if (
706
+                    context.source != "verification"
707
+                    and self._completed_scope_observation_count
708
+                    >= self._MAX_COMPLETED_SCOPE_OBSERVATIONS
709
+                ):
710
+                    roots_preview = ", ".join(f"`{root}`" for root in completed_scope[:2])
711
+                    if len(completed_scope) > 2:
712
+                        roots_preview += ", ..."
713
+                    return HookResult(
714
+                        decision=HookDecision.DENY,
715
+                        message=(
716
+                            "[Blocked - post-build audit loop: all explicitly planned artifacts "
717
+                            "already exist and the current output set has already been inspected "
718
+                            "several times.] Suggestion: move to verification now or make one "
719
+                            "concrete edit for a specific mismatch inside "
720
+                            f"{roots_preview} instead of more rereads."
721
+                        ),
722
+                        terminal_state="blocked",
723
+                    )
701724
                 return HookResult()
702725
 
703726
             roots_preview = ", ".join(f"`{root}`" for root in completed_scope[:2])
@@ -786,6 +809,30 @@ class LateReferenceDriftHook(BaseToolHook):
786809
             return None
787810
         return missing_label, tuple(planned_roots)
788811
 
812
+    async def post_tool_use(self, context: HookContext) -> HookResult:
813
+        if context.tool_call.name in _MUTATION_TOOLS:
814
+            self._reset_completed_scope_state()
815
+            return HookResult()
816
+        if context.tool_call.name not in _OBSERVATION_TOOLS:
817
+            return HookResult()
818
+        if context.source == "verification":
819
+            return HookResult()
820
+
821
+        completed_scope = self._completed_artifact_scope()
822
+        if completed_scope is None:
823
+            self._reset_completed_scope_state()
824
+            return HookResult()
825
+
826
+        observed_paths = _extract_observation_paths(context.tool_call)
827
+        if not observed_paths:
828
+            return HookResult()
829
+        if not all(path_within_allowed_roots(path, completed_scope) for path in observed_paths):
830
+            return HookResult()
831
+
832
+        self._sync_completed_scope_state(completed_scope)
833
+        self._completed_scope_observation_count += 1
834
+        return HookResult()
835
+
789836
     def _completed_artifact_scope(self) -> tuple[str, ...] | None:
790837
         dod_path = getattr(self.session, "active_dod_path", None)
791838
         if not dod_path:
@@ -816,6 +863,17 @@ class LateReferenceDriftHook(BaseToolHook):
816863
             planned_roots.append(root)
817864
         return tuple(planned_roots)
818865
 
866
+    def _sync_completed_scope_state(self, completed_scope: tuple[str, ...]) -> None:
867
+        normalized = tuple(sorted(completed_scope))
868
+        if self._completed_scope_key == normalized:
869
+            return
870
+        self._completed_scope_key = normalized
871
+        self._completed_scope_observation_count = 0
872
+
873
+    def _reset_completed_scope_state(self) -> None:
874
+        self._completed_scope_key = None
875
+        self._completed_scope_observation_count = 0
876
+
819877
 
820878
 class HookManager:
821879
     """Runs tool hooks across Loader's three lifecycle events."""
tests/test_dod.pymodified
@@ -223,6 +223,44 @@ def test_derive_verification_commands_adds_planned_artifact_existence_checks(
223223
     assert f"test -d {tmp_path / 'docs/chapters'}" in commands
224224
 
225225
 
226
+def test_derive_verification_commands_adds_html_guide_quality_check_for_thorough_guides(
227
+    tmp_path: Path,
228
+) -> None:
229
+    docs = tmp_path / "docs"
230
+    chapters = docs / "chapters"
231
+    chapters.mkdir(parents=True)
232
+    implementation_plan = tmp_path / "implementation.md"
233
+    implementation_plan.write_text(
234
+        "\n".join(
235
+            [
236
+                "# Implementation Plan",
237
+                "",
238
+                "## File Changes",
239
+                f"- `{docs / 'index.html'}`",
240
+                f"- `{chapters / '01-introduction.html'}`",
241
+                f"- `{chapters / '02-installation.html'}`",
242
+                f"- `{chapters / '03-configuration.html'}`",
243
+                f"- `{chapters / '04-troubleshooting.html'}`",
244
+                "",
245
+            ]
246
+        )
247
+    )
248
+
249
+    dod = create_definition_of_done(
250
+        "Create an equally thorough multi-page HTML guide with chapter files."
251
+    )
252
+    dod.implementation_plan = str(implementation_plan)
253
+
254
+    commands = derive_verification_commands(
255
+        dod,
256
+        project_root=tmp_path,
257
+        task_statement=dod.task_statement,
258
+        supplement_existing=True,
259
+    )
260
+
261
+    assert any("HTML guide content quality issues:" in command for command in commands)
262
+
263
+
226264
 def test_collect_planned_artifact_targets_ignores_prose_path_fragments_in_refreshed_plan(
227265
     tmp_path: Path,
228266
 ) -> None:
tests/test_permissions.pymodified
@@ -1302,6 +1302,75 @@ async def test_late_reference_drift_hook_blocks_reference_reads_after_artifacts_
13021302
     assert str(temp_dir / "guide") in result.message
13031303
 
13041304
 
1305
+@pytest.mark.asyncio
1306
+async def test_late_reference_drift_hook_blocks_excessive_post_build_self_audits(
1307
+    temp_dir: Path,
1308
+) -> None:
1309
+    registry = create_default_registry(temp_dir)
1310
+    policy = build_permission_policy(
1311
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1312
+        workspace_root=temp_dir,
1313
+        tool_requirements=registry.get_tool_requirements(),
1314
+    )
1315
+    dod_store = DefinitionOfDoneStore(temp_dir)
1316
+    dod = create_definition_of_done("Create a multi-file guide from a reference")
1317
+    dod.status = "in_progress"
1318
+    plan_path = temp_dir / "implementation.md"
1319
+    plan_path.write_text(
1320
+        "\n".join(
1321
+            [
1322
+                "# Implementation Plan",
1323
+                "",
1324
+                "## File Changes",
1325
+                f"- `{temp_dir / 'guide' / 'index.html'}`",
1326
+                f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
1327
+                f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
1328
+                "",
1329
+            ]
1330
+        )
1331
+    )
1332
+    dod.implementation_plan = str(plan_path)
1333
+    guide_dir = temp_dir / "guide" / "chapters"
1334
+    guide_dir.mkdir(parents=True, exist_ok=True)
1335
+    target = guide_dir / "02-installation.html"
1336
+    (temp_dir / "guide" / "index.html").write_text("<h1>Nginx Guide</h1>\n")
1337
+    (guide_dir / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n")
1338
+    target.write_text("<h1>Installation</h1>\n")
1339
+    dod_path = dod_store.save(dod)
1340
+    session = FakeSession(active_dod_path=str(dod_path), messages=[])
1341
+    hook = LateReferenceDriftHook(
1342
+        dod_store=dod_store,
1343
+        project_root=temp_dir,
1344
+        session=session,
1345
+    )
1346
+
1347
+    def make_context(index: int) -> HookContext:
1348
+        return HookContext(
1349
+            tool_call=ToolCall(
1350
+                id=f"read-{index}",
1351
+                name="read",
1352
+                arguments={"file_path": str(target)},
1353
+            ),
1354
+            tool=registry.get("read"),
1355
+            registry=registry,
1356
+            permission_policy=policy,
1357
+            source="native",
1358
+        )
1359
+
1360
+    for index in range(1, 5):
1361
+        context = make_context(index)
1362
+        result = await hook.pre_tool_use(context)
1363
+        assert result.decision == HookDecision.CONTINUE
1364
+        await hook.post_tool_use(context)
1365
+
1366
+    blocked = await hook.pre_tool_use(make_context(5))
1367
+
1368
+    assert blocked.decision == HookDecision.DENY
1369
+    assert blocked.terminal_state == "blocked"
1370
+    assert blocked.message is not None
1371
+    assert "post-build audit loop" in blocked.message
1372
+
1373
+
13051374
 @pytest.mark.asyncio
13061375
 async def test_late_reference_drift_hook_does_not_treat_empty_output_dir_as_complete_artifact_set(
13071376
     temp_dir: Path,