tenseleyflow/loader / bba591d

Browse files

Reject thin guide chapters

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
bba591d61151d031678ee6f8a23343decfd3362a
Parents
4db4c36
Tree
28ad414

2 changed files

StatusFile+-
M src/loader/runtime/safeguard_services.py 74 0
M tests/test_safeguard_services.py 99 0
src/loader/runtime/safeguard_services.pymodified
@@ -829,6 +829,10 @@ class ValidationResult:
829829
 class PreActionValidator:
830830
     """Validates tool calls before execution to catch problematic actions."""
831831
 
832
+    HTML_GUIDE_CHAPTER_MIN_TEXT_CHARS = 1200
833
+    HTML_GUIDE_CHAPTER_MIN_BLOCKS = 12
834
+    HTML_GUIDE_CHAPTER_MIN_DECLARED_LINKS = 4
835
+
832836
     HTML_PLACEHOLDER_PATTERNS = [
833837
         (
834838
             re.compile(r"\bstarter\s+(?:content|overview)\b", re.IGNORECASE),
@@ -1063,6 +1067,13 @@ class PreActionValidator:
10631067
         if not html_asset_result.valid:
10641068
             return html_asset_result
10651069
 
1070
+        html_guide_substance_result = self._validate_html_substantive_guide_content(
1071
+            str(file_path),
1072
+            str(content),
1073
+        )
1074
+        if not html_guide_substance_result.valid:
1075
+            return html_guide_substance_result
1076
+
10661077
         html_root_coverage_result = self._validate_html_root_link_coverage(
10671078
             str(file_path),
10681079
             str(content),
@@ -1445,6 +1456,69 @@ class PreActionValidator:
14451456
             severity="error",
14461457
         )
14471458
 
1459
+    def _validate_html_substantive_guide_content(
1460
+        self,
1461
+        file_path: str,
1462
+        content: str,
1463
+    ) -> ValidationResult:
1464
+        normalized = Path(file_path).expanduser()
1465
+        if normalized.suffix.lower() not in {".html", ".htm"}:
1466
+            return ValidationResult(valid=True)
1467
+        if normalized.name.lower() in {"index.html", "index.htm"}:
1468
+            return ValidationResult(valid=True)
1469
+        if not _html_text_looks_like_document(content):
1470
+            return ValidationResult(valid=True)
1471
+
1472
+        root = self._resolve_html_artifact_root(normalized)
1473
+        current_relative = self._relative_html_target(root, normalized)
1474
+        if current_relative is None:
1475
+            return ValidationResult(valid=True)
1476
+        relative_parts = Path(current_relative).parts
1477
+        if not relative_parts or relative_parts[0] not in {"chapters", "chapter"}:
1478
+            return ValidationResult(valid=True)
1479
+
1480
+        declared_targets, authoritative_root_graph = self._collect_declared_html_targets(
1481
+            root,
1482
+            normalized,
1483
+        )
1484
+        if (
1485
+            not authoritative_root_graph
1486
+            or current_relative not in declared_targets
1487
+            or len(declared_targets) < self.HTML_GUIDE_CHAPTER_MIN_DECLARED_LINKS
1488
+        ):
1489
+            return ValidationResult(valid=True)
1490
+
1491
+        text_without_tags = re.sub(r"<[^>]+>", " ", content)
1492
+        plain_text = re.sub(r"\s+", " ", text_without_tags).strip()
1493
+        content_blocks = len(
1494
+            re.findall(
1495
+                r"<(p|li|pre|code|section|article|table|h2|h3|h4)\b",
1496
+                content,
1497
+                re.IGNORECASE,
1498
+            )
1499
+        )
1500
+        if (
1501
+            len(plain_text) >= self.HTML_GUIDE_CHAPTER_MIN_TEXT_CHARS
1502
+            and content_blocks >= self.HTML_GUIDE_CHAPTER_MIN_BLOCKS
1503
+        ):
1504
+            return ValidationResult(valid=True)
1505
+
1506
+        return ValidationResult(
1507
+            valid=False,
1508
+            reason="HTML guide chapter content is too thin",
1509
+            suggestion=(
1510
+                "This looks like a root-declared multi-page guide chapter. Write a "
1511
+                "substantive chapter before creating it: include concrete explanations, "
1512
+                "commands, examples, lists, and troubleshooting/detail sections. "
1513
+                f"Current content has {len(plain_text)} text chars and "
1514
+                f"{content_blocks} structured block(s); expected at least "
1515
+                f"{self.HTML_GUIDE_CHAPTER_MIN_TEXT_CHARS} text chars and "
1516
+                f"{self.HTML_GUIDE_CHAPTER_MIN_BLOCKS} structured blocks. For an "
1517
+                "equally thorough guide, aim comfortably above that floor."
1518
+            ),
1519
+            severity="error",
1520
+        )
1521
+
14481522
     def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult:
14491523
         path = Path(file_path).expanduser()
14501524
         if path.exists() or not path.suffix or not path.parent.exists():
tests/test_safeguard_services.pymodified
@@ -698,6 +698,105 @@ def test_pre_action_validator_allows_existing_local_html_asset_href(
698698
     assert result.valid is True
699699
 
700700
 
701
+def test_pre_action_validator_blocks_thin_root_declared_guide_chapter(
702
+    tmp_path: Path,
703
+) -> None:
704
+    validator = PreActionValidator()
705
+    guide = tmp_path / "guides" / "nginx"
706
+    chapters = guide / "chapters"
707
+    chapters.mkdir(parents=True)
708
+    (guide / "index.html").write_text(
709
+        "<html><body>"
710
+        '<a href="chapters/01-introduction.html">Introduction</a>'
711
+        '<a href="chapters/02-installation.html">Installation</a>'
712
+        '<a href="chapters/03-configuration.html">Configuration</a>'
713
+        '<a href="chapters/04-reverse-proxy.html">Reverse Proxy</a>'
714
+        "</body></html>"
715
+    )
716
+
717
+    result = validator.validate(
718
+        "write",
719
+        {
720
+            "file_path": str(chapters / "01-introduction.html"),
721
+            "content": (
722
+                '<!DOCTYPE html><html lang="en"><body><h1>Introduction</h1>'
723
+                "<p>Nginx is a web server and reverse proxy.</p>"
724
+                '<p><a href="../index.html">Back</a></p></body></html>'
725
+            ),
726
+        },
727
+    )
728
+
729
+    assert result.valid is False
730
+    assert result.reason == "HTML guide chapter content is too thin"
731
+    assert "root-declared multi-page guide chapter" in result.suggestion
732
+    assert "text chars" in result.suggestion
733
+    assert "structured block" in result.suggestion
734
+
735
+
736
+def test_pre_action_validator_allows_small_non_guide_html_chapter(
737
+    tmp_path: Path,
738
+) -> None:
739
+    validator = PreActionValidator()
740
+    page = tmp_path / "scratch" / "chapters" / "01-note.html"
741
+    page.parent.mkdir(parents=True)
742
+
743
+    result = validator.validate(
744
+        "write",
745
+        {
746
+            "file_path": str(page),
747
+            "content": (
748
+                '<!DOCTYPE html><html lang="en"><body><h1>Note</h1>'
749
+                "<p>Small standalone HTML is allowed outside a root-declared guide.</p>"
750
+                "</body></html>"
751
+            ),
752
+        },
753
+    )
754
+
755
+    assert result.valid is True
756
+
757
+
758
+def test_pre_action_validator_allows_substantive_root_declared_guide_chapter(
759
+    tmp_path: Path,
760
+) -> None:
761
+    validator = PreActionValidator()
762
+    guide = tmp_path / "guides" / "nginx"
763
+    chapters = guide / "chapters"
764
+    chapters.mkdir(parents=True)
765
+    (guide / "index.html").write_text(
766
+        "<html><body>"
767
+        '<a href="chapters/01-introduction.html">Introduction</a>'
768
+        '<a href="chapters/02-installation.html">Installation</a>'
769
+        '<a href="chapters/03-configuration.html">Configuration</a>'
770
+        '<a href="chapters/04-reverse-proxy.html">Reverse Proxy</a>'
771
+        "</body></html>"
772
+    )
773
+    paragraph = (
774
+        "Concrete Nginx guide content with commands, examples, tradeoffs, "
775
+        "checks, and operational context. "
776
+    )
777
+    sections = "".join(
778
+        (
779
+            f"<h2>Section {index}</h2>"
780
+            f"<p>{paragraph * 3}</p>"
781
+            f"<ul><li>Action {index}</li><li>Verification {index}</li></ul>"
782
+        )
783
+        for index in range(1, 8)
784
+    )
785
+
786
+    result = validator.validate(
787
+        "write",
788
+        {
789
+            "file_path": str(chapters / "01-introduction.html"),
790
+            "content": (
791
+                f'<!DOCTYPE html><html lang="en"><body><h1>Introduction</h1>{sections}'
792
+                '<p><a href="../index.html">Back</a></p></body></html>'
793
+            ),
794
+        },
795
+    )
796
+
797
+    assert result.valid is True
798
+
799
+
701800
 def test_pre_action_validator_blocks_new_root_index_parent_html_link(
702801
     tmp_path: Path,
703802
 ) -> None: