tenseleyflow/loader / 432985d

Browse files

Reject placeholder HTML writes

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
432985dd455836297f8b8ed7d43f17dc1510cc78
Parents
75beb49
Tree
ec386d1

4 changed files

Status File + -
M src/loader/runtime/repair.py 7 3
M src/loader/runtime/safeguard_services.py 117 0
M tests/test_repair.py 7 0
M tests/test_safeguard_services.py 64 0
src/loader/runtime/repair.py modified
@@ -1756,7 +1756,8 @@ class ResponseRepairer:
17561756
                 '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">'
17571757
                 '<meta name="viewport" content="width=device-width, initial-scale=1.0">'
17581758
                 f"<title>{label}</title></head><body><div class=\"container\"><h1>{label}</h1>"
1759
-                "<p>Starter overview for this guide.</p><nav><ul>"
1759
+                f"<p>{label} introduces the guide topic, the practical workflow, and the "
1760
+                "chapter sequence readers can follow from setup through verification.</p><nav><ul>"
17601761
                 '<li><a href="chapters/01-introduction.html">Chapter 1: Introduction</a></li>'
17611762
                 '<li><a href="chapters/02-installation.html">Chapter 2: Installation</a></li>'
17621763
                 "</ul></nav></div></body></html>"
@@ -1765,8 +1766,11 @@ class ResponseRepairer:
17651766
             '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">'
17661767
             '<meta name="viewport" content="width=device-width, initial-scale=1.0">'
17671768
             f"<title>{label}</title></head><body><div class=\"container\"><h1>{label}</h1>"
1768
-            "<p>Starter content for this chapter.</p><h2>Overview</h2><p>Key concepts go here.</p>"
1769
-            "<h2>Key Steps</h2><p>Practical steps go here.</p>"
1769
+            f"<p>{label} frames the chapter topic and connects it to the guide workflow.</p>"
1770
+            "<h2>Core Concepts</h2><p>This section introduces the essential ideas, "
1771
+            "configuration choices, and operational tradeoffs.</p>"
1772
+            "<h2>Practical Workflow</h2><p>This section walks through the actions, checks, "
1773
+            "and expected outcomes for a real environment.</p>"
17701774
             '<p><a href="../index.html">Back to Main Guide Index</a></p>'
17711775
             "</div></body></html>"
17721776
         )
src/loader/runtime/safeguard_services.py modified
@@ -624,6 +624,33 @@ class ValidationResult:
624624
 class PreActionValidator:
625625
     """Validates tool calls before execution to catch problematic actions."""
626626
 
627
+    HTML_PLACEHOLDER_PATTERNS = [
628
+        (
629
+            re.compile(r"\bstarter\s+(?:content|overview)\b", re.IGNORECASE),
630
+            "starter content",
631
+        ),
632
+        (
633
+            re.compile(r"\bkey\s+concepts\s+go\s+here\b", re.IGNORECASE),
634
+            "key concepts go here",
635
+        ),
636
+        (
637
+            re.compile(r"\bpractical\s+steps\s+go\s+here\b", re.IGNORECASE),
638
+            "practical steps go here",
639
+        ),
640
+        (
641
+            re.compile(r"\blorem\s+ipsum\b", re.IGNORECASE),
642
+            "lorem ipsum",
643
+        ),
644
+        (
645
+            re.compile(r"\bcoming\s+soon\b", re.IGNORECASE),
646
+            "coming soon",
647
+        ),
648
+        (
649
+            re.compile(r"\bto\s+be\s+(?:added|written|completed|filled\s+in)\b", re.IGNORECASE),
650
+            "to be added/written",
651
+        ),
652
+    ]
653
+
627654
     DANGEROUS_PATTERNS = [
628655
         (r'rm\s+(-[rf]+\s+)?/', "Dangerous: removing from root directory"),
629656
         (r'rm\s+-rf\s+~', "Dangerous: removing home directory"),
@@ -749,6 +776,13 @@ class PreActionValidator:
749776
                 severity="warning",
750777
             )
751778
 
779
+        html_placeholder_result = self._validate_html_placeholder_content(
780
+            str(file_path),
781
+            str(content),
782
+        )
783
+        if not html_placeholder_result.valid:
784
+            return html_placeholder_result
785
+
752786
         sensitive_paths = ['/etc/', '/usr/', '/bin/', '/sbin/', '/boot/', '/sys/', '/proc/']
753787
         for sensitive in sensitive_paths:
754788
             if file_path.startswith(sensitive):
@@ -822,6 +856,13 @@ class PreActionValidator:
822856
             str(new_string),
823857
         )
824858
 
859
+        html_placeholder_result = self._validate_html_placeholder_content(
860
+            str(file_path),
861
+            prospective_content,
862
+        )
863
+        if not html_placeholder_result.valid:
864
+            return html_placeholder_result
865
+
825866
         html_index_result = self._validate_html_index_links(
826867
             str(file_path),
827868
             prospective_content,
@@ -875,8 +916,84 @@ class PreActionValidator:
875916
                 severity="error",
876917
             )
877918
 
919
+        html_placeholder_result = self._validate_html_placeholder_patch(
920
+            str(file_path),
921
+            hunks,
922
+            raw_patch,
923
+        )
924
+        if not html_placeholder_result.valid:
925
+            return html_placeholder_result
926
+
878927
         return ValidationResult(valid=True)
879928
 
929
+    def _validate_html_placeholder_content(
930
+        self,
931
+        file_path: str,
932
+        content: str,
933
+    ) -> ValidationResult:
934
+        normalized = Path(file_path).expanduser()
935
+        if normalized.suffix.lower() not in {".html", ".htm"}:
936
+            return ValidationResult(valid=True)
937
+
938
+        matched_labels = [
939
+            label
940
+            for pattern, label in self.HTML_PLACEHOLDER_PATTERNS
941
+            if pattern.search(content)
942
+        ]
943
+        if not matched_labels:
944
+            return ValidationResult(valid=True)
945
+
946
+        preview = ", ".join(matched_labels[:3])
947
+        if len(matched_labels) > 3:
948
+            preview += ", ..."
949
+        return ValidationResult(
950
+            valid=False,
951
+            reason="HTML content contains placeholder or stub text",
952
+            suggestion=(
953
+                "Replace placeholder phrases with concrete user-facing content before "
954
+                f"writing the HTML artifact. Placeholder phrase(s): {preview}. Include "
955
+                "specific explanations, examples, commands, or structured prose instead."
956
+            ),
957
+            severity="error",
958
+        )
959
+
960
+    def _validate_html_placeholder_patch(
961
+        self,
962
+        file_path: str,
963
+        hunks: object,
964
+        raw_patch: object,
965
+    ) -> ValidationResult:
966
+        normalized = Path(file_path).expanduser()
967
+        if normalized.suffix.lower() not in {".html", ".htm"}:
968
+            return ValidationResult(valid=True)
969
+
970
+        added_fragments: list[str] = []
971
+        if isinstance(raw_patch, str):
972
+            for line in raw_patch.splitlines():
973
+                if line.startswith("+") and not line.startswith("+++"):
974
+                    added_fragments.append(line[1:])
975
+
976
+        if isinstance(hunks, list):
977
+            for hunk in hunks:
978
+                if not isinstance(hunk, dict):
979
+                    continue
980
+                new_lines = hunk.get("new_lines")
981
+                if isinstance(new_lines, list):
982
+                    added_fragments.extend(str(line) for line in new_lines)
983
+                lines = hunk.get("lines")
984
+                if isinstance(lines, list):
985
+                    for line in lines:
986
+                        text = str(line)
987
+                        if text.startswith("+") and not text.startswith("+++"):
988
+                            added_fragments.append(text[1:])
989
+
990
+        if not added_fragments:
991
+            return ValidationResult(valid=True)
992
+        return self._validate_html_placeholder_content(
993
+            str(file_path),
994
+            "\n".join(added_fragments),
995
+        )
996
+
880997
     def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult:
881998
         path = Path(file_path).expanduser()
882999
         if path.exists() or not path.suffix or not path.parent.exists():
tests/test_repair.py modified
@@ -1149,6 +1149,8 @@ def test_repeated_first_index_retry_includes_root_html_payload_shape(
11491149
     assert "<title>Nginx Guide</title>" in decision.retry_message
11501150
     assert 'href="chapters/01-introduction.html"' in decision.retry_message
11511151
     assert "../index.html" not in decision.retry_message
1152
+    assert "Starter overview" not in decision.retry_message
1153
+    assert "go here" not in decision.retry_message
11521154
 
11531155
 
11541156
 def test_empty_response_retry_prefers_pending_index_over_broad_directory_headline(
@@ -1607,6 +1609,9 @@ def test_repeated_first_substantive_retry_includes_minimal_payload_shape(
16071609
     assert "If blanking continues, use this minimal starter payload shape" in decision.retry_message
16081610
     assert "<title>Chapter 1: Introduction to Nginx</title>" in decision.retry_message
16091611
     assert "../index.html" in decision.retry_message
1612
+    assert "Starter content" not in decision.retry_message
1613
+    assert "Key concepts go here" not in decision.retry_message
1614
+    assert "Practical steps go here" not in decision.retry_message
16101615
 
16111616
 
16121617
 def test_empty_response_retry_surfaces_minimal_child_html_payload_earlier_after_progress(
@@ -1668,6 +1673,8 @@ def test_empty_response_retry_surfaces_minimal_child_html_payload_earlier_after_
16681673
     assert decision.retry_message is not None
16691674
     assert "If blanking continues, use this minimal starter payload shape" in decision.retry_message
16701675
     assert "<title>Chapter 1: Introduction to Nginx</title>" in decision.retry_message
1676
+    assert "Starter content" not in decision.retry_message
1677
+    assert "go here" not in decision.retry_message
16711678
 
16721679
 
16731680
 def test_final_empty_response_retry_uses_short_exact_write_call(
tests/test_safeguard_services.py modified
@@ -353,6 +353,70 @@ def test_pre_action_validator_allows_patch_string_without_hunks() -> None:
353353
     assert result == ValidationResult(valid=True)
354354
 
355355
 
356
+def test_pre_action_validator_blocks_placeholder_html_write(tmp_path: Path) -> None:
357
+    validator = PreActionValidator()
358
+
359
+    result = validator.validate(
360
+        "write",
361
+        {
362
+            "file_path": str(tmp_path / "guide" / "chapters" / "01-introduction.html"),
363
+            "content": (
364
+                "<html><body><h1>Introduction</h1>"
365
+                "<p>Starter content for this chapter.</p>"
366
+                "<h2>Overview</h2><p>Key concepts go here.</p>"
367
+                "</body></html>"
368
+            ),
369
+        },
370
+    )
371
+
372
+    assert result.valid is False
373
+    assert result.reason == "HTML content contains placeholder or stub text"
374
+    assert "concrete user-facing content" in result.suggestion
375
+    assert "starter content" in result.suggestion
376
+
377
+
378
+def test_pre_action_validator_blocks_placeholder_html_edit(tmp_path: Path) -> None:
379
+    validator = PreActionValidator()
380
+    page = tmp_path / "guide" / "chapters" / "01-introduction.html"
381
+    page.parent.mkdir(parents=True)
382
+    page.write_text("<html><body><h1>Introduction</h1></body></html>")
383
+
384
+    result = validator.validate(
385
+        "edit",
386
+        {
387
+            "file_path": str(page),
388
+            "old_string": "</body>",
389
+            "new_string": "<p>Practical steps go here.</p></body>",
390
+        },
391
+    )
392
+
393
+    assert result.valid is False
394
+    assert result.reason == "HTML content contains placeholder or stub text"
395
+    assert "practical steps go here" in result.suggestion
396
+
397
+
398
+def test_pre_action_validator_blocks_placeholder_html_patch(tmp_path: Path) -> None:
399
+    validator = PreActionValidator()
400
+
401
+    result = validator.validate(
402
+        "patch",
403
+        {
404
+            "file_path": str(tmp_path / "guide" / "chapters" / "02-installation.html"),
405
+            "patch": (
406
+                "--- a/guide/chapters/02-installation.html\n"
407
+                "+++ b/guide/chapters/02-installation.html\n"
408
+                "@@ -1,1 +1,2 @@\n"
409
+                " <h1>Installation</h1>\n"
410
+                "+<p>Coming soon.</p>\n"
411
+            ),
412
+        },
413
+    )
414
+
415
+    assert result.valid is False
416
+    assert result.reason == "HTML content contains placeholder or stub text"
417
+    assert "coming soon" in result.suggestion
418
+
419
+
356420
 def test_pre_action_validator_blocks_shell_text_rewrite_for_html_target() -> None:
357421
     validator = PreActionValidator()
358422