Reject placeholder HTML writes
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: 432985dd455836297f8b8ed7d43f17dc1510cc78
- Parents: 75beb49
- Tree: ec386d1
432985d
432985dd455836297f8b8ed7d43f17dc1510cc7875beb49
ec386d1

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/repair.py | 7 | 3 |
| M | src/loader/runtime/safeguard_services.py | 117 | 0 |
| M | tests/test_repair.py | 7 | 0 |
| M | tests/test_safeguard_services.py | 64 | 0 |

src/loader/runtime/repair.py (modified)
@@ -1756,7 +1756,8 @@ class ResponseRepairer: | ||
| 1756 | 1756 | '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">' |
| 1757 | 1757 | '<meta name="viewport" content="width=device-width, initial-scale=1.0">' |
| 1758 | 1758 | f"<title>{label}</title></head><body><div class=\"container\"><h1>{label}</h1>" |
| 1759 | - "<p>Starter overview for this guide.</p><nav><ul>" | |
| 1759 | + f"<p>{label} introduces the guide topic, the practical workflow, and the " | |
| 1760 | + "chapter sequence readers can follow from setup through verification.</p><nav><ul>" | |
| 1760 | 1761 | '<li><a href="chapters/01-introduction.html">Chapter 1: Introduction</a></li>' |
| 1761 | 1762 | '<li><a href="chapters/02-installation.html">Chapter 2: Installation</a></li>' |
| 1762 | 1763 | "</ul></nav></div></body></html>" |
@@ -1765,8 +1766,11 @@ class ResponseRepairer: | ||
| 1765 | 1766 | '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">' |
| 1766 | 1767 | '<meta name="viewport" content="width=device-width, initial-scale=1.0">' |
| 1767 | 1768 | f"<title>{label}</title></head><body><div class=\"container\"><h1>{label}</h1>" |
| 1768 | - "<p>Starter content for this chapter.</p><h2>Overview</h2><p>Key concepts go here.</p>" | |
| 1769 | - "<h2>Key Steps</h2><p>Practical steps go here.</p>" | |
| 1769 | + f"<p>{label} frames the chapter topic and connects it to the guide workflow.</p>" | |
| 1770 | + "<h2>Core Concepts</h2><p>This section introduces the essential ideas, " | |
| 1771 | + "configuration choices, and operational tradeoffs.</p>" | |
| 1772 | + "<h2>Practical Workflow</h2><p>This section walks through the actions, checks, " | |
| 1773 | + "and expected outcomes for a real environment.</p>" | |
| 1770 | 1774 | '<p><a href="../index.html">Back to Main Guide Index</a></p>' |
| 1771 | 1775 | "</div></body></html>" |
| 1772 | 1776 | ) |
src/loader/runtime/safeguard_services.py (modified)
@@ -624,6 +624,33 @@ class ValidationResult: | ||
| 624 | 624 | class PreActionValidator: |
| 625 | 625 | """Validates tool calls before execution to catch problematic actions.""" |
| 626 | 626 | |
| 627 | + HTML_PLACEHOLDER_PATTERNS = [ | |
| 628 | + ( | |
| 629 | + re.compile(r"\bstarter\s+(?:content|overview)\b", re.IGNORECASE), | |
| 630 | + "starter content", | |
| 631 | + ), | |
| 632 | + ( | |
| 633 | + re.compile(r"\bkey\s+concepts\s+go\s+here\b", re.IGNORECASE), | |
| 634 | + "key concepts go here", | |
| 635 | + ), | |
| 636 | + ( | |
| 637 | + re.compile(r"\bpractical\s+steps\s+go\s+here\b", re.IGNORECASE), | |
| 638 | + "practical steps go here", | |
| 639 | + ), | |
| 640 | + ( | |
| 641 | + re.compile(r"\blorem\s+ipsum\b", re.IGNORECASE), | |
| 642 | + "lorem ipsum", | |
| 643 | + ), | |
| 644 | + ( | |
| 645 | + re.compile(r"\bcoming\s+soon\b", re.IGNORECASE), | |
| 646 | + "coming soon", | |
| 647 | + ), | |
| 648 | + ( | |
| 649 | + re.compile(r"\bto\s+be\s+(?:added|written|completed|filled\s+in)\b", re.IGNORECASE), | |
| 650 | + "to be added/written", | |
| 651 | + ), | |
| 652 | + ] | |
| 653 | + | |
| 627 | 654 | DANGEROUS_PATTERNS = [ |
| 628 | 655 | (r'rm\s+(-[rf]+\s+)?/', "Dangerous: removing from root directory"), |
| 629 | 656 | (r'rm\s+-rf\s+~', "Dangerous: removing home directory"), |
@@ -749,6 +776,13 @@ class PreActionValidator: | ||
| 749 | 776 | severity="warning", |
| 750 | 777 | ) |
| 751 | 778 | |
| 779 | + html_placeholder_result = self._validate_html_placeholder_content( | |
| 780 | + str(file_path), | |
| 781 | + str(content), | |
| 782 | + ) | |
| 783 | + if not html_placeholder_result.valid: | |
| 784 | + return html_placeholder_result | |
| 785 | + | |
| 752 | 786 | sensitive_paths = ['/etc/', '/usr/', '/bin/', '/sbin/', '/boot/', '/sys/', '/proc/'] |
| 753 | 787 | for sensitive in sensitive_paths: |
| 754 | 788 | if file_path.startswith(sensitive): |
@@ -822,6 +856,13 @@ class PreActionValidator: | ||
| 822 | 856 | str(new_string), |
| 823 | 857 | ) |
| 824 | 858 | |
| 859 | + html_placeholder_result = self._validate_html_placeholder_content( | |
| 860 | + str(file_path), | |
| 861 | + prospective_content, | |
| 862 | + ) | |
| 863 | + if not html_placeholder_result.valid: | |
| 864 | + return html_placeholder_result | |
| 865 | + | |
| 825 | 866 | html_index_result = self._validate_html_index_links( |
| 826 | 867 | str(file_path), |
| 827 | 868 | prospective_content, |
@@ -875,8 +916,84 @@ class PreActionValidator: | ||
| 875 | 916 | severity="error", |
| 876 | 917 | ) |
| 877 | 918 | |
| 919 | + html_placeholder_result = self._validate_html_placeholder_patch( | |
| 920 | + str(file_path), | |
| 921 | + hunks, | |
| 922 | + raw_patch, | |
| 923 | + ) | |
| 924 | + if not html_placeholder_result.valid: | |
| 925 | + return html_placeholder_result | |
| 926 | + | |
| 878 | 927 | return ValidationResult(valid=True) |
| 879 | 928 | |
| 929 | + def _validate_html_placeholder_content( | |
| 930 | + self, | |
| 931 | + file_path: str, | |
| 932 | + content: str, | |
| 933 | + ) -> ValidationResult: | |
| 934 | + normalized = Path(file_path).expanduser() | |
| 935 | + if normalized.suffix.lower() not in {".html", ".htm"}: | |
| 936 | + return ValidationResult(valid=True) | |
| 937 | + | |
| 938 | + matched_labels = [ | |
| 939 | + label | |
| 940 | + for pattern, label in self.HTML_PLACEHOLDER_PATTERNS | |
| 941 | + if pattern.search(content) | |
| 942 | + ] | |
| 943 | + if not matched_labels: | |
| 944 | + return ValidationResult(valid=True) | |
| 945 | + | |
| 946 | + preview = ", ".join(matched_labels[:3]) | |
| 947 | + if len(matched_labels) > 3: | |
| 948 | + preview += ", ..." | |
| 949 | + return ValidationResult( | |
| 950 | + valid=False, | |
| 951 | + reason="HTML content contains placeholder or stub text", | |
| 952 | + suggestion=( | |
| 953 | + "Replace placeholder phrases with concrete user-facing content before " | |
| 954 | + f"writing the HTML artifact. Placeholder phrase(s): {preview}. Include " | |
| 955 | + "specific explanations, examples, commands, or structured prose instead." | |
| 956 | + ), | |
| 957 | + severity="error", | |
| 958 | + ) | |
| 959 | + | |
| 960 | + def _validate_html_placeholder_patch( | |
| 961 | + self, | |
| 962 | + file_path: str, | |
| 963 | + hunks: object, | |
| 964 | + raw_patch: object, | |
| 965 | + ) -> ValidationResult: | |
| 966 | + normalized = Path(file_path).expanduser() | |
| 967 | + if normalized.suffix.lower() not in {".html", ".htm"}: | |
| 968 | + return ValidationResult(valid=True) | |
| 969 | + | |
| 970 | + added_fragments: list[str] = [] | |
| 971 | + if isinstance(raw_patch, str): | |
| 972 | + for line in raw_patch.splitlines(): | |
| 973 | + if line.startswith("+") and not line.startswith("+++"): | |
| 974 | + added_fragments.append(line[1:]) | |
| 975 | + | |
| 976 | + if isinstance(hunks, list): | |
| 977 | + for hunk in hunks: | |
| 978 | + if not isinstance(hunk, dict): | |
| 979 | + continue | |
| 980 | + new_lines = hunk.get("new_lines") | |
| 981 | + if isinstance(new_lines, list): | |
| 982 | + added_fragments.extend(str(line) for line in new_lines) | |
| 983 | + lines = hunk.get("lines") | |
| 984 | + if isinstance(lines, list): | |
| 985 | + for line in lines: | |
| 986 | + text = str(line) | |
| 987 | + if text.startswith("+") and not text.startswith("+++"): | |
| 988 | + added_fragments.append(text[1:]) | |
| 989 | + | |
| 990 | + if not added_fragments: | |
| 991 | + return ValidationResult(valid=True) | |
| 992 | + return self._validate_html_placeholder_content( | |
| 993 | + str(file_path), | |
| 994 | + "\n".join(added_fragments), | |
| 995 | + ) | |
| 996 | + | |
| 880 | 997 | def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult: |
| 881 | 998 | path = Path(file_path).expanduser() |
| 882 | 999 | if path.exists() or not path.suffix or not path.parent.exists(): |
tests/test_repair.py (modified)
@@ -1149,6 +1149,8 @@ def test_repeated_first_index_retry_includes_root_html_payload_shape( | ||
| 1149 | 1149 | assert "<title>Nginx Guide</title>" in decision.retry_message |
| 1150 | 1150 | assert 'href="chapters/01-introduction.html"' in decision.retry_message |
| 1151 | 1151 | assert "../index.html" not in decision.retry_message |
| 1152 | + assert "Starter overview" not in decision.retry_message | |
| 1153 | + assert "go here" not in decision.retry_message | |
| 1152 | 1154 | |
| 1153 | 1155 | |
| 1154 | 1156 | def test_empty_response_retry_prefers_pending_index_over_broad_directory_headline( |
@@ -1607,6 +1609,9 @@ def test_repeated_first_substantive_retry_includes_minimal_payload_shape( | ||
| 1607 | 1609 | assert "If blanking continues, use this minimal starter payload shape" in decision.retry_message |
| 1608 | 1610 | assert "<title>Chapter 1: Introduction to Nginx</title>" in decision.retry_message |
| 1609 | 1611 | assert "../index.html" in decision.retry_message |
| 1612 | + assert "Starter content" not in decision.retry_message | |
| 1613 | + assert "Key concepts go here" not in decision.retry_message | |
| 1614 | + assert "Practical steps go here" not in decision.retry_message | |
| 1610 | 1615 | |
| 1611 | 1616 | |
| 1612 | 1617 | def test_empty_response_retry_surfaces_minimal_child_html_payload_earlier_after_progress( |
@@ -1668,6 +1673,8 @@ def test_empty_response_retry_surfaces_minimal_child_html_payload_earlier_after_ | ||
| 1668 | 1673 | assert decision.retry_message is not None |
| 1669 | 1674 | assert "If blanking continues, use this minimal starter payload shape" in decision.retry_message |
| 1670 | 1675 | assert "<title>Chapter 1: Introduction to Nginx</title>" in decision.retry_message |
| 1676 | + assert "Starter content" not in decision.retry_message | |
| 1677 | + assert "go here" not in decision.retry_message | |
| 1671 | 1678 | |
| 1672 | 1679 | |
| 1673 | 1680 | def test_final_empty_response_retry_uses_short_exact_write_call( |
tests/test_safeguard_services.py (modified)
@@ -353,6 +353,70 @@ def test_pre_action_validator_allows_patch_string_without_hunks() -> None: | ||
| 353 | 353 | assert result == ValidationResult(valid=True) |
| 354 | 354 | |
| 355 | 355 | |
| 356 | +def test_pre_action_validator_blocks_placeholder_html_write(tmp_path: Path) -> None: | |
| 357 | + validator = PreActionValidator() | |
| 358 | + | |
| 359 | + result = validator.validate( | |
| 360 | + "write", | |
| 361 | + { | |
| 362 | + "file_path": str(tmp_path / "guide" / "chapters" / "01-introduction.html"), | |
| 363 | + "content": ( | |
| 364 | + "<html><body><h1>Introduction</h1>" | |
| 365 | + "<p>Starter content for this chapter.</p>" | |
| 366 | + "<h2>Overview</h2><p>Key concepts go here.</p>" | |
| 367 | + "</body></html>" | |
| 368 | + ), | |
| 369 | + }, | |
| 370 | + ) | |
| 371 | + | |
| 372 | + assert result.valid is False | |
| 373 | + assert result.reason == "HTML content contains placeholder or stub text" | |
| 374 | + assert "concrete user-facing content" in result.suggestion | |
| 375 | + assert "starter content" in result.suggestion | |
| 376 | + | |
| 377 | + | |
| 378 | +def test_pre_action_validator_blocks_placeholder_html_edit(tmp_path: Path) -> None: | |
| 379 | + validator = PreActionValidator() | |
| 380 | + page = tmp_path / "guide" / "chapters" / "01-introduction.html" | |
| 381 | + page.parent.mkdir(parents=True) | |
| 382 | + page.write_text("<html><body><h1>Introduction</h1></body></html>") | |
| 383 | + | |
| 384 | + result = validator.validate( | |
| 385 | + "edit", | |
| 386 | + { | |
| 387 | + "file_path": str(page), | |
| 388 | + "old_string": "</body>", | |
| 389 | + "new_string": "<p>Practical steps go here.</p></body>", | |
| 390 | + }, | |
| 391 | + ) | |
| 392 | + | |
| 393 | + assert result.valid is False | |
| 394 | + assert result.reason == "HTML content contains placeholder or stub text" | |
| 395 | + assert "practical steps go here" in result.suggestion | |
| 396 | + | |
| 397 | + | |
| 398 | +def test_pre_action_validator_blocks_placeholder_html_patch(tmp_path: Path) -> None: | |
| 399 | + validator = PreActionValidator() | |
| 400 | + | |
| 401 | + result = validator.validate( | |
| 402 | + "patch", | |
| 403 | + { | |
| 404 | + "file_path": str(tmp_path / "guide" / "chapters" / "02-installation.html"), | |
| 405 | + "patch": ( | |
| 406 | + "--- a/guide/chapters/02-installation.html\n" | |
| 407 | + "+++ b/guide/chapters/02-installation.html\n" | |
| 408 | + "@@ -1,1 +1,2 @@\n" | |
| 409 | + " <h1>Installation</h1>\n" | |
| 410 | + "+<p>Coming soon.</p>\n" | |
| 411 | + ), | |
| 412 | + }, | |
| 413 | + ) | |
| 414 | + | |
| 415 | + assert result.valid is False | |
| 416 | + assert result.reason == "HTML content contains placeholder or stub text" | |
| 417 | + assert "coming soon" in result.suggestion | |
| 418 | + | |
| 419 | + | |
| 356 | 420 | def test_pre_action_validator_blocks_shell_text_rewrite_for_html_target() -> None: |
| 357 | 421 | validator = PreActionValidator() |
| 358 | 422 | |