Reject thin guide chapters
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
bba591d61151d031678ee6f8a23343decfd3362a- Parents
-
4db4c36 - Tree
28ad414
bba591d
bba591d61151d031678ee6f8a23343decfd3362a4db4c36
28ad414| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/safeguard_services.py
|
74 | 0 |
| M |
tests/test_safeguard_services.py
|
99 | 0 |
src/loader/runtime/safeguard_services.pymodified@@ -829,6 +829,10 @@ class ValidationResult: | |||
| 829 | class PreActionValidator: | 829 | class PreActionValidator: |
| 830 | """Validates tool calls before execution to catch problematic actions.""" | 830 | """Validates tool calls before execution to catch problematic actions.""" |
| 831 | 831 | ||
| 832 | + HTML_GUIDE_CHAPTER_MIN_TEXT_CHARS = 1200 | ||
| 833 | + HTML_GUIDE_CHAPTER_MIN_BLOCKS = 12 | ||
| 834 | + HTML_GUIDE_CHAPTER_MIN_DECLARED_LINKS = 4 | ||
| 835 | + | ||
| 832 | HTML_PLACEHOLDER_PATTERNS = [ | 836 | HTML_PLACEHOLDER_PATTERNS = [ |
| 833 | ( | 837 | ( |
| 834 | re.compile(r"\bstarter\s+(?:content|overview)\b", re.IGNORECASE), | 838 | re.compile(r"\bstarter\s+(?:content|overview)\b", re.IGNORECASE), |
@@ -1063,6 +1067,13 @@ class PreActionValidator: | |||
| 1063 | if not html_asset_result.valid: | 1067 | if not html_asset_result.valid: |
| 1064 | return html_asset_result | 1068 | return html_asset_result |
| 1065 | 1069 | ||
| 1070 | + html_guide_substance_result = self._validate_html_substantive_guide_content( | ||
| 1071 | + str(file_path), | ||
| 1072 | + str(content), | ||
| 1073 | + ) | ||
| 1074 | + if not html_guide_substance_result.valid: | ||
| 1075 | + return html_guide_substance_result | ||
| 1076 | + | ||
| 1066 | html_root_coverage_result = self._validate_html_root_link_coverage( | 1077 | html_root_coverage_result = self._validate_html_root_link_coverage( |
| 1067 | str(file_path), | 1078 | str(file_path), |
| 1068 | str(content), | 1079 | str(content), |
@@ -1445,6 +1456,69 @@ class PreActionValidator: | |||
| 1445 | severity="error", | 1456 | severity="error", |
| 1446 | ) | 1457 | ) |
| 1447 | 1458 | ||
| 1459 | + def _validate_html_substantive_guide_content( | ||
| 1460 | + self, | ||
| 1461 | + file_path: str, | ||
| 1462 | + content: str, | ||
| 1463 | + ) -> ValidationResult: | ||
| 1464 | + normalized = Path(file_path).expanduser() | ||
| 1465 | + if normalized.suffix.lower() not in {".html", ".htm"}: | ||
| 1466 | + return ValidationResult(valid=True) | ||
| 1467 | + if normalized.name.lower() in {"index.html", "index.htm"}: | ||
| 1468 | + return ValidationResult(valid=True) | ||
| 1469 | + if not _html_text_looks_like_document(content): | ||
| 1470 | + return ValidationResult(valid=True) | ||
| 1471 | + | ||
| 1472 | + root = self._resolve_html_artifact_root(normalized) | ||
| 1473 | + current_relative = self._relative_html_target(root, normalized) | ||
| 1474 | + if current_relative is None: | ||
| 1475 | + return ValidationResult(valid=True) | ||
| 1476 | + relative_parts = Path(current_relative).parts | ||
| 1477 | + if not relative_parts or relative_parts[0] not in {"chapters", "chapter"}: | ||
| 1478 | + return ValidationResult(valid=True) | ||
| 1479 | + | ||
| 1480 | + declared_targets, authoritative_root_graph = self._collect_declared_html_targets( | ||
| 1481 | + root, | ||
| 1482 | + normalized, | ||
| 1483 | + ) | ||
| 1484 | + if ( | ||
| 1485 | + not authoritative_root_graph | ||
| 1486 | + or current_relative not in declared_targets | ||
| 1487 | + or len(declared_targets) < self.HTML_GUIDE_CHAPTER_MIN_DECLARED_LINKS | ||
| 1488 | + ): | ||
| 1489 | + return ValidationResult(valid=True) | ||
| 1490 | + | ||
| 1491 | + text_without_tags = re.sub(r"<[^>]+>", " ", content) | ||
| 1492 | + plain_text = re.sub(r"\s+", " ", text_without_tags).strip() | ||
| 1493 | + content_blocks = len( | ||
| 1494 | + re.findall( | ||
| 1495 | + r"<(p|li|pre|code|section|article|table|h2|h3|h4)\b", | ||
| 1496 | + content, | ||
| 1497 | + re.IGNORECASE, | ||
| 1498 | + ) | ||
| 1499 | + ) | ||
| 1500 | + if ( | ||
| 1501 | + len(plain_text) >= self.HTML_GUIDE_CHAPTER_MIN_TEXT_CHARS | ||
| 1502 | + and content_blocks >= self.HTML_GUIDE_CHAPTER_MIN_BLOCKS | ||
| 1503 | + ): | ||
| 1504 | + return ValidationResult(valid=True) | ||
| 1505 | + | ||
| 1506 | + return ValidationResult( | ||
| 1507 | + valid=False, | ||
| 1508 | + reason="HTML guide chapter content is too thin", | ||
| 1509 | + suggestion=( | ||
| 1510 | + "This looks like a root-declared multi-page guide chapter. Write a " | ||
| 1511 | + "substantive chapter before creating it: include concrete explanations, " | ||
| 1512 | + "commands, examples, lists, and troubleshooting/detail sections. " | ||
| 1513 | + f"Current content has {len(plain_text)} text chars and " | ||
| 1514 | + f"{content_blocks} structured block(s); expected at least " | ||
| 1515 | + f"{self.HTML_GUIDE_CHAPTER_MIN_TEXT_CHARS} text chars and " | ||
| 1516 | + f"{self.HTML_GUIDE_CHAPTER_MIN_BLOCKS} structured blocks. For an " | ||
| 1517 | + "equally thorough guide, aim comfortably above that floor." | ||
| 1518 | + ), | ||
| 1519 | + severity="error", | ||
| 1520 | + ) | ||
| 1521 | + | ||
| 1448 | def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult: | 1522 | def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult: |
| 1449 | path = Path(file_path).expanduser() | 1523 | path = Path(file_path).expanduser() |
| 1450 | if path.exists() or not path.suffix or not path.parent.exists(): | 1524 | if path.exists() or not path.suffix or not path.parent.exists(): |
tests/test_safeguard_services.pymodified@@ -698,6 +698,105 @@ def test_pre_action_validator_allows_existing_local_html_asset_href( | |||
| 698 | assert result.valid is True | 698 | assert result.valid is True |
| 699 | 699 | ||
| 700 | 700 | ||
| 701 | +def test_pre_action_validator_blocks_thin_root_declared_guide_chapter( | ||
| 702 | + tmp_path: Path, | ||
| 703 | +) -> None: | ||
| 704 | + validator = PreActionValidator() | ||
| 705 | + guide = tmp_path / "guides" / "nginx" | ||
| 706 | + chapters = guide / "chapters" | ||
| 707 | + chapters.mkdir(parents=True) | ||
| 708 | + (guide / "index.html").write_text( | ||
| 709 | + "<html><body>" | ||
| 710 | + '<a href="chapters/01-introduction.html">Introduction</a>' | ||
| 711 | + '<a href="chapters/02-installation.html">Installation</a>' | ||
| 712 | + '<a href="chapters/03-configuration.html">Configuration</a>' | ||
| 713 | + '<a href="chapters/04-reverse-proxy.html">Reverse Proxy</a>' | ||
| 714 | + "</body></html>" | ||
| 715 | + ) | ||
| 716 | + | ||
| 717 | + result = validator.validate( | ||
| 718 | + "write", | ||
| 719 | + { | ||
| 720 | + "file_path": str(chapters / "01-introduction.html"), | ||
| 721 | + "content": ( | ||
| 722 | + '<!DOCTYPE html><html lang="en"><body><h1>Introduction</h1>' | ||
| 723 | + "<p>Nginx is a web server and reverse proxy.</p>" | ||
| 724 | + '<p><a href="../index.html">Back</a></p></body></html>' | ||
| 725 | + ), | ||
| 726 | + }, | ||
| 727 | + ) | ||
| 728 | + | ||
| 729 | + assert result.valid is False | ||
| 730 | + assert result.reason == "HTML guide chapter content is too thin" | ||
| 731 | + assert "root-declared multi-page guide chapter" in result.suggestion | ||
| 732 | + assert "text chars" in result.suggestion | ||
| 733 | + assert "structured block" in result.suggestion | ||
| 734 | + | ||
| 735 | + | ||
| 736 | +def test_pre_action_validator_allows_small_non_guide_html_chapter( | ||
| 737 | + tmp_path: Path, | ||
| 738 | +) -> None: | ||
| 739 | + validator = PreActionValidator() | ||
| 740 | + page = tmp_path / "scratch" / "chapters" / "01-note.html" | ||
| 741 | + page.parent.mkdir(parents=True) | ||
| 742 | + | ||
| 743 | + result = validator.validate( | ||
| 744 | + "write", | ||
| 745 | + { | ||
| 746 | + "file_path": str(page), | ||
| 747 | + "content": ( | ||
| 748 | + '<!DOCTYPE html><html lang="en"><body><h1>Note</h1>' | ||
| 749 | + "<p>Small standalone HTML is allowed outside a root-declared guide.</p>" | ||
| 750 | + "</body></html>" | ||
| 751 | + ), | ||
| 752 | + }, | ||
| 753 | + ) | ||
| 754 | + | ||
| 755 | + assert result.valid is True | ||
| 756 | + | ||
| 757 | + | ||
| 758 | +def test_pre_action_validator_allows_substantive_root_declared_guide_chapter( | ||
| 759 | + tmp_path: Path, | ||
| 760 | +) -> None: | ||
| 761 | + validator = PreActionValidator() | ||
| 762 | + guide = tmp_path / "guides" / "nginx" | ||
| 763 | + chapters = guide / "chapters" | ||
| 764 | + chapters.mkdir(parents=True) | ||
| 765 | + (guide / "index.html").write_text( | ||
| 766 | + "<html><body>" | ||
| 767 | + '<a href="chapters/01-introduction.html">Introduction</a>' | ||
| 768 | + '<a href="chapters/02-installation.html">Installation</a>' | ||
| 769 | + '<a href="chapters/03-configuration.html">Configuration</a>' | ||
| 770 | + '<a href="chapters/04-reverse-proxy.html">Reverse Proxy</a>' | ||
| 771 | + "</body></html>" | ||
| 772 | + ) | ||
| 773 | + paragraph = ( | ||
| 774 | + "Concrete Nginx guide content with commands, examples, tradeoffs, " | ||
| 775 | + "checks, and operational context. " | ||
| 776 | + ) | ||
| 777 | + sections = "".join( | ||
| 778 | + ( | ||
| 779 | + f"<h2>Section {index}</h2>" | ||
| 780 | + f"<p>{paragraph * 3}</p>" | ||
| 781 | + f"<ul><li>Action {index}</li><li>Verification {index}</li></ul>" | ||
| 782 | + ) | ||
| 783 | + for index in range(1, 8) | ||
| 784 | + ) | ||
| 785 | + | ||
| 786 | + result = validator.validate( | ||
| 787 | + "write", | ||
| 788 | + { | ||
| 789 | + "file_path": str(chapters / "01-introduction.html"), | ||
| 790 | + "content": ( | ||
| 791 | + f'<!DOCTYPE html><html lang="en"><body><h1>Introduction</h1>{sections}' | ||
| 792 | + '<p><a href="../index.html">Back</a></p></body></html>' | ||
| 793 | + ), | ||
| 794 | + }, | ||
| 795 | + ) | ||
| 796 | + | ||
| 797 | + assert result.valid is True | ||
| 798 | + | ||
| 799 | + | ||
| 701 | def test_pre_action_validator_blocks_new_root_index_parent_html_link( | 800 | def test_pre_action_validator_blocks_new_root_index_parent_html_link( |
| 702 | tmp_path: Path, | 801 | tmp_path: Path, |
| 703 | ) -> None: | 802 | ) -> None: |