@@ -97,6 +97,18 @@ _MUTATING_FILE_CHANGE_HINTS = ( |
| 97 | "developing", | 97 | "developing", |
| 98 | ) | 98 | ) |
| 99 | _MINIMUM_SUBSTANTIVE_HTML_GUIDE_PAGES = 4 | 99 | _MINIMUM_SUBSTANTIVE_HTML_GUIDE_PAGES = 4 |
# Default floors for generated HTML guide pages: minimum plain-text length
# (characters) and minimum content-block count, for ``index.html`` and for
# every other ("chapter") page. These are also the lower bound of any floor
# derived from a reference guide.
_MINIMUM_SUBSTANTIVE_HTML_INDEX_CHARS = 400
_MINIMUM_SUBSTANTIVE_HTML_INDEX_BLOCKS = 4
_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_CHARS = 900
_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_BLOCKS = 8
# Scaling for floors derived from a reference guide: the reference measurement
# is multiplied by the *_FRACTION value and limited to the matching *_CAP.
_REFERENCE_HTML_INDEX_CHAR_FRACTION = 0.35
_REFERENCE_HTML_INDEX_BLOCK_FRACTION = 0.35
_REFERENCE_HTML_INDEX_CHAR_CAP = 1400
_REFERENCE_HTML_INDEX_BLOCK_CAP = 12
_REFERENCE_HTML_CHAPTER_CHAR_FRACTION = 0.50
_REFERENCE_HTML_CHAPTER_BLOCK_FRACTION = 0.45
_REFERENCE_HTML_CHAPTER_CHAR_CAP = 2200
_REFERENCE_HTML_CHAPTER_BLOCK_CAP = 18
| 100 | | 112 | |
| 101 | | 113 | |
| 102 | @dataclass | 114 | @dataclass |
@@ -113,6 +125,16 @@ class VerificationEvidence: |
| 113 | kind: VerificationKind = "runtime" | 125 | kind: VerificationKind = "runtime" |
| 114 | | 126 | |
| 115 | | 127 | |
@dataclass(frozen=True)
class HtmlGuideQualityFloor:
    """Minimum content density expected for substantive generated HTML guides.

    Immutable bundle of the four thresholds that are interpolated into the
    generated HTML quality-check script. Every field defaults to the matching
    module-level ``_MINIMUM_SUBSTANTIVE_HTML_*`` constant.
    """

    # Minimum plain-text length (tags removed) for a page named index.html.
    index_chars: int = _MINIMUM_SUBSTANTIVE_HTML_INDEX_CHARS
    # Minimum number of content-block tags for a page named index.html.
    index_blocks: int = _MINIMUM_SUBSTANTIVE_HTML_INDEX_BLOCKS
    # Minimum plain-text length (tags removed) for any other page.
    chapter_chars: int = _MINIMUM_SUBSTANTIVE_HTML_CHAPTER_CHARS
    # Minimum number of content-block tags for any other page.
    chapter_blocks: int = _MINIMUM_SUBSTANTIVE_HTML_CHAPTER_BLOCKS
| | 136 | + |
| | 137 | + |
| 116 | @dataclass | 138 | @dataclass |
| 117 | class DefinitionOfDone: | 139 | class DefinitionOfDone: |
| 118 | """Single source of truth for task completion state.""" | 140 | """Single source of truth for task completion state.""" |
@@ -727,6 +749,11 @@ def _derive_multi_page_html_quality_command( |
| 727 | return None | 749 | return None |
| 728 | | 750 | |
| 729 | path_literals = ", ".join(repr(str(path)) for path in html_paths) | 751 | path_literals = ", ".join(repr(str(path)) for path in html_paths) |
| | 752 | + quality_floor = _derive_html_guide_quality_floor( |
| | 753 | + task_statement, |
| | 754 | + output_paths=html_paths, |
| | 755 | + project_root=project_root, |
| | 756 | + ) |
| 730 | return "\n".join( | 757 | return "\n".join( |
| 731 | [ | 758 | [ |
| 732 | "python3 - <<'PY'", | 759 | "python3 - <<'PY'", |
@@ -734,6 +761,10 @@ def _derive_multi_page_html_quality_command( |
| 734 | "import re", | 761 | "import re", |
| 735 | "", | 762 | "", |
| 736 | f"paths = [{path_literals}]", | 763 | f"paths = [{path_literals}]", |
| | 764 | + f"minimum_index_chars = {quality_floor.index_chars}", |
| | 765 | + f"minimum_index_blocks = {quality_floor.index_blocks}", |
| | 766 | + f"minimum_chapter_chars = {quality_floor.chapter_chars}", |
| | 767 | + f"minimum_chapter_blocks = {quality_floor.chapter_blocks}", |
| 737 | "tag_pattern = re.compile(r'<[^>]+>')", | 768 | "tag_pattern = re.compile(r'<[^>]+>')", |
| 738 | "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)", | 769 | "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)", |
| 739 | "issues = []", | 770 | "issues = []", |
@@ -748,8 +779,8 @@ def _derive_multi_page_html_quality_command( |
| 748 | " plain = re.sub(r'\\s+', ' ', plain).strip()", | 779 | " plain = re.sub(r'\\s+', ' ', plain).strip()", |
| 749 | " content_blocks = len(content_block_pattern.findall(text))", | 780 | " content_blocks = len(content_block_pattern.findall(text))", |
| 750 | " has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))", | 781 | " has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))", |
| 751 | - " minimum_chars = 180 if path.name.lower() == 'index.html' else 220", | 782 | + " minimum_chars = minimum_index_chars if path.name.lower() == 'index.html' else minimum_chapter_chars", |
| 752 | - " minimum_blocks = 2 if path.name.lower() == 'index.html' else 3", | 783 | + " minimum_blocks = minimum_index_blocks if path.name.lower() == 'index.html' else minimum_chapter_blocks", |
| 753 | " if not has_h1:", | 784 | " if not has_h1:", |
| 754 | " issues.append(f'{path}: missing <h1>')", | 785 | " issues.append(f'{path}: missing <h1>')", |
| 755 | " if len(plain) < minimum_chars:", | 786 | " if len(plain) < minimum_chars:", |
@@ -770,6 +801,180 @@ def _derive_multi_page_html_quality_command( |
| 770 | ) | 801 | ) |
| 771 | | 802 | |
| 772 | | 803 | |
def _derive_html_guide_quality_floor(
    task_statement: str,
    *,
    output_paths: list[Path],
    project_root: Path,
) -> HtmlGuideQualityFloor:
    """Derive content floors for generated HTML pages from reference guides.

    Reference HTML files are the ones located through paths mentioned in
    ``task_statement``. Each readable file is measured for plain-text length
    and content-block count, measurements are grouped into ``index.html``
    pages and all other ("chapter") pages, and the lower quartile of each
    group is scaled into a floor.

    Args:
        task_statement: Free-form task text that may mention reference paths.
        output_paths: HTML files the task is expected to produce.
        project_root: Base directory for relative path mentions.

    Returns:
        The default ``HtmlGuideQualityFloor()`` when no reference file is found
        or none can be read; otherwise a floor scaled from the references and
        clamped between the module minimums and caps.
    """
    reference_paths = _reference_html_guide_paths_from_task(
        task_statement,
        output_paths=output_paths,
        project_root=project_root,
    )
    if not reference_paths:
        return HtmlGuideQualityFloor()

    index_chars: list[int] = []
    index_blocks: list[int] = []
    chapter_chars: list[int] = []
    chapter_blocks: list[int] = []
    for path in reference_paths:
        try:
            # Decode explicitly and leniently: the locale default encoding can
            # raise UnicodeDecodeError (not an OSError) on a legacy-encoded
            # page, and only lengths/tag counts are needed from the text.
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        # Strip tags, then collapse whitespace, to approximate visible text.
        plain = re.sub(r"\s+", " ", re.sub(r"<[^>]+>", " ", text)).strip()
        blocks = len(
            re.findall(
                r"<(p|li|pre|code|section|article|table|h2|h3|h4)\b",
                text,
                re.IGNORECASE,
            )
        )
        if path.name.lower() == "index.html":
            index_chars.append(len(plain))
            index_blocks.append(blocks)
        else:
            chapter_chars.append(len(plain))
            chapter_blocks.append(blocks)

    if not chapter_chars and not index_chars:
        # Every reference file was unreadable.
        return HtmlGuideQualityFloor()

    # `_lower_quartile` returns 0 for an empty group, so `or` borrows the other
    # page kind's measurement when one kind has no reference pages.
    return HtmlGuideQualityFloor(
        index_chars=_reference_quality_floor(
            _lower_quartile(index_chars) or _lower_quartile(chapter_chars),
            fraction=_REFERENCE_HTML_INDEX_CHAR_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_INDEX_CHARS,
            cap=_REFERENCE_HTML_INDEX_CHAR_CAP,
        ),
        index_blocks=_reference_quality_floor(
            _lower_quartile(index_blocks) or _lower_quartile(chapter_blocks),
            fraction=_REFERENCE_HTML_INDEX_BLOCK_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_INDEX_BLOCKS,
            cap=_REFERENCE_HTML_INDEX_BLOCK_CAP,
        ),
        chapter_chars=_reference_quality_floor(
            _lower_quartile(chapter_chars) or _lower_quartile(index_chars),
            fraction=_REFERENCE_HTML_CHAPTER_CHAR_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_CHARS,
            cap=_REFERENCE_HTML_CHAPTER_CHAR_CAP,
        ),
        chapter_blocks=_reference_quality_floor(
            _lower_quartile(chapter_blocks) or _lower_quartile(index_blocks),
            fraction=_REFERENCE_HTML_CHAPTER_BLOCK_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_BLOCKS,
            cap=_REFERENCE_HTML_CHAPTER_BLOCK_CAP,
        ),
    )
| | 871 | + |
| | 872 | + |
def _reference_html_guide_paths_from_task(
    task_statement: str,
    *,
    output_paths: list[Path],
    project_root: Path,
) -> list[Path]:
    """Collect reference HTML files reachable from paths named in the task.

    Each path mention is expanded, anchored at ``project_root`` when relative,
    and resolved. Mentions that fall inside the output scope are ignored, and
    so are individual files found inside that scope when a mentioned directory
    contains it, so the guide being generated is never used as its own
    reference.

    Args:
        task_statement: Free-form task text to scan for path mentions.
        output_paths: HTML files the task is expected to produce.
        project_root: Base directory for relative path mentions.

    Returns:
        De-duplicated reference HTML paths in discovery order.
    """
    output_roots = _html_output_scope_roots(output_paths)

    def _in_output_scope(candidate: Path) -> bool:
        return any(_path_is_within_root(candidate, root) for root in output_roots)

    paths: list[Path] = []
    seen: set[str] = set()
    for raw_path in _extract_task_path_mentions(task_statement):
        try:
            # expanduser() raises RuntimeError when "~name" has no resolvable
            # home directory, which free-text tokens such as "~5" trigger, so
            # it has to sit inside the guarded section with resolve().
            path = Path(raw_path).expanduser()
            if not path.is_absolute():
                path = project_root / path
            resolved = path.resolve(strict=False)
        except (OSError, RuntimeError, ValueError):
            continue
        if _in_output_scope(resolved):
            continue
        for candidate in _collect_reference_html_paths(resolved):
            key = str(candidate)
            # A mentioned parent directory can contain the output directory;
            # drop the files that belong to the output scope.
            if key in seen or _in_output_scope(candidate):
                continue
            seen.add(key)
            paths.append(candidate)
    return paths
| | 899 | + |
| | 900 | + |
| | 901 | +def _html_output_scope_roots(output_paths: list[Path]) -> tuple[Path, ...]: |
| | 902 | + roots: list[Path] = [] |
| | 903 | + seen: set[str] = set() |
| | 904 | + for path in output_paths: |
| | 905 | + try: |
| | 906 | + resolved = path.expanduser().resolve(strict=False) |
| | 907 | + except (OSError, RuntimeError, ValueError): |
| | 908 | + continue |
| | 909 | + root = resolved.parent |
| | 910 | + if root.name.lower() in {"chapters", "pages", "sections"}: |
| | 911 | + root = root.parent |
| | 912 | + key = str(root) |
| | 913 | + if key in seen: |
| | 914 | + continue |
| | 915 | + seen.add(key) |
| | 916 | + roots.append(root) |
| | 917 | + return tuple(roots) |
| | 918 | + |
| | 919 | + |
def _extract_task_path_mentions(task_statement: str) -> list[str]:
    """Extract candidate filesystem path strings from a task statement.

    Candidates are the contents of backtick-quoted spans plus bare tokens that
    start with ``~``, ``/``, ``./`` or ``../``. Surrounding quotes, brackets
    and punctuation are trimmed, and each candidate is kept only if
    ``_looks_like_path_literal`` accepts it.

    Args:
        task_statement: Free-form task text.

    Returns:
        Unique cleaned path strings in order of first appearance.
    """
    candidates = re.findall(r"`([^`]+)`", task_statement)
    candidates.extend(
        re.findall(
            # "\.\./" matches a literal "../"; the earlier "\../" matched a dot
            # followed by ANY character and a slash.
            r"(?:~|/|\./|\.\./)[A-Za-z0-9_./~+-]+",
            task_statement,
        )
    )

    paths: list[str] = []
    seen: set[str] = set()
    for candidate in candidates:
        # Trim wrappers on both sides, but strip "." only from the right: a
        # leading "." belongs to "./x", "../x" or ".hidden", and removing it
        # would turn "./ref" into the absolute path "/ref".
        cleaned = (
            candidate.strip()
            .lstrip("`'\",:;()[]{}")
            .rstrip("`'\",.:;()[]{}")
        )
        if not cleaned or cleaned in seen:
            continue
        if not _looks_like_path_literal(cleaned):
            continue
        seen.add(cleaned)
        paths.append(cleaned)
    return paths
| | 940 | + |
| | 941 | + |
| | 942 | +def _collect_reference_html_paths(path: Path) -> list[Path]: |
| | 943 | + if path.is_file() and path.suffix.lower() in {".html", ".htm"}: |
| | 944 | + return [path] |
| | 945 | + if not path.is_dir(): |
| | 946 | + return [] |
| | 947 | + |
| | 948 | + candidates: list[Path] = [] |
| | 949 | + index = path / "index.html" |
| | 950 | + if index.is_file(): |
| | 951 | + candidates.append(index) |
| | 952 | + try: |
| | 953 | + candidates.extend(sorted(child for child in path.rglob("*.html") if child.is_file())) |
| | 954 | + except OSError: |
| | 955 | + return candidates |
| | 956 | + return list(dict.fromkeys(candidates[:32])) |
| | 957 | + |
| | 958 | + |
| | 959 | +def _lower_quartile(values: list[int]) -> int: |
| | 960 | + if not values: |
| | 961 | + return 0 |
| | 962 | + ordered = sorted(values) |
| | 963 | + return ordered[max(0, (len(ordered) - 1) // 4)] |
| | 964 | + |
| | 965 | + |
| | 966 | +def _reference_quality_floor( |
| | 967 | + value: int, |
| | 968 | + *, |
| | 969 | + fraction: float, |
| | 970 | + minimum: int, |
| | 971 | + cap: int, |
| | 972 | +) -> int: |
| | 973 | + if value <= 0: |
| | 974 | + return minimum |
| | 975 | + return max(minimum, min(cap, int(value * fraction))) |
| | 976 | + |
| | 977 | + |
| 773 | def collect_planned_artifact_targets( | 978 | def collect_planned_artifact_targets( |
| 774 | dod: DefinitionOfDone, | 979 | dod: DefinitionOfDone, |
| 775 | *, | 980 | *, |