@@ -97,6 +97,18 @@ _MUTATING_FILE_CHANGE_HINTS = ( |
| 97 | 97 | "developing", |
| 98 | 98 | ) |
# Page-count threshold for HTML guides.
# NOTE(review): the consumer of this constant is outside this excerpt — confirm
# exactly how it is applied.
_MINIMUM_SUBSTANTIVE_HTML_GUIDE_PAGES = 4

# Absolute floors for ``index.html``. They are the defaults of
# ``HtmlGuideQualityFloor`` and the lower bound of any reference-derived floor.
# "chars" is plain-text length after tag stripping; "blocks" is the number of
# content-bearing opening tags (p, li, pre, code, section, article, table, h2-h4).
_MINIMUM_SUBSTANTIVE_HTML_INDEX_CHARS = 400
_MINIMUM_SUBSTANTIVE_HTML_INDEX_BLOCKS = 4

# Absolute floors for every non-index (chapter) page.
_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_CHARS = 900
_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_BLOCKS = 8

# Reference-derived index floors: the lower-quartile measurement of the
# reference pages is multiplied by FRACTION and then limited to CAP.
_REFERENCE_HTML_INDEX_CHAR_FRACTION = 0.35
_REFERENCE_HTML_INDEX_CHAR_CAP = 1400
_REFERENCE_HTML_INDEX_BLOCK_FRACTION = 0.35
_REFERENCE_HTML_INDEX_BLOCK_CAP = 12

# Reference-derived chapter floors, same scheme as the index floors above.
_REFERENCE_HTML_CHAPTER_CHAR_FRACTION = 0.50
_REFERENCE_HTML_CHAPTER_CHAR_CAP = 2200
_REFERENCE_HTML_CHAPTER_BLOCK_FRACTION = 0.45
_REFERENCE_HTML_CHAPTER_BLOCK_CAP = 18
| 100 | 112 | |
| 101 | 113 | |
| 102 | 114 | @dataclass |
@@ -113,6 +125,16 @@ class VerificationEvidence: |
| 113 | 125 | kind: VerificationKind = "runtime" |
| 114 | 126 | |
| 115 | 127 | |
@dataclass(frozen=True)
class HtmlGuideQualityFloor:
    """Immutable content-density thresholds for generated HTML guide pages.

    The quality command embeds these four numbers in its generated checker
    script. ``index_*`` values apply to files named ``index.html``; the
    ``chapter_*`` values apply to every other page. Each field defaults to the
    matching module-level ``_MINIMUM_SUBSTANTIVE_HTML_*`` constant.
    """

    # Minimum plain-text length (tags stripped, whitespace collapsed) of index.html.
    index_chars: int = _MINIMUM_SUBSTANTIVE_HTML_INDEX_CHARS
    # Minimum number of content-bearing opening tags in index.html.
    index_blocks: int = _MINIMUM_SUBSTANTIVE_HTML_INDEX_BLOCKS
    # Minimum plain-text length of every non-index page.
    chapter_chars: int = _MINIMUM_SUBSTANTIVE_HTML_CHAPTER_CHARS
    # Minimum number of content-bearing opening tags in every non-index page.
    chapter_blocks: int = _MINIMUM_SUBSTANTIVE_HTML_CHAPTER_BLOCKS
| 136 | + |
| 137 | + |
| 116 | 138 | @dataclass |
| 117 | 139 | class DefinitionOfDone: |
| 118 | 140 | """Single source of truth for task completion state.""" |
@@ -727,6 +749,11 @@ def _derive_multi_page_html_quality_command( |
| 727 | 749 | return None |
| 728 | 750 | |
| 729 | 751 | path_literals = ", ".join(repr(str(path)) for path in html_paths) |
| 752 | + quality_floor = _derive_html_guide_quality_floor( |
| 753 | + task_statement, |
| 754 | + output_paths=html_paths, |
| 755 | + project_root=project_root, |
| 756 | + ) |
| 730 | 757 | return "\n".join( |
| 731 | 758 | [ |
| 732 | 759 | "python3 - <<'PY'", |
@@ -734,6 +761,10 @@ def _derive_multi_page_html_quality_command( |
| 734 | 761 | "import re", |
| 735 | 762 | "", |
| 736 | 763 | f"paths = [{path_literals}]", |
| 764 | + f"minimum_index_chars = {quality_floor.index_chars}", |
| 765 | + f"minimum_index_blocks = {quality_floor.index_blocks}", |
| 766 | + f"minimum_chapter_chars = {quality_floor.chapter_chars}", |
| 767 | + f"minimum_chapter_blocks = {quality_floor.chapter_blocks}", |
| 737 | 768 | "tag_pattern = re.compile(r'<[^>]+>')", |
| 738 | 769 | "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)", |
| 739 | 770 | "issues = []", |
@@ -748,8 +779,8 @@ def _derive_multi_page_html_quality_command( |
| 748 | 779 | " plain = re.sub(r'\\s+', ' ', plain).strip()", |
| 749 | 780 | " content_blocks = len(content_block_pattern.findall(text))", |
| 750 | 781 | " has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))", |
| 751 | | - " minimum_chars = 180 if path.name.lower() == 'index.html' else 220", |
| 752 | | - " minimum_blocks = 2 if path.name.lower() == 'index.html' else 3", |
| 782 | + " minimum_chars = minimum_index_chars if path.name.lower() == 'index.html' else minimum_chapter_chars", |
| 783 | + " minimum_blocks = minimum_index_blocks if path.name.lower() == 'index.html' else minimum_chapter_blocks", |
| 753 | 784 | " if not has_h1:", |
| 754 | 785 | " issues.append(f'{path}: missing <h1>')", |
| 755 | 786 | " if len(plain) < minimum_chars:", |
@@ -770,6 +801,180 @@ def _derive_multi_page_html_quality_command( |
| 770 | 801 | ) |
| 771 | 802 | |
| 772 | 803 | |
def _derive_html_guide_quality_floor(
    task_statement: str,
    *,
    output_paths: list[Path],
    project_root: Path,
) -> HtmlGuideQualityFloor:
    """Derive content-density floors from reference HTML named in the task.

    Reference pages are discovered via ``_reference_html_guide_paths_from_task``.
    Each readable page is measured twice: the length of its plain text (tags
    removed, whitespace collapsed) and the number of content-bearing opening
    tags. Measurements are bucketed into "index" (file named ``index.html``)
    and "chapter" (everything else). Each floor is the bucket's lower quartile
    scaled and clamped by ``_reference_quality_floor``; when one bucket is
    empty the other bucket's quartile is used in its place.

    Args:
        task_statement: Free-text task description scanned for path mentions.
        output_paths: HTML files the task is expected to produce.
        project_root: Base directory for relative path mentions.

    Returns:
        Reference-derived floors, or the default ``HtmlGuideQualityFloor()``
        when no reference page could be found or read.
    """
    reference_paths = _reference_html_guide_paths_from_task(
        task_statement,
        output_paths=output_paths,
        project_root=project_root,
    )
    if not reference_paths:
        return HtmlGuideQualityFloor()

    index_chars: list[int] = []
    index_blocks: list[int] = []
    chapter_chars: list[int] = []
    chapter_blocks: list[int] = []
    for path in reference_paths:
        try:
            # Decode explicitly and leniently: the locale default encoding can
            # raise UnicodeDecodeError (not an OSError) on arbitrary reference
            # files, and only approximate lengths are needed here.
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable reference pages are skipped (best effort).
            continue
        plain = re.sub(r"\s+", " ", re.sub(r"<[^>]+>", " ", text)).strip()
        blocks = len(
            re.findall(
                r"<(p|li|pre|code|section|article|table|h2|h3|h4)\b",
                text,
                re.IGNORECASE,
            )
        )
        if path.name.lower() == "index.html":
            index_chars.append(len(plain))
            index_blocks.append(blocks)
        else:
            chapter_chars.append(len(plain))
            chapter_blocks.append(blocks)

    # Every candidate was unreadable: fall back to the static defaults.
    if not chapter_chars and not index_chars:
        return HtmlGuideQualityFloor()

    # ``_lower_quartile`` returns 0 for an empty bucket, so ``or`` falls
    # through to the other bucket's measurement.
    return HtmlGuideQualityFloor(
        index_chars=_reference_quality_floor(
            _lower_quartile(index_chars) or _lower_quartile(chapter_chars),
            fraction=_REFERENCE_HTML_INDEX_CHAR_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_INDEX_CHARS,
            cap=_REFERENCE_HTML_INDEX_CHAR_CAP,
        ),
        index_blocks=_reference_quality_floor(
            _lower_quartile(index_blocks) or _lower_quartile(chapter_blocks),
            fraction=_REFERENCE_HTML_INDEX_BLOCK_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_INDEX_BLOCKS,
            cap=_REFERENCE_HTML_INDEX_BLOCK_CAP,
        ),
        chapter_chars=_reference_quality_floor(
            _lower_quartile(chapter_chars) or _lower_quartile(index_chars),
            fraction=_REFERENCE_HTML_CHAPTER_CHAR_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_CHARS,
            cap=_REFERENCE_HTML_CHAPTER_CHAR_CAP,
        ),
        chapter_blocks=_reference_quality_floor(
            _lower_quartile(chapter_blocks) or _lower_quartile(index_blocks),
            fraction=_REFERENCE_HTML_CHAPTER_BLOCK_FRACTION,
            minimum=_MINIMUM_SUBSTANTIVE_HTML_CHAPTER_BLOCKS,
            cap=_REFERENCE_HTML_CHAPTER_BLOCK_CAP,
        ),
    )
| 871 | + |
| 872 | + |
def _reference_html_guide_paths_from_task(
    task_statement: str,
    *,
    output_paths: list[Path],
    project_root: Path,
) -> list[Path]:
    """Return reference HTML files named in the task, excluding the outputs.

    Every path-like mention in ``task_statement`` is expanded (``~``), made
    absolute against ``project_root`` when relative, and resolved. Mentions
    inside the output scope are ignored; the rest are expanded to HTML files
    via ``_collect_reference_html_paths``. The result is de-duplicated in
    first-seen order.

    Args:
        task_statement: Free-text task description scanned for path mentions.
        output_paths: HTML files the task is expected to produce; their scope
            roots are never treated as reference material.
        project_root: Base directory for relative path mentions.

    Returns:
        Unique reference HTML paths; empty when none are found.
    """
    output_roots = _html_output_scope_roots(output_paths)

    def _in_output_scope(candidate: Path) -> bool:
        return any(_path_is_within_root(candidate, root) for root in output_roots)

    paths: list[Path] = []
    seen: set[str] = set()
    for raw_path in _extract_task_path_mentions(task_statement):
        try:
            # expanduser() raises RuntimeError for an unknown "~user", so it
            # must sit inside the guard together with resolve().
            path = Path(raw_path).expanduser()
            if not path.is_absolute():
                path = project_root / path
            resolved = path.resolve(strict=False)
        except (OSError, RuntimeError, ValueError):
            continue
        if _in_output_scope(resolved):
            continue
        for candidate in _collect_reference_html_paths(resolved):
            # A mentioned directory may contain the output tree; generated
            # pages must not serve as their own quality reference.
            if _in_output_scope(candidate):
                continue
            key = str(candidate)
            if key in seen:
                continue
            seen.add(key)
            paths.append(candidate)
    return paths
| 899 | + |
| 900 | + |
def _html_output_scope_roots(output_paths: list[Path]) -> tuple[Path, ...]:
    """Return the unique directories that own the given output files.

    The scope root of an output file is its resolved parent directory. When
    that directory is named ``chapters``, ``pages`` or ``sections`` (compared
    case-insensitively) the root is one level higher. Paths that cannot be
    expanded or resolved are skipped. Roots keep first-seen order.
    """
    nested_dir_names = {"chapters", "pages", "sections"}
    roots_by_key: dict[str, Path] = {}
    for output_path in output_paths:
        try:
            absolute = output_path.expanduser().resolve(strict=False)
        except (OSError, RuntimeError, ValueError):
            continue
        parent = absolute.parent
        scope = parent.parent if parent.name.lower() in nested_dir_names else parent
        # setdefault keeps the first root registered under each string key.
        roots_by_key.setdefault(str(scope), scope)
    return tuple(roots_by_key.values())
| 918 | + |
| 919 | + |
def _extract_task_path_mentions(task_statement: str) -> list[str]:
    """Extract unique path-like strings mentioned in a task statement.

    Candidates come from two sources: anything wrapped in backticks, and bare
    tokens that start with ``~``, ``/``, ``./`` or ``../``. Each candidate is
    trimmed of surrounding quotes, brackets and punctuation, then kept only if
    ``_looks_like_path_literal`` accepts it. First-seen order is preserved.

    Args:
        task_statement: Free-text task description.

    Returns:
        De-duplicated path literals in order of appearance.
    """
    candidates = re.findall(r"`([^`]+)`", task_statement)
    candidates.extend(
        re.findall(
            # Both dots of "../" are escaped; an unescaped second dot would
            # match any character and accept prefixes such as ".x/".
            r"(?:~|/|\./|\.\./)[A-Za-z0-9_./~+-]+",
            task_statement,
        )
    )

    paths: list[str] = []
    seen: set[str] = set()
    for candidate in candidates:
        # Leading dots are significant ("./ref", "../ref", ".hidden"), so "."
        # is only stripped from the right, where it is usually a sentence
        # period. Stripping it on the left would turn "./ref" into "/ref".
        cleaned = (
            candidate.strip()
            .lstrip("`'\",:;()[]{}")
            .rstrip("`'\",.:;()[]{}")
        )
        if not cleaned or cleaned in seen:
            continue
        if not _looks_like_path_literal(cleaned):
            continue
        seen.add(cleaned)
        paths.append(cleaned)
    return paths
| 940 | + |
| 941 | + |
def _collect_reference_html_paths(path: Path, *, max_paths: int = 32) -> list[Path]:
    """Expand a reference path into the HTML files it denotes.

    Args:
        path: A file or directory that the task mentioned.
        max_paths: Upper bound on the number of files returned for a directory.

    Returns:
        ``[path]`` when ``path`` is an existing ``.html``/``.htm`` file.
        For a directory: its ``index.html`` first (when present), followed by
        every ``*.html`` file found recursively in sorted order, without
        duplicates and limited to ``max_paths`` entries.
        An empty list for anything else.
    """
    if path.is_file() and path.suffix.lower() in {".html", ".htm"}:
        return [path]
    if not path.is_dir():
        return []

    candidates: list[Path] = []
    index = path / "index.html"
    if index.is_file():
        # Listed first so the index page always survives the cap below.
        candidates.append(index)
    try:
        candidates.extend(sorted(child for child in path.rglob("*.html") if child.is_file()))
    except OSError:
        # Traversal failed part-way; keep whatever was gathered before it.
        return candidates
    # De-duplicate BEFORE truncating: index.html is found again by rglob, and
    # slicing first would let that duplicate consume one of the slots.
    return list(dict.fromkeys(candidates))[:max_paths]
| 957 | + |
| 958 | + |
def _lower_quartile(values: list[int]) -> int:
    """Return the element at index ``(n - 1) // 4`` of the sorted values.

    An empty list yields ``0``. The input list is not modified.
    """
    if values:
        position = (len(values) - 1) // 4
        return sorted(values)[position]
    return 0
| 964 | + |
| 965 | + |
def _reference_quality_floor(
    value: int,
    *,
    fraction: float,
    minimum: int,
    cap: int,
) -> int:
    """Scale a reference measurement into a floor bounded by minimum and cap.

    A non-positive ``value`` yields ``minimum``. Otherwise ``value * fraction``
    is truncated to an int, limited to at most ``cap``, and finally raised to
    at least ``minimum`` (so ``minimum`` wins if it exceeds ``cap``).
    """
    if value <= 0:
        return minimum
    scaled = min(cap, int(value * fraction))
    return max(minimum, scaled)
| 976 | + |
| 977 | + |
| 773 | 978 | def collect_planned_artifact_targets( |
| 774 | 979 | dod: DefinitionOfDone, |
| 775 | 980 | *, |