@@ -682,7 +682,7 @@ class ResponseRepairer: |
| 682 | 682 | ) -> Path | None: |
| 683 | 683 | candidates = todo_file_candidates(item) |
| 684 | 684 | if not candidates: |
| 685 | | - return None |
| 685 | + return self._infer_pending_item_target_from_html_graph(dod, item) |
| 686 | 686 | |
| 687 | 687 | planned_targets = collect_planned_artifact_targets( |
| 688 | 688 | dod, |
@@ -726,6 +726,67 @@ class ResponseRepairer: |
| 726 | 726 | |
| 727 | 727 | return None |
| 728 | 728 | |
def _infer_pending_item_target_from_html_graph(
    self,
    dod: DefinitionOfDone,
    item: str,
) -> Path | None:
    """Guess a pending item's output file from links in known HTML files.

    The todo label is normalized and compared with the normalized anchor
    text of every local ``.html``/``.htm`` link found in the HTML sources
    returned by ``_pending_item_html_sources``.

    Args:
        dod: Definition of done whose HTML sources are scanned.
        item: Raw pending todo label.

    Returns:
        The resolved target of the best-matching link, or ``None`` when the
        label normalizes to an empty string or no link text scores above 0.
        Candidates are ranked by score, then by "target does not exist
        yet", then by path string; the greatest wins.
    """
    target_label = _normalize_pending_output_label(item)
    if not target_label:
        return None

    matches: list[tuple[int, bool, Path]] = []
    for html_file in self._pending_item_html_sources(dod):
        try:
            # Decode explicitly and leniently: a stray non-UTF-8 byte must
            # not abort this best-effort scan with UnicodeDecodeError.
            content = html_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Missing or unreadable sources (e.g. planned files that do not
            # exist yet) simply contribute no links.
            continue
        for href, link_text in _iter_local_html_links(html_file, content):
            score = _pending_output_link_match_score(
                target_label,
                _normalize_pending_output_label(link_text),
            )
            if score <= 0:
                continue
            # Resolve only links that matched; non-matching links never
            # need filesystem access.
            # NOTE(review): an href starting with "/" discards
            # html_file.parent in this join and resolves from the
            # filesystem root - confirm that is intended for
            # site-root-relative links.
            resolved = (html_file.parent / href).resolve(strict=False)
            matches.append((score, not resolved.exists(), resolved))

    if not matches:
        return None
    best = max(matches, key=lambda match: (match[0], match[1], str(match[2])))
    return best[2]
| 759 | + |
def _pending_item_html_sources(self, dod: DefinitionOfDone) -> list[Path]:
    """List the HTML files to scan for links, without duplicates.

    Touched files come first, each expanded with ``expanduser`` and made
    absolute with ``resolve(strict=False)`` (relative entries therefore
    resolve against the current working directory).  Planned artifact
    targets follow, skipping entries flagged as directories.  Only
    ``.html``/``.htm`` paths are kept, and a path whose string form was
    already collected is dropped.

    Args:
        dod: Definition of done supplying ``touched_files`` and the planned
            artifact targets.

    Returns:
        HTML paths in first-seen order.
    """
    html_suffixes = {".html", ".htm"}
    planned = collect_planned_artifact_targets(
        dod,
        project_root=self.context.project_root,
        max_paths=12,
    )

    touched_html = [
        candidate
        for candidate in (
            Path(raw).expanduser().resolve(strict=False)
            for raw in dod.touched_files
        )
        if candidate.suffix.lower() in html_suffixes
    ]
    planned_html = [
        target
        for target, is_directory in planned
        if not is_directory and target.suffix.lower() in html_suffixes
    ]

    # A dict keyed by the string path keeps the first occurrence of each
    # path and preserves insertion order.
    unique: dict[str, Path] = {}
    for candidate in touched_html + planned_html:
        unique.setdefault(str(candidate), candidate)
    return list(unique.values())
| 789 | + |
| 729 | 790 | def _preferred_resume_pending_item( |
| 730 | 791 | self, |
| 731 | 792 | dod: DefinitionOfDone, |
@@ -794,3 +855,58 @@ def _todo_is_mutation_step(label: str) -> bool: |
def _todo_is_consistency_review_step(label: str) -> bool:
    """Return True if the lowercased *label* contains any review hint.

    Each entry of ``_CONSISTENCY_REVIEW_HINTS`` is tested as a plain
    substring of the lowercased label; the hints themselves are used as-is.
    """
    haystack = label.lower()
    for hint in _CONSISTENCY_REVIEW_HINTS:
        if hint in haystack:
            return True
    return False
| 858 | + |
| 859 | + |
| 860 | +def _normalize_pending_output_label(value: str) -> str: |
| 861 | + text = " ".join(str(value).strip().split()).lower() |
| 862 | + if not text: |
| 863 | + return "" |
| 864 | + text = re.sub( |
| 865 | + r"^(?:working on:\s*)?(?:create|creating|write|writing|build|building|develop|developing)\s+", |
| 866 | + "", |
| 867 | + text, |
| 868 | + ) |
| 869 | + text = re.sub(r"\bfor nginx guide\b", "", text) |
| 870 | + text = re.sub(r"[^a-z0-9]+", " ", text) |
| 871 | + return " ".join(text.split()) |
| 872 | + |
| 873 | + |
| 874 | +def _pending_output_link_match_score(todo_label: str, link_label: str) -> int: |
| 875 | + if not todo_label or not link_label: |
| 876 | + return 0 |
| 877 | + if todo_label == link_label: |
| 878 | + return 3 |
| 879 | + if todo_label in link_label or link_label in todo_label: |
| 880 | + return 2 |
| 881 | + todo_tokens = {token for token in todo_label.split() if len(token) > 2} |
| 882 | + link_tokens = {token for token in link_label.split() if len(token) > 2} |
| 883 | + if not todo_tokens or not link_tokens: |
| 884 | + return 0 |
| 885 | + overlap = todo_tokens & link_tokens |
| 886 | + if len(overlap) >= min(3, len(todo_tokens), len(link_tokens)): |
| 887 | + return 1 |
| 888 | + return 0 |
| 889 | + |
| 890 | + |
| 891 | +def _iter_local_html_links(file_path: Path, content: str) -> list[tuple[str, str]]: |
| 892 | + pattern = re.compile( |
| 893 | + r"<a\b[^>]*href\s*=\s*[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", |
| 894 | + re.IGNORECASE | re.DOTALL, |
| 895 | + ) |
| 896 | + links: list[tuple[str, str]] = [] |
| 897 | + seen: set[tuple[str, str]] = set() |
| 898 | + for href, inner_html in pattern.findall(content): |
| 899 | + target = href.strip() |
| 900 | + if not target or target.startswith(("#", "http://", "https://", "mailto:")): |
| 901 | + continue |
| 902 | + trimmed_target = target.split("?", 1)[0].split("#", 1)[0] |
| 903 | + if Path(trimmed_target).suffix.lower() not in {".html", ".htm"}: |
| 904 | + continue |
| 905 | + label = re.sub(r"<[^>]+>", " ", inner_html) |
| 906 | + label = " ".join(label.split()) |
| 907 | + key = (trimmed_target, label) |
| 908 | + if key in seen: |
| 909 | + continue |
| 910 | + seen.add(key) |
| 911 | + links.append((trimmed_target, label)) |
| 912 | + return links |