tenseleyflow/loader / 67175cc

Browse files

Map chapter titles to files

Authored by espadonne
SHA
67175ccc1c10fce220c8a1f96661c60d9be6cba1
Parents
77961f8
Tree
078240c

2 changed files

Status | File | + | -
M src/loader/runtime/repair.py 117 1
M tests/test_repair.py 80 3
src/loader/runtime/repair.py (modified)
@@ -682,7 +682,7 @@ class ResponseRepairer:
682682
     ) -> Path | None:
683683
         candidates = todo_file_candidates(item)
684684
         if not candidates:
685
-            return None
685
+            return self._infer_pending_item_target_from_html_graph(dod, item)
686686
 
687687
         planned_targets = collect_planned_artifact_targets(
688688
             dod,
@@ -726,6 +726,67 @@ class ResponseRepairer:
726726
 
727727
         return None
728728
 
729
+    def _infer_pending_item_target_from_html_graph(
730
+        self,
731
+        dod: DefinitionOfDone,
732
+        item: str,
733
+    ) -> Path | None:
734
+        target_label = _normalize_pending_output_label(item)
735
+        if not target_label:
736
+            return None
737
+
738
+        html_files = self._pending_item_html_sources(dod)
739
+        matches: list[tuple[int, bool, Path]] = []
740
+        for html_file in html_files:
741
+            try:
742
+                content = html_file.read_text()
743
+            except OSError:
744
+                continue
745
+            for href, link_text in _iter_local_html_links(html_file, content):
746
+                resolved = (html_file.parent / href).resolve(strict=False)
747
+                score = _pending_output_link_match_score(
748
+                    target_label,
749
+                    _normalize_pending_output_label(link_text),
750
+                )
751
+                if score <= 0:
752
+                    continue
753
+                matches.append((score, not resolved.exists(), resolved))
754
+
755
+        if not matches:
756
+            return None
757
+        matches.sort(key=lambda item: (item[0], item[1], str(item[2])), reverse=True)
758
+        return matches[0][2]
759
+
760
+    def _pending_item_html_sources(self, dod: DefinitionOfDone) -> list[Path]:
761
+        planned_targets = collect_planned_artifact_targets(
762
+            dod,
763
+            project_root=self.context.project_root,
764
+            max_paths=12,
765
+        )
766
+        html_sources: list[Path] = []
767
+        seen: set[str] = set()
768
+
769
+        for raw_path in dod.touched_files:
770
+            path = Path(raw_path).expanduser().resolve(strict=False)
771
+            if path.suffix.lower() not in {".html", ".htm"}:
772
+                continue
773
+            key = str(path)
774
+            if key in seen:
775
+                continue
776
+            seen.add(key)
777
+            html_sources.append(path)
778
+
779
+        for target, expect_directory in planned_targets:
780
+            if expect_directory or target.suffix.lower() not in {".html", ".htm"}:
781
+                continue
782
+            key = str(target)
783
+            if key in seen:
784
+                continue
785
+            seen.add(key)
786
+            html_sources.append(target)
787
+
788
+        return html_sources
789
+
729790
     def _preferred_resume_pending_item(
730791
         self,
731792
         dod: DefinitionOfDone,
@@ -794,3 +855,58 @@ def _todo_is_mutation_step(label: str) -> bool:
794855
 def _todo_is_consistency_review_step(label: str) -> bool:
795856
     lowered = label.lower()
796857
     return any(token in lowered for token in _CONSISTENCY_REVIEW_HINTS)
858
+
859
+
860
+def _normalize_pending_output_label(value: str) -> str:
861
+    text = " ".join(str(value).strip().split()).lower()
862
+    if not text:
863
+        return ""
864
+    text = re.sub(
865
+        r"^(?:working on:\s*)?(?:create|creating|write|writing|build|building|develop|developing)\s+",
866
+        "",
867
+        text,
868
+    )
869
+    text = re.sub(r"\bfor nginx guide\b", "", text)
870
+    text = re.sub(r"[^a-z0-9]+", " ", text)
871
+    return " ".join(text.split())
872
+
873
+
874
+def _pending_output_link_match_score(todo_label: str, link_label: str) -> int:
875
+    if not todo_label or not link_label:
876
+        return 0
877
+    if todo_label == link_label:
878
+        return 3
879
+    if todo_label in link_label or link_label in todo_label:
880
+        return 2
881
+    todo_tokens = {token for token in todo_label.split() if len(token) > 2}
882
+    link_tokens = {token for token in link_label.split() if len(token) > 2}
883
+    if not todo_tokens or not link_tokens:
884
+        return 0
885
+    overlap = todo_tokens & link_tokens
886
+    if len(overlap) >= min(3, len(todo_tokens), len(link_tokens)):
887
+        return 1
888
+    return 0
889
+
890
+
891
+def _iter_local_html_links(file_path: Path, content: str) -> list[tuple[str, str]]:
892
+    pattern = re.compile(
893
+        r"<a\b[^>]*href\s*=\s*[\"']([^\"']+)[\"'][^>]*>(.*?)</a>",
894
+        re.IGNORECASE | re.DOTALL,
895
+    )
896
+    links: list[tuple[str, str]] = []
897
+    seen: set[tuple[str, str]] = set()
898
+    for href, inner_html in pattern.findall(content):
899
+        target = href.strip()
900
+        if not target or target.startswith(("#", "http://", "https://", "mailto:")):
901
+            continue
902
+        trimmed_target = target.split("?", 1)[0].split("#", 1)[0]
903
+        if Path(trimmed_target).suffix.lower() not in {".html", ".htm"}:
904
+            continue
905
+        label = re.sub(r"<[^>]+>", " ", inner_html)
906
+        label = " ".join(label.split())
907
+        key = (trimmed_target, label)
908
+        if key in seen:
909
+            continue
910
+        seen.add(key)
911
+        links.append((trimmed_target, label))
912
+    return links
tests/test_repair.py (modified)
@@ -722,9 +722,11 @@ def test_empty_response_retry_points_at_declared_child_file_within_incomplete_ou
722722
         "by creating `introduction.html`."
723723
         in decision.retry_message
724724
     )
725
-    assert "It is the next missing declared output under `chapters/`." in decision.retry_message
726
-    assert "Prefer one `write` call for `" in decision.retry_message
727
-    assert "introduction.html` before more research." in decision.retry_message
725
+    assert (
726
+        f"Prefer one `write(content=...)` call for `{(chapters / 'introduction.html').resolve(strict=False)}` "
727
+        "before more research."
728
+        in decision.retry_message
729
+    )
728730
 
729731
 
730732
 def test_empty_response_retry_infers_concrete_file_from_pending_todo_after_broad_artifacts_exist(
@@ -793,6 +795,81 @@ def test_empty_response_retry_infers_concrete_file_from_pending_todo_after_broad
793795
     assert "Do not return another working note or empty response" in decision.retry_message
794796
 
795797
 
798
def test_empty_response_retry_maps_title_style_todo_to_html_graph_target(
    temp_dir: Path,
) -> None:
    """A title-style todo is mapped to the file linked under that title in index.html."""
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    # Guide layout: index.html links to two chapters, but only chapter one
    # is written to disk.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    index_lines = [
        "<html>",
        '<a href="chapters/01-introduction.html">Chapter 1: Introduction to NGINX Tool</a>',
        '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
        "</html>",
    ]
    index_path.write_text("\n".join(index_lines) + "\n")
    chapter_one.write_text("<html></html>\n")

    # The plan only names the directories and index.html, never the chapter file.
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(path) for path in (index_path, chapter_one)])
    dod.completed_items.extend(
        [
            "Create index.html for nginx guide",
            "Create Chapter 1: Introduction to NGINX Tool",
        ]
    )
    dod.pending_items.append("Creating Chapter 2: Installation and Setup")

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=2,
        max_empty_retries=2,
        dod=dod,
    )

    expected_target = (chapters / "02-installation.html").resolve(strict=False)
    expected_step = (
        "Resume with this exact next step: continue `Creating Chapter 2: Installation and Setup` "
        "by creating `02-installation.html`."
    )
    expected_write_hint = (
        f"Prefer one `write(content=...)` call for `{expected_target}` "
        "before more research."
    )

    assert decision.should_continue is True
    assert decision.retry_message is not None
    assert expected_step in decision.retry_message
    assert expected_write_hint in decision.retry_message
871
+
872
+
796873
 def test_empty_response_retry_fails_after_extended_late_stage_budget_is_exhausted(
797874
     temp_dir: Path,
798875
 ) -> None: