tenseleyflow/loader / 624edfe

Browse files

Surface reference content cues

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
624edfe8a53ff4fc08a4e9b79f526981ebeeb1b1
Parents
edc01d7
Tree
57b4aee

2 changed files

Status | File | + | -
M src/loader/runtime/repair.py 96 0
M tests/test_repair.py 5 0
src/loader/runtime/repair.py (modified)
@@ -435,6 +435,13 @@ class ResponseRepairer:
435435
         )
436436
         if reference_line:
437437
             lines.append(reference_line)
438
+        reference_cues_line = self._known_reference_cues_line(
439
+            concrete_target,
440
+            require_first_substantive_output=True,
441
+            retry_number=retry_number,
442
+        )
443
+        if reference_cues_line:
444
+            lines.append(reference_cues_line)
438445
         if _should_encourage_initial_version(
439446
             target=concrete_target,
440447
             has_confirmed_output_file_progress=True,
@@ -907,6 +914,16 @@ class ResponseRepairer:
907914
             )
908915
             if reference_line:
909916
                 lines.append(reference_line)
917
+            reference_cues_line = self._known_reference_cues_line(
918
+                concrete_target,
919
+                require_first_substantive_output=(
920
+                    has_confirmed_output_file_progress
921
+                    and not has_confirmed_substantive_output_file_progress
922
+                ),
923
+                retry_number=retry_number,
924
+            )
925
+            if reference_cues_line:
926
+                lines.append(reference_cues_line)
910927
             if _should_encourage_initial_version(
911928
                 target=concrete_target,
912929
                 has_confirmed_output_file_progress=has_confirmed_output_file_progress,
@@ -981,6 +998,16 @@ class ResponseRepairer:
981998
             )
982999
             if reference_line:
9831000
                 lines.append(reference_line)
1001
+            reference_cues_line = self._known_reference_cues_line(
1002
+                inferred_pending_target,
1003
+                require_first_substantive_output=(
1004
+                    has_confirmed_output_file_progress
1005
+                    and not has_confirmed_substantive_output_file_progress
1006
+                ),
1007
+                retry_number=retry_number,
1008
+            )
1009
+            if reference_cues_line:
1010
+                lines.append(reference_cues_line)
9841011
             if todo_describes_aggregate_mutation(next_pending):
9851012
                 lines.insert(
9861013
                     1,
@@ -1090,6 +1117,16 @@ class ResponseRepairer:
10901117
                     )
10911118
                     if reference_line:
10921119
                         lines.append(reference_line)
1120
+                    reference_cues_line = self._known_reference_cues_line(
1121
+                        next_output_file,
1122
+                        require_first_substantive_output=(
1123
+                            has_confirmed_output_file_progress
1124
+                            and not has_confirmed_substantive_output_file_progress
1125
+                        ),
1126
+                        retry_number=retry_number,
1127
+                    )
1128
+                    if reference_cues_line:
1129
+                        lines.append(reference_cues_line)
10931130
                     if _should_encourage_initial_version(
10941131
                         target=next_output_file,
10951132
                         has_confirmed_output_file_progress=has_confirmed_output_file_progress,
@@ -1388,6 +1425,23 @@ class ResponseRepairer:
13881425
             "to the current target."
13891426
         )
13901427
 
1428
+    def _known_reference_cues_line(
1429
+        self,
1430
+        target: Path,
1431
+        *,
1432
+        require_first_substantive_output: bool,
1433
+        retry_number: int,
1434
+    ) -> str | None:
1435
+        if not require_first_substantive_output or retry_number < 2:
1436
+            return None
1437
+        reference = self._best_known_reference_path(target)
1438
+        if reference is None:
1439
+            return None
1440
+        cues = self._reference_content_cues(reference)
1441
+        if not cues:
1442
+            return None
1443
+        return f"Reference cues from `{display_runtime_path(reference)}`: {cues}"
1444
+
13911445
     def _best_known_reference_path(self, target: Path) -> Path | None:
13921446
         normalized_target = target.expanduser().resolve(strict=False)
13931447
         target_tokens = {
@@ -1433,6 +1487,42 @@ class ResponseRepairer:
14331487
         candidates.sort(key=lambda item: (item[0], item[1]), reverse=True)
14341488
         return candidates[0][2]
14351489
 
1490
+    def _reference_content_cues(self, reference: Path) -> str | None:
1491
+        try:
1492
+            content = reference.read_text()
1493
+        except OSError:
1494
+            return None
1495
+
1496
+        suffix = reference.suffix.lower()
1497
+        cues: list[str] = []
1498
+        if suffix in {".html", ".htm"}:
1499
+            for raw_line in content.splitlines():
1500
+                stripped = " ".join(raw_line.strip().split())
1501
+                if not stripped:
1502
+                    continue
1503
+                lowered = stripped.lower()
1504
+                if not any(
1505
+                    token in lowered
1506
+                    for token in ("<title", "<h1", "<h2", "<p", "<li", "<a ")
1507
+                ):
1508
+                    continue
1509
+                cues.append(_truncate_reference_cue(stripped))
1510
+                if len(cues) >= 3:
1511
+                    break
1512
+        if not cues:
1513
+            for raw_line in content.splitlines():
1514
+                stripped = " ".join(raw_line.strip().split())
1515
+                if not stripped:
1516
+                    continue
1517
+                if sum(ch.isalpha() for ch in stripped) < 6:
1518
+                    continue
1519
+                cues.append(_truncate_reference_cue(stripped))
1520
+                if len(cues) >= 3:
1521
+                    break
1522
+        if not cues:
1523
+            return None
1524
+        return " | ".join(cues)
1525
+
14361526
     @staticmethod
14371527
     def _mutation_tool_scaffold(path: Path, *, tool_name: str) -> str:
14381528
         normalized_path = json.dumps(display_runtime_path(path))
@@ -1483,3 +1573,9 @@ def _should_encourage_initial_version(
14831573
 def _leading_numeric_prefix(stem: str) -> str:
14841574
     match = re.match(r"^(\d+)", stem)
14851575
     return match.group(1) if match else ""
1576
+
1577
+
1578
+def _truncate_reference_cue(value: str, *, max_chars: int = 96) -> str:
1579
+    if len(value) <= max_chars:
1580
+        return value
1581
+    return value[: max_chars - 3].rstrip() + "..."
tests/test_repair.py (modified)
@@ -1331,6 +1331,11 @@ def test_compact_first_substantive_retry_reuses_known_reference_structure(
13311331
         "as the starting pattern for this new file, then adapt the content to the current target."
13321332
         in decision.retry_message
13331333
     )
1334
+    assert (
1335
+        f"Reference cues from `{display_runtime_path(reference_chapter)}`: "
1336
+        "<h1>Chapter 1: Introduction to Fortran</h1>"
1337
+        in decision.retry_message
1338
+    )
13341339
 
13351340
 
13361341
 def test_empty_response_retry_prefers_output_index_over_reference_index_with_same_name(