tenseleyflow/loader / 6236b4d

Browse files

Steer structural HTML repairs

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6236b4d4c3d85c4e3ed6fa14721a2790b37ad941
Parents
3f4faaf
Tree
0a212d2

8 changed files

Status File + -
M src/loader/runtime/finalization.py 16 2
M src/loader/runtime/repair.py 24 16
M src/loader/runtime/repair_focus.py 49 0
M src/loader/runtime/tool_batches.py 13 11
M src/loader/runtime/turn_completion.py 25 16
M tests/test_finalization.py 35 0
M tests/test_repair.py 42 0
M tests/test_turn_completion.py 76 0
src/loader/runtime/finalization.py (modified)
@@ -33,6 +33,7 @@ from .executor import ToolExecutor
3333
 from .logging import get_runtime_logger
3434
 from .memory import MemoryStore
3535
 from .policy_timeline import append_verification_timeline_entry
36
+from .repair_focus import html_repair_issue_is_structural
3637
 from .session import normalize_usage
3738
 from .tracing import RuntimeTracer
3839
 from .verification_observations import (
@@ -1364,10 +1365,20 @@ def _build_html_quality_repair_guidance(
13641365
     lines = ["Repair focus:"]
13651366
     lines.extend(_html_quality_repair_target_lines(targets))
13661367
     primary = targets[0]
1368
+    if html_repair_issue_is_structural(primary.issue):
1369
+        immediate_step = (
1370
+            f"- Immediate next step: replace `{primary.artifact_path}` with one complete "
1371
+            "valid HTML document that preserves the page content while fixing the "
1372
+            "listed structure issue."
1373
+        )
1374
+    else:
1375
+        immediate_step = (
1376
+            f"- Immediate next step: edit `{primary.artifact_path}` with a substantial "
1377
+            "expansion or replacement that satisfies its listed quality issue."
1378
+        )
13671379
     lines.extend(
13681380
         [
1369
-            f"- Immediate next step: edit `{primary.artifact_path}` with a substantial "
1370
-            "expansion or replacement that satisfies its listed quality issue.",
1381
+            immediate_step,
13711382
             *_html_quality_repair_strategy_lines(),
13721383
             "- Repair every listed quality target in order before any final answer; "
13731384
             "do not stop after touching only the first file.",
@@ -1398,6 +1409,9 @@ def _html_quality_repair_target_lines(
13981409
 
13991410
 def _html_quality_repair_strategy_lines() -> list[str]:
14001411
     return [
1412
+        "- If a target mentions duplicate closing tags, closing tags in the wrong order, "
1413
+        "or content after `</html>`, replace the malformed file with one complete "
1414
+        "valid HTML document; do not append more content after an existing closing tag.",
14011415
         "- If a target says thin content, add enough concrete prose to comfortably "
14021416
         "exceed the expected text-character floor; if it says insufficient "
14031417
         "structured content, add enough real sections, lists, code, tables, or "
src/loader/runtime/repair.py (modified)
@@ -23,7 +23,10 @@ from .recovery import detect_missing_mutation_payload
2323
 from .repair_focus import (
2424
     ActiveRepairContext,
2525
     extract_active_repair_context,
26
+    html_repair_issue_is_structural,
2627
     recent_repair_mutation_context_failed,
28
+    repair_line_is_html_quality,
29
+    repair_line_matches_target,
2730
 )
2831
 from .workflow import (
2932
     infer_output_outline_label,
@@ -835,7 +838,8 @@ class ResponseRepairer:
835838
             (
836839
                 line
837840
                 for line in repair.repair_lines
838
-                if target in line and _repair_line_is_html_quality(line)
841
+                if repair_line_matches_target(line, target)
842
+                and _repair_line_is_html_quality(line)
839843
             ),
840844
             next(
841845
                 (
@@ -872,17 +876,31 @@ class ResponseRepairer:
872876
         ]
873877
         if issue_line:
874878
             lines.append(f"- Current verifier issue: {issue_line[2:] if issue_line.startswith('- ') else issue_line}")
875
-        force_write = recent_repair_mutation_context_failed(
879
+        structural_issue = html_repair_issue_is_structural(issue_line)
880
+        force_write = structural_issue or recent_repair_mutation_context_failed(
876881
             self.context.session.messages,
877882
             target,
878883
         )
879884
         if force_write:
885
+            structural_suffix = (
886
+                " Ensure the replacement has exactly one closing `</body>` tag, "
887
+                "exactly one closing `</html>` tag, and no content after `</html>`."
888
+                if structural_issue
889
+                else ""
890
+            )
891
+            reason = (
892
+                "- The active verifier is reporting malformed HTML document "
893
+                "structure. "
894
+                if structural_issue
895
+                else "- Recent `edit`/`patch` attempts for this same target failed "
896
+                "against stale or malformed context. "
897
+            )
880898
             lines.extend(
881899
                 [
882
-                    "- Recent `edit`/`patch` attempts for this same target failed "
883
-                    "against stale or malformed context. Use exactly one "
900
+                    reason
901
+                    + "Use exactly one "
884902
                     "`write(file_path=..., content=...)` call now with a complete "
885
-                    "valid HTML document for that file.",
903
+                    f"valid HTML document for that file.{structural_suffix}",
886904
                     "- Do not call `read`, `edit`, `patch`, TodoWrite, or a final "
887905
                     "summary on this retry; emit the `write` mutation tool call now.",
888906
                 ]
@@ -1967,17 +1985,7 @@ def _is_summary_artifact_path(path: Path) -> bool:
19671985
 
19681986
 
19691987
 def _repair_line_is_html_quality(line: str) -> bool:
1970
-    lowered = line.lower()
1971
-    return any(
1972
-        phrase in lowered
1973
-        for phrase in (
1974
-            "thin content",
1975
-            "insufficient structured content",
1976
-            "content-quality",
1977
-            "quality target",
1978
-            "html guide content quality",
1979
-        )
1980
-    )
1988
+    return repair_line_is_html_quality(line)
19811989
 
19821990
 
19831991
 def _should_encourage_initial_version(
src/loader/runtime/repair_focus.py (modified)
@@ -22,6 +22,25 @@ _STALE_REPAIR_MUTATION_MARKERS = (
2222
     "structured patch hunks overlap",
2323
     "failed to complete the operation after",
2424
 )
25
+_HTML_REPAIR_ISSUE_MARKERS = (
26
+    "thin content",
27
+    "insufficient structured content",
28
+    "content-quality",
29
+    "content quality",
30
+    "quality target",
31
+    "html guide content quality",
32
+    "expected exactly one closing </body>",
33
+    "expected exactly one closing </html>",
34
+    "content appears after closing </html>",
35
+    "closing </body> appears after closing </html>",
36
+    "missing <h1>",
37
+)
38
+_HTML_STRUCTURAL_REPAIR_MARKERS = (
39
+    "expected exactly one closing </body>",
40
+    "expected exactly one closing </html>",
41
+    "content appears after closing </html>",
42
+    "closing </body> appears after closing </html>",
43
+)
2544
 
2645
 
2746
 @dataclass(frozen=True)
@@ -144,6 +163,36 @@ def recent_repair_mutation_context_failed(
144163
     return False
145164
 
146165
 
166
+def repair_line_is_html_quality(line: str) -> bool:
167
+    """Return whether a repair-focus line describes generated HTML quality."""
168
+
169
+    lowered = str(line or "").lower()
170
+    return any(marker in lowered for marker in _HTML_REPAIR_ISSUE_MARKERS)
171
+
172
+
173
+def repair_line_matches_target(line: str, target: str) -> bool:
174
+    """Return whether a repair-focus line refers to the target path."""
175
+
176
+    line_text = str(line or "")
177
+    target_text = str(target or "").strip()
178
+    if not line_text or not target_text:
179
+        return False
180
+    normalized_target = normalize_repair_path(target_text)
181
+    if target_text in line_text or (normalized_target and normalized_target in line_text):
182
+        return True
183
+    for candidate in re.findall(r"`([^`]+)`", line_text):
184
+        if normalize_repair_path(candidate) == normalized_target:
185
+            return True
186
+    return False
187
+
188
+
189
+def html_repair_issue_is_structural(line: str) -> bool:
190
+    """Return whether an HTML quality issue is about document structure."""
191
+
192
+    lowered = str(line or "").lower()
193
+    return any(marker in lowered for marker in _HTML_STRUCTURAL_REPAIR_MARKERS)
194
+
195
+
147196
 def normalize_repair_path(raw_path: str) -> str:
148197
     text = str(raw_path or "").strip()
149198
     if not text:
src/loader/runtime/tool_batches.py (modified)
@@ -36,8 +36,10 @@ from .policy_timeline import append_verification_timeline_entry
3636
 from .recovery import RecoveryContext, detect_missing_mutation_payload
3737
 from .repair_focus import (
3838
     extract_active_repair_context,
39
+    html_repair_issue_is_structural,
3940
     path_within_allowed_roots,
4041
     recent_repair_mutation_context_failed,
42
+    repair_line_is_html_quality,
4143
 )
4244
 from .safeguard_services import extract_shell_text_rewrite_target
4345
 from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate
@@ -2042,17 +2044,23 @@ class ToolBatchRunner:
20422044
                 if repair_issue
20432045
                 else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
20442046
             )
2047
+            structural_repair = html_repair_issue_is_structural(repair_issue)
20452048
             force_write = recent_repair_mutation_context_failed(
20462049
                 self.context.session.messages,
20472050
                 target,
20482051
             )
2049
-            if force_write:
2052
+            if force_write or structural_repair:
2053
+                structural_suffix = (
2054
+                    " Ensure the replacement has exactly one closing `</body>` tag, "
2055
+                    "exactly one closing `</html>` tag, and no content after `</html>`."
2056
+                    if structural_repair
2057
+                    else ""
2058
+                )
20502059
                 immediate_step = (
20512060
                     f"- Immediate next step: rewrite `{target}` with one `write` call.\n"
2052
-                    "- Recent `edit`/`patch` attempts for this file failed against stale "
2053
-                    "or malformed context. Use `write(file_path=..., content=...)` with "
2061
+                    "- Use `write(file_path=..., content=...)` with "
20542062
                     "a complete valid HTML document, and do not call `read`, `edit`, "
2055
-                    "`patch`, or TodoWrite again first."
2063
+                    f"`patch`, or TodoWrite again first.{structural_suffix}"
20562064
                 )
20572065
             else:
20582066
                 immediate_step = (
@@ -3406,13 +3414,7 @@ def _repair_context_is_html_quality(repair: Any) -> bool:
34063414
 
34073415
 
34083416
 def _repair_line_is_html_quality(line: str) -> bool:
3409
-    lowered = str(line or "").lower()
3410
-    return (
3411
-        "thin content" in lowered
3412
-        or "insufficient structured content" in lowered
3413
-        or "content-quality" in lowered
3414
-        or "content quality" in lowered
3415
-    )
3417
+    return repair_line_is_html_quality(line)
34163418
 
34173419
 
34183420
 def _next_quality_repair_path(repair: Any, *, changed_path: str) -> str:
src/loader/runtime/turn_completion.py (modified)
@@ -29,7 +29,10 @@ from .policy_timeline import (
2929
 from .repair import ResponseRepairer
3030
 from .repair_focus import (
3131
     extract_active_repair_context,
32
+    html_repair_issue_is_structural,
3233
     recent_repair_mutation_context_failed,
34
+    repair_line_is_html_quality,
35
+    repair_line_matches_target,
3336
 )
3437
 from .rollback import RollbackPlan
3538
 from .verification_observations import VerificationObservation
@@ -531,7 +534,7 @@ def _build_html_quality_repair_continuation(
531534
     if not target_text:
532535
         return None
533536
 
534
-    force_write = recent_repair_mutation_context_failed(
537
+    stale_context = recent_repair_mutation_context_failed(
535538
         cast(list[Message], messages),
536539
         target_text,
537540
     )
@@ -539,21 +542,35 @@ def _build_html_quality_repair_continuation(
539542
         (
540543
             line[2:] if line.startswith("- ") else line
541544
             for line in repair.repair_lines
542
-            if target_text in line and _repair_line_is_html_quality(line)
545
+            if repair_line_matches_target(line, target_text)
546
+            and _repair_line_is_html_quality(line)
543547
         ),
544548
         "",
545549
     )
546550
     issue_sentence = f" Current verifier issue: {issue_line}" if issue_line else ""
551
+    structural_issue = html_repair_issue_is_structural(issue_line)
552
+    force_write = stale_context or structural_issue
547553
     if force_write:
554
+        structural_sentence = (
555
+            " Ensure the replacement has exactly one closing `</body>` tag, "
556
+            "exactly one closing `</html>` tag, and no content after `</html>`."
557
+            if structural_issue
558
+            else ""
559
+        )
560
+        failure_reason = (
561
+            "The active verifier is reporting malformed HTML document structure. "
562
+            if structural_issue and not stale_context
563
+            else "Recent `patch`/`edit` attempts for this same file failed because their "
564
+            "remembered context was stale or malformed. "
565
+        )
548566
         prompt = (
549567
             "[CONTINUE QUALITY REPAIR]\n"
550568
             "You just described a content-quality repair, but did not execute it. "
551
-            "Recent `patch`/`edit` attempts for this same file failed because their "
552
-            "remembered context was stale or malformed. "
569
+            f"{failure_reason}"
553570
             f"Emit exactly one `write(file_path=..., content=...)` tool call for `{target_text}` now."
554571
             f"{issue_sentence} "
555572
             "Write a complete valid HTML document for this file that preserves the chapter topic "
556
-            "and satisfies the listed quality issue. Do not call `read`, `edit`, `patch`, "
573
+            f"and satisfies the listed quality issue.{structural_sentence} Do not call `read`, `edit`, `patch`, "
557574
             "`TodoWrite`, or summarize."
558575
         )
559576
         return InProgressContinuation(prompt=prompt, target=None)
@@ -591,6 +608,8 @@ def _build_pending_html_quality_repair_continuation(
591608
         "content-quality" in lowered
592609
         or "thin content" in lowered
593610
         or "insufficient structured content" in lowered
611
+        or "expected exactly one closing" in lowered
612
+        or "content appears after closing </html>" in lowered
594613
     )
595614
     if not (
596615
         latest_user_content.startswith("[CONTINUE QUALITY REPAIR]")
@@ -610,17 +629,7 @@ def _looks_like_progress_intent(content: str) -> bool:
610629
 
611630
 
612631
 def _repair_line_is_html_quality(line: str) -> bool:
613
-    lowered = line.lower()
614
-    return any(
615
-        phrase in lowered
616
-        for phrase in (
617
-            "thin content",
618
-            "insufficient structured content",
619
-            "content-quality",
620
-            "quality target",
621
-            "html guide content quality",
622
-        )
623
-    )
632
+    return repair_line_is_html_quality(line)
624633
 
625634
 
626635
 def _next_missing_planned_artifact(
tests/test_finalization.py (modified)
@@ -452,6 +452,41 @@ def test_verification_repair_guidance_replaces_stale_focus_for_html_quality_issu
452452
     assert str(third_chapter.resolve(strict=False)) in repair.allowed_paths
453453
 
454454
 
455
+def test_verification_repair_guidance_prioritizes_structural_html_quality_issue(
456
+    temp_dir: Path,
457
+) -> None:
458
+    chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html"
459
+    chapter.parent.mkdir(parents=True)
460
+    chapter.write_text(
461
+        "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n"
462
+        "<p>Trailing content.</p>\n"
463
+    )
464
+    dod = create_definition_of_done("Create an equally thorough HTML guide.")
465
+    dod.evidence = [
466
+        VerificationEvidence(
467
+            command="quality",
468
+            passed=False,
469
+            output=(
470
+                "HTML guide content quality issues:\n"
471
+                f"{chapter}: expected exactly one closing </html> tag (found 2)\n"
472
+            ),
473
+        )
474
+    ]
475
+
476
+    guidance = _build_verification_repair_guidance(
477
+        dod,
478
+        project_root=temp_dir,
479
+    )
480
+    repair = extract_active_repair_context([Message(role=Role.USER, content=guidance)])
481
+
482
+    assert f"Improve `{chapter}`: expected exactly one closing </html> tag" in guidance
483
+    assert f"Immediate next step: replace `{chapter}` with one complete" in guidance
484
+    assert "replace the malformed file with one complete valid HTML document" in guidance
485
+    assert "do not append more content after an existing closing tag" in guidance
486
+    assert repair is not None
487
+    assert repair.artifact_path == str(chapter.resolve(strict=False))
488
+
489
+
455490
 def test_verification_repair_guidance_keeps_multi_file_quality_worklist(
456491
     temp_dir: Path,
457492
 ) -> None:
tests/test_repair.py (modified)
@@ -371,6 +371,48 @@ def test_empty_response_retry_forces_write_after_stale_quality_repair_context(
371371
     assert "Do not call `read`, `edit`, `patch`, TodoWrite" in decision.retry_message
372372
 
373373
 
374
+def test_empty_response_retry_forces_write_for_structural_html_repair(
375
+    temp_dir: Path,
376
+) -> None:
377
+    context = build_context(
378
+        temp_dir=temp_dir,
379
+        use_react=False,
380
+    )
381
+    repairer = ResponseRepairer(context)
382
+    chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html"
383
+    chapter.parent.mkdir(parents=True)
384
+    chapter.write_text(
385
+        "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n"
386
+        "<p>Trailing content.</p>\n"
387
+    )
388
+    context.session.append(
389
+        Message(
390
+            role=Role.USER,
391
+            content=(
392
+                "Repair focus:\n"
393
+                f"- Improve `{chapter}`: content appears after closing </html>.\n"
394
+                f"- Immediate next step: replace `{chapter}` with one complete valid HTML document.\n"
395
+            ),
396
+        )
397
+    )
398
+    dod = create_definition_of_done("Create an equally thorough HTML guide.")
399
+    dod.touched_files = [str(chapter)]
400
+
401
+    decision = repairer.handle_empty_response(
402
+        task="Create an equally thorough HTML guide.",
403
+        original_task=None,
404
+        empty_retry_count=1,
405
+        max_empty_retries=2,
406
+        dod=dod,
407
+    )
408
+
409
+    assert decision.should_continue is True
410
+    assert decision.retry_message is not None
411
+    assert "content appears after closing </html>" in decision.retry_message
412
+    assert "Use exactly one `write(file_path=..., content=...)`" in decision.retry_message
413
+    assert "exactly one closing `</body>` tag" in decision.retry_message
414
+
415
+
374416
 def test_empty_response_retry_mentions_write_can_create_missing_parent_directories(
375417
     temp_dir: Path,
376418
 ) -> None:
tests/test_turn_completion.py (modified)
@@ -537,6 +537,82 @@ async def test_turn_completion_forces_write_after_stale_quality_repair_context(
537537
     assert "Do not call `read`, `edit`, `patch`, `TodoWrite`, or summarize." in message
538538
 
539539
 
540
+@pytest.mark.asyncio
541
+async def test_turn_completion_forces_write_for_structural_html_repair(
542
+    temp_dir: Path,
543
+) -> None:
544
+    backend = ScriptedBackend()
545
+    config = non_streaming_config()
546
+    config.reasoning.completion_check = False
547
+    agent = Agent(
548
+        backend=backend,
549
+        config=config,
550
+        project_root=temp_dir,
551
+    )
552
+    runtime = ConversationRuntime(agent)
553
+    events = []
554
+
555
+    async def capture(event) -> None:
556
+        events.append(event)
557
+
558
+    prepared = await runtime.turn_preparation.prepare(
559
+        task="Create an equally thorough HTML guide.",
560
+        emit=capture,
561
+        requested_mode="execute",
562
+        original_task=None,
563
+        on_user_question=None,
564
+    )
565
+    await runtime.phase_tracker.enter(
566
+        TurnPhase.ASSISTANT,
567
+        capture,
568
+        detail="Requesting assistant response",
569
+        reason_code="request_assistant_response",
570
+    )
571
+
572
+    chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html"
573
+    chapter.parent.mkdir(parents=True)
574
+    chapter.write_text(
575
+        "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n"
576
+        "<p>Trailing content.</p>\n"
577
+    )
578
+    prepared.definition_of_done.touched_files.append(str(chapter))
579
+    agent.session.append(
580
+        Message(
581
+            role=Role.USER,
582
+            content=(
583
+                "Repair focus:\n"
584
+                f"- Improve `{chapter}`: expected exactly one closing </html> tag (found 2).\n"
585
+                f"- Immediate next step: replace `{chapter}` with one complete valid HTML document.\n"
586
+            ),
587
+        )
588
+    )
589
+
590
+    content = "I will fix the malformed troubleshooting HTML structure."
591
+    decision = await runtime.turn_completion.handle_text_response(
592
+        content=content,
593
+        response_content=content,
594
+        task=prepared.task,
595
+        effective_task=prepared.effective_task,
596
+        iterations=1,
597
+        max_iterations=agent.config.max_iterations,
598
+        actions_taken=[],
599
+        continuation_count=0,
600
+        dod=prepared.definition_of_done,
601
+        emit=capture,
602
+        summary=prepared.summary,
603
+        executor=prepared.executor,
604
+        rollback_plan=prepared.rollback_plan,
605
+    )
606
+
607
+    assert decision.action == TurnCompletionAction.CONTINUE
608
+    message = agent.session.messages[-1].content
609
+    assert message.startswith("[CONTINUE QUALITY REPAIR]")
610
+    assert "malformed HTML document structure" in message
611
+    assert "expected exactly one closing </html>" in message
612
+    assert "exactly one closing `</body>` tag" in message
613
+    assert "exactly one `write(file_path=..., content=...)`" in message
614
+
615
+
540616
 @pytest.mark.asyncio
541617
 async def test_turn_completion_continues_queued_quality_repair_after_summary(
542618
     temp_dir: Path,