Steer structural HTML repairs
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
6236b4d4c3d85c4e3ed6fa14721a2790b37ad941
- Parents
- 3f4faaf
- Tree
0a212d2
6236b4d
6236b4d4c3d85c4e3ed6fa14721a2790b37ad941
3f4faaf
0a212d2

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/finalization.py | 16 | 2 |
| M | src/loader/runtime/repair.py | 24 | 16 |
| M | src/loader/runtime/repair_focus.py | 49 | 0 |
| M | src/loader/runtime/tool_batches.py | 13 | 11 |
| M | src/loader/runtime/turn_completion.py | 25 | 16 |
| M | tests/test_finalization.py | 35 | 0 |
| M | tests/test_repair.py | 42 | 0 |
| M | tests/test_turn_completion.py | 76 | 0 |
src/loader/runtime/finalization.py (modified)
@@ -33,6 +33,7 @@ from .executor import ToolExecutor | ||
| 33 | 33 | from .logging import get_runtime_logger |
| 34 | 34 | from .memory import MemoryStore |
| 35 | 35 | from .policy_timeline import append_verification_timeline_entry |
| 36 | +from .repair_focus import html_repair_issue_is_structural | |
| 36 | 37 | from .session import normalize_usage |
| 37 | 38 | from .tracing import RuntimeTracer |
| 38 | 39 | from .verification_observations import ( |
@@ -1364,10 +1365,20 @@ def _build_html_quality_repair_guidance( | ||
| 1364 | 1365 | lines = ["Repair focus:"] |
| 1365 | 1366 | lines.extend(_html_quality_repair_target_lines(targets)) |
| 1366 | 1367 | primary = targets[0] |
| 1368 | + if html_repair_issue_is_structural(primary.issue): | |
| 1369 | + immediate_step = ( | |
| 1370 | + f"- Immediate next step: replace `{primary.artifact_path}` with one complete " | |
| 1371 | + "valid HTML document that preserves the page content while fixing the " | |
| 1372 | + "listed structure issue." | |
| 1373 | + ) | |
| 1374 | + else: | |
| 1375 | + immediate_step = ( | |
| 1376 | + f"- Immediate next step: edit `{primary.artifact_path}` with a substantial " | |
| 1377 | + "expansion or replacement that satisfies its listed quality issue." | |
| 1378 | + ) | |
| 1367 | 1379 | lines.extend( |
| 1368 | 1380 | [ |
| 1369 | - f"- Immediate next step: edit `{primary.artifact_path}` with a substantial " | |
| 1370 | - "expansion or replacement that satisfies its listed quality issue.", | |
| 1381 | + immediate_step, | |
| 1371 | 1382 | *_html_quality_repair_strategy_lines(), |
| 1372 | 1383 | "- Repair every listed quality target in order before any final answer; " |
| 1373 | 1384 | "do not stop after touching only the first file.", |
@@ -1398,6 +1409,9 @@ def _html_quality_repair_target_lines( | ||
| 1398 | 1409 | |
| 1399 | 1410 | def _html_quality_repair_strategy_lines() -> list[str]: |
| 1400 | 1411 | return [ |
| 1412 | + "- If a target mentions duplicate closing tags, closing tags in the wrong order, " | |
| 1413 | + "or content after `</html>`, replace the malformed file with one complete " | |
| 1414 | + "valid HTML document; do not append more content after an existing closing tag.", | |
| 1401 | 1415 | "- If a target says thin content, add enough concrete prose to comfortably " |
| 1402 | 1416 | "exceed the expected text-character floor; if it says insufficient " |
| 1403 | 1417 | "structured content, add enough real sections, lists, code, tables, or " |
src/loader/runtime/repair.py (modified)
@@ -23,7 +23,10 @@ from .recovery import detect_missing_mutation_payload | ||
| 23 | 23 | from .repair_focus import ( |
| 24 | 24 | ActiveRepairContext, |
| 25 | 25 | extract_active_repair_context, |
| 26 | + html_repair_issue_is_structural, | |
| 26 | 27 | recent_repair_mutation_context_failed, |
| 28 | + repair_line_is_html_quality, | |
| 29 | + repair_line_matches_target, | |
| 27 | 30 | ) |
| 28 | 31 | from .workflow import ( |
| 29 | 32 | infer_output_outline_label, |
@@ -835,7 +838,8 @@ class ResponseRepairer: | ||
| 835 | 838 | ( |
| 836 | 839 | line |
| 837 | 840 | for line in repair.repair_lines |
| 838 | - if target in line and _repair_line_is_html_quality(line) | |
| 841 | + if repair_line_matches_target(line, target) | |
| 842 | + and _repair_line_is_html_quality(line) | |
| 839 | 843 | ), |
| 840 | 844 | next( |
| 841 | 845 | ( |
@@ -872,17 +876,31 @@ class ResponseRepairer: | ||
| 872 | 876 | ] |
| 873 | 877 | if issue_line: |
| 874 | 878 | lines.append(f"- Current verifier issue: {issue_line[2:] if issue_line.startswith('- ') else issue_line}") |
| 875 | - force_write = recent_repair_mutation_context_failed( | |
| 879 | + structural_issue = html_repair_issue_is_structural(issue_line) | |
| 880 | + force_write = structural_issue or recent_repair_mutation_context_failed( | |
| 876 | 881 | self.context.session.messages, |
| 877 | 882 | target, |
| 878 | 883 | ) |
| 879 | 884 | if force_write: |
| 885 | + structural_suffix = ( | |
| 886 | + " Ensure the replacement has exactly one closing `</body>` tag, " | |
| 887 | + "exactly one closing `</html>` tag, and no content after `</html>`." | |
| 888 | + if structural_issue | |
| 889 | + else "" | |
| 890 | + ) | |
| 891 | + reason = ( | |
| 892 | + "- The active verifier is reporting malformed HTML document " | |
| 893 | + "structure. " | |
| 894 | + if structural_issue | |
| 895 | + else "- Recent `edit`/`patch` attempts for this same target failed " | |
| 896 | + "against stale or malformed context. " | |
| 897 | + ) | |
| 880 | 898 | lines.extend( |
| 881 | 899 | [ |
| 882 | - "- Recent `edit`/`patch` attempts for this same target failed " | |
| 883 | - "against stale or malformed context. Use exactly one " | |
| 900 | + reason | |
| 901 | + + "Use exactly one " | |
| 884 | 902 | "`write(file_path=..., content=...)` call now with a complete " |
| 885 | - "valid HTML document for that file.", | |
| 903 | + f"valid HTML document for that file.{structural_suffix}", | |
| 886 | 904 | "- Do not call `read`, `edit`, `patch`, TodoWrite, or a final " |
| 887 | 905 | "summary on this retry; emit the `write` mutation tool call now.", |
| 888 | 906 | ] |
@@ -1967,17 +1985,7 @@ def _is_summary_artifact_path(path: Path) -> bool: | ||
| 1967 | 1985 | |
| 1968 | 1986 | |
| 1969 | 1987 | def _repair_line_is_html_quality(line: str) -> bool: |
| 1970 | - lowered = line.lower() | |
| 1971 | - return any( | |
| 1972 | - phrase in lowered | |
| 1973 | - for phrase in ( | |
| 1974 | - "thin content", | |
| 1975 | - "insufficient structured content", | |
| 1976 | - "content-quality", | |
| 1977 | - "quality target", | |
| 1978 | - "html guide content quality", | |
| 1979 | - ) | |
| 1980 | - ) | |
| 1988 | + return repair_line_is_html_quality(line) | |
| 1981 | 1989 | |
| 1982 | 1990 | |
| 1983 | 1991 | def _should_encourage_initial_version( |
src/loader/runtime/repair_focus.py (modified)
@@ -22,6 +22,25 @@ _STALE_REPAIR_MUTATION_MARKERS = ( | ||
| 22 | 22 | "structured patch hunks overlap", |
| 23 | 23 | "failed to complete the operation after", |
| 24 | 24 | ) |
| 25 | +_HTML_REPAIR_ISSUE_MARKERS = ( | |
| 26 | + "thin content", | |
| 27 | + "insufficient structured content", | |
| 28 | + "content-quality", | |
| 29 | + "content quality", | |
| 30 | + "quality target", | |
| 31 | + "html guide content quality", | |
| 32 | + "expected exactly one closing </body>", | |
| 33 | + "expected exactly one closing </html>", | |
| 34 | + "content appears after closing </html>", | |
| 35 | + "closing </body> appears after closing </html>", | |
| 36 | + "missing <h1>", | |
| 37 | +) | |
| 38 | +_HTML_STRUCTURAL_REPAIR_MARKERS = ( | |
| 39 | + "expected exactly one closing </body>", | |
| 40 | + "expected exactly one closing </html>", | |
| 41 | + "content appears after closing </html>", | |
| 42 | + "closing </body> appears after closing </html>", | |
| 43 | +) | |
| 25 | 44 | |
| 26 | 45 | |
| 27 | 46 | @dataclass(frozen=True) |
@@ -144,6 +163,36 @@ def recent_repair_mutation_context_failed( | ||
| 144 | 163 | return False |
| 145 | 164 | |
| 146 | 165 | |
| 166 | +def repair_line_is_html_quality(line: str) -> bool: | |
| 167 | + """Return whether a repair-focus line describes generated HTML quality.""" | |
| 168 | + | |
| 169 | + lowered = str(line or "").lower() | |
| 170 | + return any(marker in lowered for marker in _HTML_REPAIR_ISSUE_MARKERS) | |
| 171 | + | |
| 172 | + | |
| 173 | +def repair_line_matches_target(line: str, target: str) -> bool: | |
| 174 | + """Return whether a repair-focus line refers to the target path.""" | |
| 175 | + | |
| 176 | + line_text = str(line or "") | |
| 177 | + target_text = str(target or "").strip() | |
| 178 | + if not line_text or not target_text: | |
| 179 | + return False | |
| 180 | + normalized_target = normalize_repair_path(target_text) | |
| 181 | + if target_text in line_text or (normalized_target and normalized_target in line_text): | |
| 182 | + return True | |
| 183 | + for candidate in re.findall(r"`([^`]+)`", line_text): | |
| 184 | + if normalize_repair_path(candidate) == normalized_target: | |
| 185 | + return True | |
| 186 | + return False | |
| 187 | + | |
| 188 | + | |
| 189 | +def html_repair_issue_is_structural(line: str) -> bool: | |
| 190 | + """Return whether an HTML quality issue is about document structure.""" | |
| 191 | + | |
| 192 | + lowered = str(line or "").lower() | |
| 193 | + return any(marker in lowered for marker in _HTML_STRUCTURAL_REPAIR_MARKERS) | |
| 194 | + | |
| 195 | + | |
| 147 | 196 | def normalize_repair_path(raw_path: str) -> str: |
| 148 | 197 | text = str(raw_path or "").strip() |
| 149 | 198 | if not text: |
src/loader/runtime/tool_batches.py (modified)
@@ -36,8 +36,10 @@ from .policy_timeline import append_verification_timeline_entry | ||
| 36 | 36 | from .recovery import RecoveryContext, detect_missing_mutation_payload |
| 37 | 37 | from .repair_focus import ( |
| 38 | 38 | extract_active_repair_context, |
| 39 | + html_repair_issue_is_structural, | |
| 39 | 40 | path_within_allowed_roots, |
| 40 | 41 | recent_repair_mutation_context_failed, |
| 42 | + repair_line_is_html_quality, | |
| 41 | 43 | ) |
| 42 | 44 | from .safeguard_services import extract_shell_text_rewrite_target |
| 43 | 45 | from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate |
@@ -2042,17 +2044,23 @@ class ToolBatchRunner: | ||
| 2042 | 2044 | if repair_issue |
| 2043 | 2045 | else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n" |
| 2044 | 2046 | ) |
| 2047 | + structural_repair = html_repair_issue_is_structural(repair_issue) | |
| 2045 | 2048 | force_write = recent_repair_mutation_context_failed( |
| 2046 | 2049 | self.context.session.messages, |
| 2047 | 2050 | target, |
| 2048 | 2051 | ) |
| 2049 | - if force_write: | |
| 2052 | + if force_write or structural_repair: | |
| 2053 | + structural_suffix = ( | |
| 2054 | + " Ensure the replacement has exactly one closing `</body>` tag, " | |
| 2055 | + "exactly one closing `</html>` tag, and no content after `</html>`." | |
| 2056 | + if structural_repair | |
| 2057 | + else "" | |
| 2058 | + ) | |
| 2050 | 2059 | immediate_step = ( |
| 2051 | 2060 | f"- Immediate next step: rewrite `{target}` with one `write` call.\n" |
| 2052 | - "- Recent `edit`/`patch` attempts for this file failed against stale " | |
| 2053 | - "or malformed context. Use `write(file_path=..., content=...)` with " | |
| 2061 | + "- Use `write(file_path=..., content=...)` with " | |
| 2054 | 2062 | "a complete valid HTML document, and do not call `read`, `edit`, " |
| 2055 | - "`patch`, or TodoWrite again first." | |
| 2063 | + f"`patch`, or TodoWrite again first.{structural_suffix}" | |
| 2056 | 2064 | ) |
| 2057 | 2065 | else: |
| 2058 | 2066 | immediate_step = ( |
@@ -3406,13 +3414,7 @@ def _repair_context_is_html_quality(repair: Any) -> bool: | ||
| 3406 | 3414 | |
| 3407 | 3415 | |
| 3408 | 3416 | def _repair_line_is_html_quality(line: str) -> bool: |
| 3409 | - lowered = str(line or "").lower() | |
| 3410 | - return ( | |
| 3411 | - "thin content" in lowered | |
| 3412 | - or "insufficient structured content" in lowered | |
| 3413 | - or "content-quality" in lowered | |
| 3414 | - or "content quality" in lowered | |
| 3415 | - ) | |
| 3417 | + return repair_line_is_html_quality(line) | |
| 3416 | 3418 | |
| 3417 | 3419 | |
| 3418 | 3420 | def _next_quality_repair_path(repair: Any, *, changed_path: str) -> str: |
src/loader/runtime/turn_completion.py (modified)
@@ -29,7 +29,10 @@ from .policy_timeline import ( | ||
| 29 | 29 | from .repair import ResponseRepairer |
| 30 | 30 | from .repair_focus import ( |
| 31 | 31 | extract_active_repair_context, |
| 32 | + html_repair_issue_is_structural, | |
| 32 | 33 | recent_repair_mutation_context_failed, |
| 34 | + repair_line_is_html_quality, | |
| 35 | + repair_line_matches_target, | |
| 33 | 36 | ) |
| 34 | 37 | from .rollback import RollbackPlan |
| 35 | 38 | from .verification_observations import VerificationObservation |
@@ -531,7 +534,7 @@ def _build_html_quality_repair_continuation( | ||
| 531 | 534 | if not target_text: |
| 532 | 535 | return None |
| 533 | 536 | |
| 534 | - force_write = recent_repair_mutation_context_failed( | |
| 537 | + stale_context = recent_repair_mutation_context_failed( | |
| 535 | 538 | cast(list[Message], messages), |
| 536 | 539 | target_text, |
| 537 | 540 | ) |
@@ -539,21 +542,35 @@ def _build_html_quality_repair_continuation( | ||
| 539 | 542 | ( |
| 540 | 543 | line[2:] if line.startswith("- ") else line |
| 541 | 544 | for line in repair.repair_lines |
| 542 | - if target_text in line and _repair_line_is_html_quality(line) | |
| 545 | + if repair_line_matches_target(line, target_text) | |
| 546 | + and _repair_line_is_html_quality(line) | |
| 543 | 547 | ), |
| 544 | 548 | "", |
| 545 | 549 | ) |
| 546 | 550 | issue_sentence = f" Current verifier issue: {issue_line}" if issue_line else "" |
| 551 | + structural_issue = html_repair_issue_is_structural(issue_line) | |
| 552 | + force_write = stale_context or structural_issue | |
| 547 | 553 | if force_write: |
| 554 | + structural_sentence = ( | |
| 555 | + " Ensure the replacement has exactly one closing `</body>` tag, " | |
| 556 | + "exactly one closing `</html>` tag, and no content after `</html>`." | |
| 557 | + if structural_issue | |
| 558 | + else "" | |
| 559 | + ) | |
| 560 | + failure_reason = ( | |
| 561 | + "The active verifier is reporting malformed HTML document structure. " | |
| 562 | + if structural_issue and not stale_context | |
| 563 | + else "Recent `patch`/`edit` attempts for this same file failed because their " | |
| 564 | + "remembered context was stale or malformed. " | |
| 565 | + ) | |
| 548 | 566 | prompt = ( |
| 549 | 567 | "[CONTINUE QUALITY REPAIR]\n" |
| 550 | 568 | "You just described a content-quality repair, but did not execute it. " |
| 551 | - "Recent `patch`/`edit` attempts for this same file failed because their " | |
| 552 | - "remembered context was stale or malformed. " | |
| 569 | + f"{failure_reason}" | |
| 553 | 570 | f"Emit exactly one `write(file_path=..., content=...)` tool call for `{target_text}` now." |
| 554 | 571 | f"{issue_sentence} " |
| 555 | 572 | "Write a complete valid HTML document for this file that preserves the chapter topic " |
| 556 | - "and satisfies the listed quality issue. Do not call `read`, `edit`, `patch`, " | |
| 573 | + f"and satisfies the listed quality issue.{structural_sentence} Do not call `read`, `edit`, `patch`, " | |
| 557 | 574 | "`TodoWrite`, or summarize." |
| 558 | 575 | ) |
| 559 | 576 | return InProgressContinuation(prompt=prompt, target=None) |
@@ -591,6 +608,8 @@ def _build_pending_html_quality_repair_continuation( | ||
| 591 | 608 | "content-quality" in lowered |
| 592 | 609 | or "thin content" in lowered |
| 593 | 610 | or "insufficient structured content" in lowered |
| 611 | + or "expected exactly one closing" in lowered | |
| 612 | + or "content appears after closing </html>" in lowered | |
| 594 | 613 | ) |
| 595 | 614 | if not ( |
| 596 | 615 | latest_user_content.startswith("[CONTINUE QUALITY REPAIR]") |
@@ -610,17 +629,7 @@ def _looks_like_progress_intent(content: str) -> bool: | ||
| 610 | 629 | |
| 611 | 630 | |
| 612 | 631 | def _repair_line_is_html_quality(line: str) -> bool: |
| 613 | - lowered = line.lower() | |
| 614 | - return any( | |
| 615 | - phrase in lowered | |
| 616 | - for phrase in ( | |
| 617 | - "thin content", | |
| 618 | - "insufficient structured content", | |
| 619 | - "content-quality", | |
| 620 | - "quality target", | |
| 621 | - "html guide content quality", | |
| 622 | - ) | |
| 623 | - ) | |
| 632 | + return repair_line_is_html_quality(line) | |
| 624 | 633 | |
| 625 | 634 | |
| 626 | 635 | def _next_missing_planned_artifact( |
tests/test_finalization.py (modified)
@@ -452,6 +452,41 @@ def test_verification_repair_guidance_replaces_stale_focus_for_html_quality_issu | ||
| 452 | 452 | assert str(third_chapter.resolve(strict=False)) in repair.allowed_paths |
| 453 | 453 | |
| 454 | 454 | |
| 455 | +def test_verification_repair_guidance_prioritizes_structural_html_quality_issue( | |
| 456 | + temp_dir: Path, | |
| 457 | +) -> None: | |
| 458 | + chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html" | |
| 459 | + chapter.parent.mkdir(parents=True) | |
| 460 | + chapter.write_text( | |
| 461 | + "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n" | |
| 462 | + "<p>Trailing content.</p>\n" | |
| 463 | + ) | |
| 464 | + dod = create_definition_of_done("Create an equally thorough HTML guide.") | |
| 465 | + dod.evidence = [ | |
| 466 | + VerificationEvidence( | |
| 467 | + command="quality", | |
| 468 | + passed=False, | |
| 469 | + output=( | |
| 470 | + "HTML guide content quality issues:\n" | |
| 471 | + f"{chapter}: expected exactly one closing </html> tag (found 2)\n" | |
| 472 | + ), | |
| 473 | + ) | |
| 474 | + ] | |
| 475 | + | |
| 476 | + guidance = _build_verification_repair_guidance( | |
| 477 | + dod, | |
| 478 | + project_root=temp_dir, | |
| 479 | + ) | |
| 480 | + repair = extract_active_repair_context([Message(role=Role.USER, content=guidance)]) | |
| 481 | + | |
| 482 | + assert f"Improve `{chapter}`: expected exactly one closing </html> tag" in guidance | |
| 483 | + assert f"Immediate next step: replace `{chapter}` with one complete" in guidance | |
| 484 | + assert "replace the malformed file with one complete valid HTML document" in guidance | |
| 485 | + assert "do not append more content after an existing closing tag" in guidance | |
| 486 | + assert repair is not None | |
| 487 | + assert repair.artifact_path == str(chapter.resolve(strict=False)) | |
| 488 | + | |
| 489 | + | |
| 455 | 490 | def test_verification_repair_guidance_keeps_multi_file_quality_worklist( |
| 456 | 491 | temp_dir: Path, |
| 457 | 492 | ) -> None: |
tests/test_repair.py (modified)
@@ -371,6 +371,48 @@ def test_empty_response_retry_forces_write_after_stale_quality_repair_context( | ||
| 371 | 371 | assert "Do not call `read`, `edit`, `patch`, TodoWrite" in decision.retry_message |
| 372 | 372 | |
| 373 | 373 | |
| 374 | +def test_empty_response_retry_forces_write_for_structural_html_repair( | |
| 375 | + temp_dir: Path, | |
| 376 | +) -> None: | |
| 377 | + context = build_context( | |
| 378 | + temp_dir=temp_dir, | |
| 379 | + use_react=False, | |
| 380 | + ) | |
| 381 | + repairer = ResponseRepairer(context) | |
| 382 | + chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html" | |
| 383 | + chapter.parent.mkdir(parents=True) | |
| 384 | + chapter.write_text( | |
| 385 | + "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n" | |
| 386 | + "<p>Trailing content.</p>\n" | |
| 387 | + ) | |
| 388 | + context.session.append( | |
| 389 | + Message( | |
| 390 | + role=Role.USER, | |
| 391 | + content=( | |
| 392 | + "Repair focus:\n" | |
| 393 | + f"- Improve `{chapter}`: content appears after closing </html>.\n" | |
| 394 | + f"- Immediate next step: replace `{chapter}` with one complete valid HTML document.\n" | |
| 395 | + ), | |
| 396 | + ) | |
| 397 | + ) | |
| 398 | + dod = create_definition_of_done("Create an equally thorough HTML guide.") | |
| 399 | + dod.touched_files = [str(chapter)] | |
| 400 | + | |
| 401 | + decision = repairer.handle_empty_response( | |
| 402 | + task="Create an equally thorough HTML guide.", | |
| 403 | + original_task=None, | |
| 404 | + empty_retry_count=1, | |
| 405 | + max_empty_retries=2, | |
| 406 | + dod=dod, | |
| 407 | + ) | |
| 408 | + | |
| 409 | + assert decision.should_continue is True | |
| 410 | + assert decision.retry_message is not None | |
| 411 | + assert "content appears after closing </html>" in decision.retry_message | |
| 412 | + assert "Use exactly one `write(file_path=..., content=...)`" in decision.retry_message | |
| 413 | + assert "exactly one closing `</body>` tag" in decision.retry_message | |
| 414 | + | |
| 415 | + | |
| 374 | 416 | def test_empty_response_retry_mentions_write_can_create_missing_parent_directories( |
| 375 | 417 | temp_dir: Path, |
| 376 | 418 | ) -> None: |
tests/test_turn_completion.py (modified)
@@ -537,6 +537,82 @@ async def test_turn_completion_forces_write_after_stale_quality_repair_context( | ||
| 537 | 537 | assert "Do not call `read`, `edit`, `patch`, `TodoWrite`, or summarize." in message |
| 538 | 538 | |
| 539 | 539 | |
| 540 | +@pytest.mark.asyncio | |
| 541 | +async def test_turn_completion_forces_write_for_structural_html_repair( | |
| 542 | + temp_dir: Path, | |
| 543 | +) -> None: | |
| 544 | + backend = ScriptedBackend() | |
| 545 | + config = non_streaming_config() | |
| 546 | + config.reasoning.completion_check = False | |
| 547 | + agent = Agent( | |
| 548 | + backend=backend, | |
| 549 | + config=config, | |
| 550 | + project_root=temp_dir, | |
| 551 | + ) | |
| 552 | + runtime = ConversationRuntime(agent) | |
| 553 | + events = [] | |
| 554 | + | |
| 555 | + async def capture(event) -> None: | |
| 556 | + events.append(event) | |
| 557 | + | |
| 558 | + prepared = await runtime.turn_preparation.prepare( | |
| 559 | + task="Create an equally thorough HTML guide.", | |
| 560 | + emit=capture, | |
| 561 | + requested_mode="execute", | |
| 562 | + original_task=None, | |
| 563 | + on_user_question=None, | |
| 564 | + ) | |
| 565 | + await runtime.phase_tracker.enter( | |
| 566 | + TurnPhase.ASSISTANT, | |
| 567 | + capture, | |
| 568 | + detail="Requesting assistant response", | |
| 569 | + reason_code="request_assistant_response", | |
| 570 | + ) | |
| 571 | + | |
| 572 | + chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html" | |
| 573 | + chapter.parent.mkdir(parents=True) | |
| 574 | + chapter.write_text( | |
| 575 | + "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n" | |
| 576 | + "<p>Trailing content.</p>\n" | |
| 577 | + ) | |
| 578 | + prepared.definition_of_done.touched_files.append(str(chapter)) | |
| 579 | + agent.session.append( | |
| 580 | + Message( | |
| 581 | + role=Role.USER, | |
| 582 | + content=( | |
| 583 | + "Repair focus:\n" | |
| 584 | + f"- Improve `{chapter}`: expected exactly one closing </html> tag (found 2).\n" | |
| 585 | + f"- Immediate next step: replace `{chapter}` with one complete valid HTML document.\n" | |
| 586 | + ), | |
| 587 | + ) | |
| 588 | + ) | |
| 589 | + | |
| 590 | + content = "I will fix the malformed troubleshooting HTML structure." | |
| 591 | + decision = await runtime.turn_completion.handle_text_response( | |
| 592 | + content=content, | |
| 593 | + response_content=content, | |
| 594 | + task=prepared.task, | |
| 595 | + effective_task=prepared.effective_task, | |
| 596 | + iterations=1, | |
| 597 | + max_iterations=agent.config.max_iterations, | |
| 598 | + actions_taken=[], | |
| 599 | + continuation_count=0, | |
| 600 | + dod=prepared.definition_of_done, | |
| 601 | + emit=capture, | |
| 602 | + summary=prepared.summary, | |
| 603 | + executor=prepared.executor, | |
| 604 | + rollback_plan=prepared.rollback_plan, | |
| 605 | + ) | |
| 606 | + | |
| 607 | + assert decision.action == TurnCompletionAction.CONTINUE | |
| 608 | + message = agent.session.messages[-1].content | |
| 609 | + assert message.startswith("[CONTINUE QUALITY REPAIR]") | |
| 610 | + assert "malformed HTML document structure" in message | |
| 611 | + assert "expected exactly one closing </html>" in message | |
| 612 | + assert "exactly one closing `</body>` tag" in message | |
| 613 | + assert "exactly one `write(file_path=..., content=...)`" in message | |
| 614 | + | |
| 615 | + | |
| 540 | 616 | @pytest.mark.asyncio |
| 541 | 617 | async def test_turn_completion_continues_queued_quality_repair_after_summary( |
| 542 | 618 | temp_dir: Path, |