Force stale repairs to write
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
3f4faafb565a1d9a79d0fac446b6cc084c1038ad- Parents
-
c7a21e5 - Tree
62365ed
3f4faaf
3f4faafb565a1d9a79d0fac446b6cc084c1038adc7a21e5
62365ed| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/dod.py
|
16 | 0 |
| M |
src/loader/runtime/repair.py
|
32 | 12 |
| M |
src/loader/runtime/repair_focus.py
|
55 | 0 |
| M |
src/loader/runtime/tool_batches.py
|
24 | 4 |
| M |
src/loader/runtime/turn_completion.py
|
22 | 1 |
| M |
src/loader/tools/fs_safety.py
|
21 | 8 |
| M |
tests/test_dod.py
|
65 | 0 |
| M |
tests/test_expanded_tools.py
|
28 | 0 |
| M |
tests/test_repair.py
|
50 | 0 |
| M |
tests/test_tool_batches.py
|
62 | 0 |
| M |
tests/test_turn_completion.py
|
82 | 0 |
src/loader/runtime/dod.pymodified@@ -840,6 +840,8 @@ def _derive_multi_page_html_quality_command( | |||
| 840 | f"minimum_chapter_blocks = {quality_floor.chapter_blocks}", | 840 | f"minimum_chapter_blocks = {quality_floor.chapter_blocks}", |
| 841 | "tag_pattern = re.compile(r'<[^>]+>')", | 841 | "tag_pattern = re.compile(r'<[^>]+>')", |
| 842 | "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)", | 842 | "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)", |
| 843 | + "html_close_pattern = re.compile(r'</html\\s*>', re.IGNORECASE)", | ||
| 844 | + "body_close_pattern = re.compile(r'</body\\s*>', re.IGNORECASE)", | ||
| 843 | "issues = []", | 845 | "issues = []", |
| 844 | "checked = 0", | 846 | "checked = 0", |
| 845 | "for raw_path in paths:", | 847 | "for raw_path in paths:", |
@@ -852,8 +854,22 @@ def _derive_multi_page_html_quality_command( | |||
| 852 | " plain = re.sub(r'\\s+', ' ', plain).strip()", | 854 | " plain = re.sub(r'\\s+', ' ', plain).strip()", |
| 853 | " content_blocks = len(content_block_pattern.findall(text))", | 855 | " content_blocks = len(content_block_pattern.findall(text))", |
| 854 | " has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))", | 856 | " has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))", |
| 857 | + " html_close_matches = list(html_close_pattern.finditer(text))", | ||
| 858 | + " body_close_matches = list(body_close_pattern.finditer(text))", | ||
| 855 | " minimum_chars = minimum_index_chars if path.name.lower() == 'index.html' else minimum_chapter_chars", | 859 | " minimum_chars = minimum_index_chars if path.name.lower() == 'index.html' else minimum_chapter_chars", |
| 856 | " minimum_blocks = minimum_index_blocks if path.name.lower() == 'index.html' else minimum_chapter_blocks", | 860 | " minimum_blocks = minimum_index_blocks if path.name.lower() == 'index.html' else minimum_chapter_blocks", |
| 861 | + " if len(body_close_matches) != 1:", | ||
| 862 | + " issues.append(", | ||
| 863 | + " f'{path}: expected exactly one closing </body> tag (found {len(body_close_matches)})'", | ||
| 864 | + " )", | ||
| 865 | + " if len(html_close_matches) != 1:", | ||
| 866 | + " issues.append(", | ||
| 867 | + " f'{path}: expected exactly one closing </html> tag (found {len(html_close_matches)})'", | ||
| 868 | + " )", | ||
| 869 | + " if html_close_matches and text[html_close_matches[-1].end():].strip():", | ||
| 870 | + " issues.append(f'{path}: content appears after closing </html>')", | ||
| 871 | + " if html_close_matches and body_close_matches and body_close_matches[-1].start() > html_close_matches[-1].start():", | ||
| 872 | + " issues.append(f'{path}: closing </body> appears after closing </html>')", | ||
| 857 | " if not has_h1:", | 873 | " if not has_h1:", |
| 858 | " issues.append(f'{path}: missing <h1>')", | 874 | " issues.append(f'{path}: missing <h1>')", |
| 859 | " if len(plain) < minimum_chars:", | 875 | " if len(plain) < minimum_chars:", |
src/loader/runtime/repair.pymodified@@ -20,7 +20,11 @@ from .dod import ( | |||
| 20 | from .parsing import parse_tool_calls | 20 | from .parsing import parse_tool_calls |
| 21 | from .path_display import display_runtime_path | 21 | from .path_display import display_runtime_path |
| 22 | from .recovery import detect_missing_mutation_payload | 22 | from .recovery import detect_missing_mutation_payload |
| 23 | -from .repair_focus import ActiveRepairContext, extract_active_repair_context | 23 | +from .repair_focus import ( |
| 24 | + ActiveRepairContext, | ||
| 25 | + extract_active_repair_context, | ||
| 26 | + recent_repair_mutation_context_failed, | ||
| 27 | +) | ||
| 24 | from .workflow import ( | 28 | from .workflow import ( |
| 25 | infer_output_outline_label, | 29 | infer_output_outline_label, |
| 26 | infer_pending_todo_output_target, | 30 | infer_pending_todo_output_target, |
@@ -868,18 +872,34 @@ class ResponseRepairer: | |||
| 868 | ] | 872 | ] |
| 869 | if issue_line: | 873 | if issue_line: |
| 870 | lines.append(f"- Current verifier issue: {issue_line[2:] if issue_line.startswith('- ') else issue_line}") | 874 | lines.append(f"- Current verifier issue: {issue_line[2:] if issue_line.startswith('- ') else issue_line}") |
| 871 | - lines.extend( | 875 | + force_write = recent_repair_mutation_context_failed( |
| 872 | - [ | 876 | + self.context.session.messages, |
| 873 | - "- Use one bounded `edit`, `patch`, or `write` call for that same " | 877 | + target, |
| 874 | - "file now. Append or replace a body section with 4-6 substantive " | ||
| 875 | - "sections, lists, commands, or examples; do not attempt a giant " | ||
| 876 | - "full-page rewrite from memory.", | ||
| 877 | - "- Do not add table-of-contents entries, do not retarget links, and " | ||
| 878 | - "do not reopen unrelated reference files for this retry.", | ||
| 879 | - "- No narration, no TodoWrite, no final summary, and no empty " | ||
| 880 | - "response; emit the mutation tool call now.", | ||
| 881 | - ] | ||
| 882 | ) | 878 | ) |
| 879 | + if force_write: | ||
| 880 | + lines.extend( | ||
| 881 | + [ | ||
| 882 | + "- Recent `edit`/`patch` attempts for this same target failed " | ||
| 883 | + "against stale or malformed context. Use exactly one " | ||
| 884 | + "`write(file_path=..., content=...)` call now with a complete " | ||
| 885 | + "valid HTML document for that file.", | ||
| 886 | + "- Do not call `read`, `edit`, `patch`, TodoWrite, or a final " | ||
| 887 | + "summary on this retry; emit the `write` mutation tool call now.", | ||
| 888 | + ] | ||
| 889 | + ) | ||
| 890 | + else: | ||
| 891 | + lines.extend( | ||
| 892 | + [ | ||
| 893 | + "- Use one bounded `edit`, `patch`, or `write` call for that same " | ||
| 894 | + "file now. Append or replace a body section with 4-6 substantive " | ||
| 895 | + "sections, lists, commands, or examples; do not attempt a giant " | ||
| 896 | + "full-page rewrite from memory.", | ||
| 897 | + "- Do not add table-of-contents entries, do not retarget links, and " | ||
| 898 | + "do not reopen unrelated reference files for this retry.", | ||
| 899 | + "- No narration, no TodoWrite, no final summary, and no empty " | ||
| 900 | + "response; emit the mutation tool call now.", | ||
| 901 | + ] | ||
| 902 | + ) | ||
| 883 | if remaining_line: | 903 | if remaining_line: |
| 884 | lines.append(remaining_line) | 904 | lines.append(remaining_line) |
| 885 | return "\n".join(lines) | 905 | return "\n".join(lines) |
src/loader/runtime/repair_focus.pymodified@@ -9,6 +9,20 @@ from pathlib import Path | |||
| 9 | 9 | ||
| 10 | from ..llm.base import Message | 10 | from ..llm.base import Message |
| 11 | 11 | ||
| 12 | +_STALE_REPAIR_MUTATION_MARKERS = ( | ||
| 13 | + "old_string not found", | ||
| 14 | + "old_string was stale", | ||
| 15 | + "do not retry the same remembered text", | ||
| 16 | + "patch hunks are missing", | ||
| 17 | + "provide structured patch hunks", | ||
| 18 | + "hunks must not be empty", | ||
| 19 | + "structured patch context mismatch", | ||
| 20 | + "structured patch hunk consumed", | ||
| 21 | + "structured patch references lines past the end", | ||
| 22 | + "structured patch hunks overlap", | ||
| 23 | + "failed to complete the operation after", | ||
| 24 | +) | ||
| 25 | + | ||
| 12 | 26 | ||
| 13 | @dataclass(frozen=True) | 27 | @dataclass(frozen=True) |
| 14 | class ActiveRepairContext: | 28 | class ActiveRepairContext: |
@@ -106,6 +120,30 @@ def path_matches_allowed_paths(path: str, allowed_paths: tuple[str, ...]) -> boo | |||
| 106 | return normalized in normalized_paths | 120 | return normalized in normalized_paths |
| 107 | 121 | ||
| 108 | 122 | ||
| 123 | +def recent_repair_mutation_context_failed( | ||
| 124 | + messages: list[Message], | ||
| 125 | + target: str, | ||
| 126 | + *, | ||
| 127 | + lookback: int = 24, | ||
| 128 | +) -> bool: | ||
| 129 | + """Return whether recent repair attempts proved the target context is stale.""" | ||
| 130 | + | ||
| 131 | + target_tokens = _target_match_tokens(target) | ||
| 132 | + if not target_tokens: | ||
| 133 | + return False | ||
| 134 | + | ||
| 135 | + for message in reversed(messages[-lookback:]): | ||
| 136 | + content = str(getattr(message, "content", "") or "") | ||
| 137 | + if not content: | ||
| 138 | + continue | ||
| 139 | + lowered = content.lower() | ||
| 140 | + if not any(token and token in content for token in target_tokens): | ||
| 141 | + continue | ||
| 142 | + if any(marker in lowered for marker in _STALE_REPAIR_MUTATION_MARKERS): | ||
| 143 | + return True | ||
| 144 | + return False | ||
| 145 | + | ||
| 146 | + | ||
| 109 | def normalize_repair_path(raw_path: str) -> str: | 147 | def normalize_repair_path(raw_path: str) -> str: |
| 110 | text = str(raw_path or "").strip() | 148 | text = str(raw_path or "").strip() |
| 111 | if not text: | 149 | if not text: |
@@ -116,6 +154,23 @@ def normalize_repair_path(raw_path: str) -> str: | |||
| 116 | return str(Path(text).expanduser()) | 154 | return str(Path(text).expanduser()) |
| 117 | 155 | ||
| 118 | 156 | ||
| 157 | +def _target_match_tokens(raw_path: str) -> tuple[str, ...]: | ||
| 158 | + text = str(raw_path or "").strip() | ||
| 159 | + if not text: | ||
| 160 | + return () | ||
| 161 | + tokens: list[str] = [text] | ||
| 162 | + normalized = normalize_repair_path(text) | ||
| 163 | + if normalized and normalized not in tokens: | ||
| 164 | + tokens.append(normalized) | ||
| 165 | + try: | ||
| 166 | + name = Path(normalized or text).name | ||
| 167 | + except (OSError, RuntimeError, ValueError): | ||
| 168 | + name = "" | ||
| 169 | + if name and name not in tokens: | ||
| 170 | + tokens.append(name) | ||
| 171 | + return tuple(tokens) | ||
| 172 | + | ||
| 173 | + | ||
| 119 | def _path_roots(paths: set[str]) -> set[str]: | 174 | def _path_roots(paths: set[str]) -> set[str]: |
| 120 | roots: set[str] = set() | 175 | roots: set[str] = set() |
| 121 | for raw_path in paths: | 176 | for raw_path in paths: |
src/loader/runtime/tool_batches.pymodified@@ -34,7 +34,11 @@ from .logging import get_runtime_logger | |||
| 34 | from .path_display import display_runtime_path | 34 | from .path_display import display_runtime_path |
| 35 | from .policy_timeline import append_verification_timeline_entry | 35 | from .policy_timeline import append_verification_timeline_entry |
| 36 | from .recovery import RecoveryContext, detect_missing_mutation_payload | 36 | from .recovery import RecoveryContext, detect_missing_mutation_payload |
| 37 | -from .repair_focus import extract_active_repair_context, path_within_allowed_roots | 37 | +from .repair_focus import ( |
| 38 | + extract_active_repair_context, | ||
| 39 | + path_within_allowed_roots, | ||
| 40 | + recent_repair_mutation_context_failed, | ||
| 41 | +) | ||
| 38 | from .safeguard_services import extract_shell_text_rewrite_target | 42 | from .safeguard_services import extract_shell_text_rewrite_target |
| 39 | from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate | 43 | from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate |
| 40 | from .tool_batch_recovery import ToolBatchRecoveryController | 44 | from .tool_batch_recovery import ToolBatchRecoveryController |
@@ -2038,6 +2042,24 @@ class ToolBatchRunner: | |||
| 2038 | if repair_issue | 2042 | if repair_issue |
| 2039 | else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n" | 2043 | else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n" |
| 2040 | ) | 2044 | ) |
| 2045 | + force_write = recent_repair_mutation_context_failed( | ||
| 2046 | + self.context.session.messages, | ||
| 2047 | + target, | ||
| 2048 | + ) | ||
| 2049 | + if force_write: | ||
| 2050 | + immediate_step = ( | ||
| 2051 | + f"- Immediate next step: rewrite `{target}` with one `write` call.\n" | ||
| 2052 | + "- Recent `edit`/`patch` attempts for this file failed against stale " | ||
| 2053 | + "or malformed context. Use `write(file_path=..., content=...)` with " | ||
| 2054 | + "a complete valid HTML document, and do not call `read`, `edit`, " | ||
| 2055 | + "`patch`, or TodoWrite again first." | ||
| 2056 | + ) | ||
| 2057 | + else: | ||
| 2058 | + immediate_step = ( | ||
| 2059 | + f"- Immediate next step: edit `{target}`.\n" | ||
| 2060 | + "- Continue with one concrete `edit`, `patch`, or `write` call that " | ||
| 2061 | + "actually changes the current generated file." | ||
| 2062 | + ) | ||
| 2041 | self.context.set_workflow_mode("execute") | 2063 | self.context.set_workflow_mode("execute") |
| 2042 | self.context.queue_steering_message( | 2064 | self.context.queue_steering_message( |
| 2043 | "Todo tracking is updated, but verification still has an active " | 2065 | "Todo tracking is updated, but verification still has an active " |
@@ -2046,9 +2068,7 @@ class ToolBatchRunner: | |||
| 2046 | "not finish yet.\n\n" | 2068 | "not finish yet.\n\n" |
| 2047 | "Repair focus:\n" | 2069 | "Repair focus:\n" |
| 2048 | f"{issue_line}" | 2070 | f"{issue_line}" |
| 2049 | - f"- Immediate next step: edit `{target}`.\n" | 2071 | + f"{immediate_step}" |
| 2050 | - "- Continue with one concrete `edit`, `patch`, or `write` call that " | ||
| 2051 | - "actually changes the current generated file." | ||
| 2052 | ) | 2072 | ) |
| 2053 | return | 2073 | return |
| 2054 | 2074 | ||
src/loader/runtime/turn_completion.pymodified@@ -27,7 +27,10 @@ from .policy_timeline import ( | |||
| 27 | completion_timeline_kind, | 27 | completion_timeline_kind, |
| 28 | ) | 28 | ) |
| 29 | from .repair import ResponseRepairer | 29 | from .repair import ResponseRepairer |
| 30 | -from .repair_focus import extract_active_repair_context | 30 | +from .repair_focus import ( |
| 31 | + extract_active_repair_context, | ||
| 32 | + recent_repair_mutation_context_failed, | ||
| 33 | +) | ||
| 31 | from .rollback import RollbackPlan | 34 | from .rollback import RollbackPlan |
| 32 | from .verification_observations import VerificationObservation | 35 | from .verification_observations import VerificationObservation |
| 33 | from .workflow import ( | 36 | from .workflow import ( |
@@ -528,6 +531,10 @@ def _build_html_quality_repair_continuation( | |||
| 528 | if not target_text: | 531 | if not target_text: |
| 529 | return None | 532 | return None |
| 530 | 533 | ||
| 534 | + force_write = recent_repair_mutation_context_failed( | ||
| 535 | + cast(list[Message], messages), | ||
| 536 | + target_text, | ||
| 537 | + ) | ||
| 531 | issue_line = next( | 538 | issue_line = next( |
| 532 | ( | 539 | ( |
| 533 | line[2:] if line.startswith("- ") else line | 540 | line[2:] if line.startswith("- ") else line |
@@ -537,6 +544,20 @@ def _build_html_quality_repair_continuation( | |||
| 537 | "", | 544 | "", |
| 538 | ) | 545 | ) |
| 539 | issue_sentence = f" Current verifier issue: {issue_line}" if issue_line else "" | 546 | issue_sentence = f" Current verifier issue: {issue_line}" if issue_line else "" |
| 547 | + if force_write: | ||
| 548 | + prompt = ( | ||
| 549 | + "[CONTINUE QUALITY REPAIR]\n" | ||
| 550 | + "You just described a content-quality repair, but did not execute it. " | ||
| 551 | + "Recent `patch`/`edit` attempts for this same file failed because their " | ||
| 552 | + "remembered context was stale or malformed. " | ||
| 553 | + f"Emit exactly one `write(file_path=..., content=...)` tool call for `{target_text}` now." | ||
| 554 | + f"{issue_sentence} " | ||
| 555 | + "Write a complete valid HTML document for this file that preserves the chapter topic " | ||
| 556 | + "and satisfies the listed quality issue. Do not call `read`, `edit`, `patch`, " | ||
| 557 | + "`TodoWrite`, or summarize." | ||
| 558 | + ) | ||
| 559 | + return InProgressContinuation(prompt=prompt, target=None) | ||
| 560 | + | ||
| 540 | prompt = ( | 561 | prompt = ( |
| 541 | "[CONTINUE QUALITY REPAIR]\n" | 562 | "[CONTINUE QUALITY REPAIR]\n" |
| 542 | "You just described a content-quality repair, but did not execute it. " | 563 | "You just described a content-quality repair, but did not execute it. " |
src/loader/tools/fs_safety.pymodified@@ -124,7 +124,10 @@ def coerce_structured_patch_payload( | |||
| 124 | try: | 124 | try: |
| 125 | value = ast.literal_eval(value) | 125 | value = ast.literal_eval(value) |
| 126 | except (SyntaxError, ValueError): | 126 | except (SyntaxError, ValueError): |
| 127 | - return [] | 127 | + repaired = _load_python_literal_with_balanced_closers(value) |
| 128 | + if repaired is None: | ||
| 129 | + return [] | ||
| 130 | + value = repaired | ||
| 128 | 131 | ||
| 129 | if isinstance(value, StructuredPatchHunk): | 132 | if isinstance(value, StructuredPatchHunk): |
| 130 | return [value] | 133 | return [value] |
@@ -146,26 +149,36 @@ def _load_json_with_balanced_closers(value: str) -> object | None: | |||
| 146 | return None | 149 | return None |
| 147 | 150 | ||
| 148 | 151 | ||
| 152 | +def _load_python_literal_with_balanced_closers(value: str) -> object | None: | ||
| 153 | + suffix = _missing_json_closer_suffix(value) | ||
| 154 | + if not suffix: | ||
| 155 | + return None | ||
| 156 | + try: | ||
| 157 | + return ast.literal_eval(value + suffix) | ||
| 158 | + except (SyntaxError, ValueError): | ||
| 159 | + return None | ||
| 160 | + | ||
| 161 | + | ||
| 149 | def _missing_json_closer_suffix(value: str) -> str: | 162 | def _missing_json_closer_suffix(value: str) -> str: |
| 150 | stack: list[str] = [] | 163 | stack: list[str] = [] |
| 151 | - in_string = False | 164 | + quote_char = "" |
| 152 | escaped = False | 165 | escaped = False |
| 153 | pairs = {"[": "]", "{": "}"} | 166 | pairs = {"[": "]", "{": "}"} |
| 154 | openers = set(pairs) | 167 | openers = set(pairs) |
| 155 | closers = {"]": "[", "}": "{"} | 168 | closers = {"]": "[", "}": "{"} |
| 156 | 169 | ||
| 157 | for char in value: | 170 | for char in value: |
| 158 | - if in_string: | 171 | + if quote_char: |
| 159 | if escaped: | 172 | if escaped: |
| 160 | escaped = False | 173 | escaped = False |
| 161 | elif char == "\\": | 174 | elif char == "\\": |
| 162 | escaped = True | 175 | escaped = True |
| 163 | - elif char == '"': | 176 | + elif char == quote_char: |
| 164 | - in_string = False | 177 | + quote_char = "" |
| 165 | continue | 178 | continue |
| 166 | 179 | ||
| 167 | - if char == '"': | 180 | + if char in {"'", '"'}: |
| 168 | - in_string = True | 181 | + quote_char = char |
| 169 | elif char in openers: | 182 | elif char in openers: |
| 170 | stack.append(char) | 183 | stack.append(char) |
| 171 | elif char in closers: | 184 | elif char in closers: |
@@ -173,7 +186,7 @@ def _missing_json_closer_suffix(value: str) -> str: | |||
| 173 | return "" | 186 | return "" |
| 174 | stack.pop() | 187 | stack.pop() |
| 175 | 188 | ||
| 176 | - if in_string: | 189 | + if quote_char: |
| 177 | return "" | 190 | return "" |
| 178 | return "".join(pairs[char] for char in reversed(stack)) | 191 | return "".join(pairs[char] for char in reversed(stack)) |
| 179 | 192 | ||
tests/test_dod.pymodified@@ -388,6 +388,71 @@ def test_derive_verification_commands_uses_reference_guide_depth_floor( | |||
| 388 | assert "expected at least 15" in result.stdout | 388 | assert "expected at least 15" in result.stdout |
| 389 | 389 | ||
| 390 | 390 | ||
| 391 | +def test_html_guide_quality_check_flags_malformed_document_structure( | ||
| 392 | + tmp_path: Path, | ||
| 393 | +) -> None: | ||
| 394 | + def rich_doc(title: str) -> str: | ||
| 395 | + body = "".join( | ||
| 396 | + f"<h2>Section {index}</h2><p>{'x' * 180}</p><ul><li>{'y' * 90}</li></ul>" | ||
| 397 | + for index in range(9) | ||
| 398 | + ) | ||
| 399 | + return f"<!DOCTYPE html><html><body><h1>{title}</h1>{body}</body></html>\n" | ||
| 400 | + | ||
| 401 | + guide = tmp_path / "guide" | ||
| 402 | + chapters = guide / "chapters" | ||
| 403 | + chapters.mkdir(parents=True) | ||
| 404 | + index_path = guide / "index.html" | ||
| 405 | + first = chapters / "01-introduction.html" | ||
| 406 | + second = chapters / "02-installation.html" | ||
| 407 | + third = chapters / "03-configuration.html" | ||
| 408 | + index_path.write_text(rich_doc("Guide")) | ||
| 409 | + first.write_text(rich_doc("Introduction")) | ||
| 410 | + second.write_text(rich_doc("Installation").rstrip() + "\n</html>\n") | ||
| 411 | + third.write_text(rich_doc("Configuration")) | ||
| 412 | + | ||
| 413 | + implementation_plan = tmp_path / "implementation.md" | ||
| 414 | + implementation_plan.write_text( | ||
| 415 | + "\n".join( | ||
| 416 | + [ | ||
| 417 | + "# Implementation Plan", | ||
| 418 | + "", | ||
| 419 | + "## File Changes", | ||
| 420 | + f"- `{index_path}`", | ||
| 421 | + f"- `{first}`", | ||
| 422 | + f"- `{second}`", | ||
| 423 | + f"- `{third}`", | ||
| 424 | + "", | ||
| 425 | + ] | ||
| 426 | + ) | ||
| 427 | + ) | ||
| 428 | + | ||
| 429 | + dod = create_definition_of_done( | ||
| 430 | + "Create an equally thorough multi-page HTML guide with chapter files." | ||
| 431 | + ) | ||
| 432 | + dod.implementation_plan = str(implementation_plan) | ||
| 433 | + | ||
| 434 | + commands = derive_verification_commands( | ||
| 435 | + dod, | ||
| 436 | + project_root=tmp_path, | ||
| 437 | + task_statement=dod.task_statement, | ||
| 438 | + supplement_existing=True, | ||
| 439 | + ) | ||
| 440 | + quality_command = next( | ||
| 441 | + command for command in commands if "HTML guide content quality issues:" in command | ||
| 442 | + ) | ||
| 443 | + result = subprocess.run( | ||
| 444 | + quality_command, | ||
| 445 | + shell=True, | ||
| 446 | + cwd=tmp_path, | ||
| 447 | + capture_output=True, | ||
| 448 | + text=True, | ||
| 449 | + check=False, | ||
| 450 | + ) | ||
| 451 | + | ||
| 452 | + assert result.returncode == 1 | ||
| 453 | + assert "02-installation.html: expected exactly one closing </html> tag" in result.stdout | ||
| 454 | + | ||
| 455 | + | ||
| 391 | def test_derive_verification_commands_flags_insufficient_pages_for_broad_thorough_guide( | 456 | def test_derive_verification_commands_flags_insufficient_pages_for_broad_thorough_guide( |
| 392 | tmp_path: Path, | 457 | tmp_path: Path, |
| 393 | ) -> None: | 458 | ) -> None: |
tests/test_expanded_tools.pymodified@@ -120,6 +120,34 @@ async def test_patch_tool_accepts_python_literal_structured_hunks( | |||
| 120 | assert target.read_text() == "alpha\nbeta from literal string\ngamma\n" | 120 | assert target.read_text() == "alpha\nbeta from literal string\ngamma\n" |
| 121 | 121 | ||
| 122 | 122 | ||
| 123 | +@pytest.mark.asyncio | ||
| 124 | +async def test_patch_tool_accepts_python_literal_hunks_missing_outer_close( | ||
| 125 | + temp_dir: Path, | ||
| 126 | +) -> None: | ||
| 127 | + target = temp_dir / "sample.txt" | ||
| 128 | + target.write_text("alpha\nbeta\ngamma\n") | ||
| 129 | + tool = PatchTool(workspace_root=temp_dir) | ||
| 130 | + | ||
| 131 | + hunk_payload = repr( | ||
| 132 | + [ | ||
| 133 | + { | ||
| 134 | + "old_start": 2, | ||
| 135 | + "old_lines": 1, | ||
| 136 | + "new_start": 2, | ||
| 137 | + "new_lines": 1, | ||
| 138 | + "lines": ["-beta", "+beta from repaired literal string"], | ||
| 139 | + } | ||
| 140 | + ] | ||
| 141 | + )[:-1] | ||
| 142 | + result = await tool.execute( | ||
| 143 | + file_path=str(target), | ||
| 144 | + hunks=hunk_payload, | ||
| 145 | + ) | ||
| 146 | + | ||
| 147 | + assert result.is_error is False | ||
| 148 | + assert target.read_text() == "alpha\nbeta from repaired literal string\ngamma\n" | ||
| 149 | + | ||
| 150 | + | ||
| 123 | @pytest.mark.asyncio | 151 | @pytest.mark.asyncio |
| 124 | async def test_patch_tool_rejects_context_mismatch(temp_dir: Path) -> None: | 152 | async def test_patch_tool_rejects_context_mismatch(temp_dir: Path) -> None: |
| 125 | target = temp_dir / "sample.txt" | 153 | target = temp_dir / "sample.txt" |
tests/test_repair.pymodified@@ -321,6 +321,56 @@ def test_empty_response_retry_during_html_quality_repair_shrinks_mutation( | |||
| 321 | assert f"`{second_chapter.resolve(strict=False)}`" in decision.retry_message | 321 | assert f"`{second_chapter.resolve(strict=False)}`" in decision.retry_message |
| 322 | 322 | ||
| 323 | 323 | ||
| 324 | +def test_empty_response_retry_forces_write_after_stale_quality_repair_context( | ||
| 325 | + temp_dir: Path, | ||
| 326 | +) -> None: | ||
| 327 | + context = build_context( | ||
| 328 | + temp_dir=temp_dir, | ||
| 329 | + use_react=False, | ||
| 330 | + ) | ||
| 331 | + repairer = ResponseRepairer(context) | ||
| 332 | + guide = temp_dir / "guides" / "nginx" | ||
| 333 | + chapters = guide / "chapters" | ||
| 334 | + chapters.mkdir(parents=True) | ||
| 335 | + chapter = chapters / "05-load-balancing.html" | ||
| 336 | + chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n") | ||
| 337 | + context.session.append( | ||
| 338 | + Message( | ||
| 339 | + role=Role.USER, | ||
| 340 | + content=( | ||
| 341 | + "Repair focus:\n" | ||
| 342 | + f"- Improve `{chapter}`: thin content " | ||
| 343 | + "(846 text chars, expected at least 1758).\n" | ||
| 344 | + f"- Immediate next step: edit `{chapter}`.\n" | ||
| 345 | + ), | ||
| 346 | + ) | ||
| 347 | + ) | ||
| 348 | + context.session.append( | ||
| 349 | + Message( | ||
| 350 | + role=Role.TOOL, | ||
| 351 | + content=( | ||
| 352 | + "Observation [edit]: Error: Failed to complete the operation " | ||
| 353 | + f"after 2 attempts for {chapter}. old_string not found in file." | ||
| 354 | + ), | ||
| 355 | + ) | ||
| 356 | + ) | ||
| 357 | + dod = create_definition_of_done("Create an equally thorough HTML guide.") | ||
| 358 | + dod.touched_files = [str(chapter)] | ||
| 359 | + | ||
| 360 | + decision = repairer.handle_empty_response( | ||
| 361 | + task="Create an equally thorough HTML guide.", | ||
| 362 | + original_task=None, | ||
| 363 | + empty_retry_count=1, | ||
| 364 | + max_empty_retries=2, | ||
| 365 | + dod=dod, | ||
| 366 | + ) | ||
| 367 | + | ||
| 368 | + assert decision.should_continue is True | ||
| 369 | + assert decision.retry_message is not None | ||
| 370 | + assert "Use exactly one `write(file_path=..., content=...)`" in decision.retry_message | ||
| 371 | + assert "Do not call `read`, `edit`, `patch`, TodoWrite" in decision.retry_message | ||
| 372 | + | ||
| 373 | + | ||
| 324 | def test_empty_response_retry_mentions_write_can_create_missing_parent_directories( | 374 | def test_empty_response_retry_mentions_write_can_create_missing_parent_directories( |
| 325 | temp_dir: Path, | 375 | temp_dir: Path, |
| 326 | ) -> None: | 376 | ) -> None: |
tests/test_tool_batches.pymodified@@ -4425,6 +4425,68 @@ async def test_tool_batch_runner_todowrite_during_quality_repair_requires_mutati | |||
| 4425 | assert dod.completed_items == completed_before_todowrite | 4425 | assert dod.completed_items == completed_before_todowrite |
| 4426 | 4426 | ||
| 4427 | 4427 | ||
| 4428 | +def test_todowrite_quality_repair_nudge_forces_write_after_stale_context( | ||
| 4429 | + temp_dir: Path, | ||
| 4430 | +) -> None: | ||
| 4431 | + async def assess_confidence( | ||
| 4432 | + tool_name: str, | ||
| 4433 | + tool_args: dict, | ||
| 4434 | + context: str, | ||
| 4435 | + ) -> ConfidenceAssessment: | ||
| 4436 | + raise AssertionError("Confidence should not run for direct nudge test") | ||
| 4437 | + | ||
| 4438 | + async def verify_action( | ||
| 4439 | + tool_name: str, | ||
| 4440 | + tool_args: dict, | ||
| 4441 | + result: str, | ||
| 4442 | + expected: str = "", | ||
| 4443 | + ) -> ActionVerification: | ||
| 4444 | + raise AssertionError("Verification should not run for direct nudge test") | ||
| 4445 | + | ||
| 4446 | + guide_root = temp_dir / "guides" / "nginx" | ||
| 4447 | + chapters = guide_root / "chapters" | ||
| 4448 | + chapters.mkdir(parents=True) | ||
| 4449 | + chapter_one = chapters / "05-load-balancing.html" | ||
| 4450 | + chapter_one.write_text("<html><body><h1>Load Balancing</h1></body></html>\n") | ||
| 4451 | + context = build_context( | ||
| 4452 | + temp_dir=temp_dir, | ||
| 4453 | + messages=[ | ||
| 4454 | + Message( | ||
| 4455 | + role=Role.USER, | ||
| 4456 | + content=( | ||
| 4457 | + "Repair focus:\n" | ||
| 4458 | + f"- Improve `{chapter_one}`: thin content " | ||
| 4459 | + "(846 text chars, expected at least 1758).\n" | ||
| 4460 | + f"- Immediate next step: edit `{chapter_one}`.\n" | ||
| 4461 | + ), | ||
| 4462 | + ), | ||
| 4463 | + Message( | ||
| 4464 | + role=Role.TOOL, | ||
| 4465 | + content=( | ||
| 4466 | + "Observation [edit]: Error: Failed to complete the operation " | ||
| 4467 | + f"after 2 attempts for {chapter_one}. old_string not found in file." | ||
| 4468 | + ), | ||
| 4469 | + ), | ||
| 4470 | + ], | ||
| 4471 | + safeguards=FakeSafeguards(), | ||
| 4472 | + assess_confidence=assess_confidence, | ||
| 4473 | + verify_action=verify_action, | ||
| 4474 | + auto_recover=False, | ||
| 4475 | + ) | ||
| 4476 | + queued_messages: list[str] = [] | ||
| 4477 | + context.queue_steering_message_callback = queued_messages.append | ||
| 4478 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | ||
| 4479 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | ||
| 4480 | + | ||
| 4481 | + runner._queue_todowrite_resume_nudge(dod=dod) | ||
| 4482 | + | ||
| 4483 | + assert queued_messages | ||
| 4484 | + message = queued_messages[-1] | ||
| 4485 | + assert f"Immediate next step: rewrite `{chapter_one.resolve(strict=False)}`" in message | ||
| 4486 | + assert "`write(file_path=..., content=...)`" in message | ||
| 4487 | + assert "do not call `read`, `edit`, `patch`, or TodoWrite again first" in message | ||
| 4488 | + | ||
| 4489 | + | ||
| 4428 | @pytest.mark.asyncio | 4490 | @pytest.mark.asyncio |
| 4429 | async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff( | 4491 | async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff( |
| 4430 | temp_dir: Path, | 4492 | temp_dir: Path, |
tests/test_turn_completion.pymodified@@ -455,6 +455,88 @@ async def test_turn_completion_uses_quality_repair_prompt_for_rewrite_narration( | |||
| 455 | assert "Do not rewrite the whole file from memory" in agent.session.messages[-1].content | 455 | assert "Do not rewrite the whole file from memory" in agent.session.messages[-1].content |
| 456 | 456 | ||
| 457 | 457 | ||
| 458 | +@pytest.mark.asyncio | ||
| 459 | +async def test_turn_completion_forces_write_after_stale_quality_repair_context( | ||
| 460 | + temp_dir: Path, | ||
| 461 | +) -> None: | ||
| 462 | + backend = ScriptedBackend() | ||
| 463 | + config = non_streaming_config() | ||
| 464 | + config.reasoning.completion_check = False | ||
| 465 | + agent = Agent( | ||
| 466 | + backend=backend, | ||
| 467 | + config=config, | ||
| 468 | + project_root=temp_dir, | ||
| 469 | + ) | ||
| 470 | + runtime = ConversationRuntime(agent) | ||
| 471 | + events = [] | ||
| 472 | + | ||
| 473 | + async def capture(event) -> None: | ||
| 474 | + events.append(event) | ||
| 475 | + | ||
| 476 | + prepared = await runtime.turn_preparation.prepare( | ||
| 477 | + task="Create an equally thorough HTML guide.", | ||
| 478 | + emit=capture, | ||
| 479 | + requested_mode="execute", | ||
| 480 | + original_task=None, | ||
| 481 | + on_user_question=None, | ||
| 482 | + ) | ||
| 483 | + await runtime.phase_tracker.enter( | ||
| 484 | + TurnPhase.ASSISTANT, | ||
| 485 | + capture, | ||
| 486 | + detail="Requesting assistant response", | ||
| 487 | + reason_code="request_assistant_response", | ||
| 488 | + ) | ||
| 489 | + | ||
| 490 | + chapter = temp_dir / "guides" / "nginx" / "chapters" / "05-load-balancing.html" | ||
| 491 | + chapter.parent.mkdir(parents=True) | ||
| 492 | + chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n") | ||
| 493 | + prepared.definition_of_done.touched_files.append(str(chapter)) | ||
| 494 | + prepared.definition_of_done.mutating_actions.append("edit") | ||
| 495 | + agent.session.append( | ||
| 496 | + Message( | ||
| 497 | + role=Role.USER, | ||
| 498 | + content=( | ||
| 499 | + "Repair focus:\n" | ||
| 500 | + f"- Improve `{chapter}`: thin content " | ||
| 501 | + "(846 text chars, expected at least 1758).\n" | ||
| 502 | + f"- Immediate next step: edit `{chapter}`.\n" | ||
| 503 | + ), | ||
| 504 | + ) | ||
| 505 | + ) | ||
| 506 | + agent.session.append( | ||
| 507 | + Message( | ||
| 508 | + role=Role.TOOL, | ||
| 509 | + content=( | ||
| 510 | + "Observation [edit]: Error: Failed to complete the operation after " | ||
| 511 | + f"2 attempts for {chapter}. old_string not found in file." | ||
| 512 | + ), | ||
| 513 | + ) | ||
| 514 | + ) | ||
| 515 | + | ||
| 516 | + content = "I'll rewrite the load balancing chapter with comprehensive content." | ||
| 517 | + decision = await runtime.turn_completion.handle_text_response( | ||
| 518 | + content=content, | ||
| 519 | + response_content=content, | ||
| 520 | + task=prepared.task, | ||
| 521 | + effective_task=prepared.effective_task, | ||
| 522 | + iterations=1, | ||
| 523 | + max_iterations=agent.config.max_iterations, | ||
| 524 | + actions_taken=[], | ||
| 525 | + continuation_count=0, | ||
| 526 | + dod=prepared.definition_of_done, | ||
| 527 | + emit=capture, | ||
| 528 | + summary=prepared.summary, | ||
| 529 | + executor=prepared.executor, | ||
| 530 | + rollback_plan=prepared.rollback_plan, | ||
| 531 | + ) | ||
| 532 | + | ||
| 533 | + assert decision.action == TurnCompletionAction.CONTINUE | ||
| 534 | + message = agent.session.messages[-1].content | ||
| 535 | + assert message.startswith("[CONTINUE QUALITY REPAIR]") | ||
| 536 | + assert "exactly one `write(file_path=..., content=...)`" in message | ||
| 537 | + assert "Do not call `read`, `edit`, `patch`, `TodoWrite`, or summarize." in message | ||
| 538 | + | ||
| 539 | + | ||
| 458 | @pytest.mark.asyncio | 540 | @pytest.mark.asyncio |
| 459 | async def test_turn_completion_continues_queued_quality_repair_after_summary( | 541 | async def test_turn_completion_continues_queued_quality_repair_after_summary( |
| 460 | temp_dir: Path, | 542 | temp_dir: Path, |