Continue queued quality repairs
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: df982555614904c8b1a172b95fd72a5d9aa484a4 (df98255)
- Parents: d950aff
- Tree: e387e09

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/tool_batches.py | 9 | 7 |
| M | src/loader/runtime/turn_completion.py | 65 | 0 |
| M | tests/test_tool_batches.py | 4 | 0 |
| M | tests/test_turn_completion.py | 87 | 0 |
src/loader/runtime/tool_batches.py (modified) @@ -1588,13 +1588,15 @@ class ToolBatchRunner: | ||
| 1588 | 1588 | previously_verified = dod.last_verification_result == "passed" |
| 1589 | 1589 | record_successful_tool_call(dod, tool_call) |
| 1590 | 1590 | if tool_call.name == "TodoWrite" and outcome.registry_result is not None: |
| 1591 | - new_todos = outcome.registry_result.metadata.get("new_todos", []) | |
| 1592 | - if isinstance(new_todos, list): | |
| 1593 | - sync_todos_to_definition_of_done( | |
| 1594 | - dod, | |
| 1595 | - new_todos, | |
| 1596 | - project_root=self.context.project_root, | |
| 1597 | - ) | |
| 1591 | + repair = extract_active_repair_context(self.context.session.messages) | |
| 1592 | + if repair is None or not _repair_context_is_html_quality(repair): | |
| 1593 | + new_todos = outcome.registry_result.metadata.get("new_todos", []) | |
| 1594 | + if isinstance(new_todos, list): | |
| 1595 | + sync_todos_to_definition_of_done( | |
| 1596 | + dod, | |
| 1597 | + new_todos, | |
| 1598 | + project_root=self.context.project_root, | |
| 1599 | + ) | |
| 1598 | 1600 | self._refresh_todowrite_outcome_summary(outcome=outcome, dod=dod) |
| 1599 | 1601 | self._queue_todowrite_resume_nudge(dod=dod) |
| 1600 | 1602 | else: |
src/loader/runtime/turn_completion.py (modified) @@ -174,6 +174,41 @@ class TurnCompletionController: | ||
| 174 | 174 | reason_code="completion_gate", |
| 175 | 175 | ) |
| 176 | 176 | progress_messages = list(getattr(self.context.session, "messages", []) or []) |
| 177 | + pending_quality_repair = _build_pending_html_quality_repair_continuation( | |
| 178 | + progress_messages | |
| 179 | + ) | |
| 180 | + if pending_quality_repair is not None and not _looks_like_progress_intent( | |
| 181 | + content | |
| 182 | + ): | |
| 183 | + assistant_message = Message(role=Role.ASSISTANT, content=response_content) | |
| 184 | + self.context.session.append(assistant_message) | |
| 185 | + summary.assistant_messages.append(assistant_message) | |
| 186 | + self.context.session.append( | |
| 187 | + Message(role=Role.USER, content=pending_quality_repair.prompt) | |
| 188 | + ) | |
| 189 | + self._append_completion_trace_entry( | |
| 190 | + summary=summary, | |
| 191 | + stage="continuation_check", | |
| 192 | + outcome="continue", | |
| 193 | + decision_code="pending_quality_repair_continue", | |
| 194 | + decision_summary=( | |
| 195 | + "continued because a queued quality-repair handoff still " | |
| 196 | + "requires a concrete mutation" | |
| 197 | + ), | |
| 198 | + ) | |
| 199 | + self._record_completion_decision( | |
| 200 | + summary=summary, | |
| 201 | + decision_code="pending_quality_repair_continue", | |
| 202 | + decision_summary=( | |
| 203 | + "continued because a queued quality-repair handoff still " | |
| 204 | + "requires a concrete mutation" | |
| 205 | + ), | |
| 206 | + ) | |
| 207 | + return TurnCompletionDecision( | |
| 208 | + action=TurnCompletionAction.CONTINUE, | |
| 209 | + continuation_count=continuation_count + 1, | |
| 210 | + ) | |
| 211 | + | |
| 177 | 212 | progress_intent = _build_in_progress_continuation( |
| 178 | 213 | content=content, |
| 179 | 214 | dod=dod, |
@@ -514,6 +549,36 @@ def _build_html_quality_repair_continuation( | ||
| 514 | 549 | return InProgressContinuation(prompt=prompt, target=None) |
| 515 | 550 | |
| 516 | 551 | |
| 552 | +def _build_pending_html_quality_repair_continuation( | |
| 553 | + messages: list[object], | |
| 554 | +) -> InProgressContinuation | None: | |
| 555 | + latest_user_content = "" | |
| 556 | + for message in reversed(messages): | |
| 557 | + if not isinstance(message, Message) or message.role != Role.USER: | |
| 558 | + continue | |
| 559 | + latest_user_content = str(message.content or "") | |
| 560 | + break | |
| 561 | + | |
| 562 | + if not latest_user_content: | |
| 563 | + return None | |
| 564 | + if "Finish with a final response now" in latest_user_content: | |
| 565 | + return None | |
| 566 | + lowered = latest_user_content.lower() | |
| 567 | + has_repair_focus = "repair focus:" in lowered | |
| 568 | + has_concrete_mutation_cue = "continue with one concrete" in lowered | |
| 569 | + has_quality_cue = ( | |
| 570 | + "content-quality" in lowered | |
| 571 | + or "thin content" in lowered | |
| 572 | + or "insufficient structured content" in lowered | |
| 573 | + ) | |
| 574 | + if not ( | |
| 575 | + latest_user_content.startswith("[CONTINUE QUALITY REPAIR]") | |
| 576 | + or (has_repair_focus and has_concrete_mutation_cue and has_quality_cue) | |
| 577 | + ): | |
| 578 | + return None | |
| 579 | + return _build_html_quality_repair_continuation(messages) | |
| 580 | + | |
| 581 | + | |
| 517 | 582 | def _looks_like_progress_intent(content: str) -> bool: |
| 518 | 583 | text = content.lower().strip() |
| 519 | 584 | if not text or "?" in text: |
tests/test_tool_batches.py (modified) @@ -4356,6 +4356,8 @@ async def test_tool_batch_runner_todowrite_during_quality_repair_requires_mutati | ||
| 4356 | 4356 | ], |
| 4357 | 4357 | project_root=temp_dir, |
| 4358 | 4358 | ) |
| 4359 | + pending_before_todowrite = list(dod.pending_items) | |
| 4360 | + completed_before_todowrite = list(dod.completed_items) | |
| 4359 | 4361 | |
| 4360 | 4362 | tool_call = ToolCall( |
| 4361 | 4363 | id="todo-quality", |
@@ -4419,6 +4421,8 @@ async def test_tool_batch_runner_todowrite_during_quality_repair_requires_mutati | ||
| 4419 | 4421 | assert result.halted is False |
| 4420 | 4422 | assert summary.final_response == "" |
| 4421 | 4423 | assert not emitted_responses |
| 4424 | + assert dod.pending_items == pending_before_todowrite | |
| 4425 | + assert dod.completed_items == completed_before_todowrite | |
| 4422 | 4426 | |
| 4423 | 4427 | |
| 4424 | 4428 | @pytest.mark.asyncio |
tests/test_turn_completion.py (modified) @@ -455,6 +455,93 @@ async def test_turn_completion_uses_quality_repair_prompt_for_rewrite_narration( | ||
| 455 | 455 | assert "Do not rewrite the whole file from memory" in agent.session.messages[-1].content |
| 456 | 456 | |
| 457 | 457 | |
| 458 | +@pytest.mark.asyncio | |
| 459 | +async def test_turn_completion_continues_queued_quality_repair_after_summary( | |
| 460 | + temp_dir: Path, | |
| 461 | +) -> None: | |
| 462 | + backend = ScriptedBackend() | |
| 463 | + config = non_streaming_config() | |
| 464 | + config.reasoning.completion_check = False | |
| 465 | + agent = Agent( | |
| 466 | + backend=backend, | |
| 467 | + config=config, | |
| 468 | + project_root=temp_dir, | |
| 469 | + ) | |
| 470 | + runtime = ConversationRuntime(agent) | |
| 471 | + events = [] | |
| 472 | + | |
| 473 | + async def capture(event) -> None: | |
| 474 | + events.append(event) | |
| 475 | + | |
| 476 | + prepared = await runtime.turn_preparation.prepare( | |
| 477 | + task="Repair generated HTML guide quality.", | |
| 478 | + emit=capture, | |
| 479 | + requested_mode="execute", | |
| 480 | + original_task=None, | |
| 481 | + on_user_question=None, | |
| 482 | + ) | |
| 483 | + await runtime.phase_tracker.enter( | |
| 484 | + TurnPhase.ASSISTANT, | |
| 485 | + capture, | |
| 486 | + detail="Requesting assistant response", | |
| 487 | + reason_code="request_assistant_response", | |
| 488 | + ) | |
| 489 | + | |
| 490 | + first = temp_dir / "guides" / "nginx" / "chapters" / "01-introduction.html" | |
| 491 | + second = temp_dir / "guides" / "nginx" / "chapters" / "02-installation.html" | |
| 492 | + second.parent.mkdir(parents=True) | |
| 493 | + first.write_text("<html><body><h1>Intro</h1></body></html>\n") | |
| 494 | + second.write_text("<html><body><h1>Install</h1></body></html>\n") | |
| 495 | + prepared.definition_of_done.touched_files.extend( | |
| 496 | + [ | |
| 497 | + str(first), | |
| 498 | + str(second), | |
| 499 | + ] | |
| 500 | + ) | |
| 501 | + prepared.definition_of_done.mutating_actions.append("edit") | |
| 502 | + agent.session.append( | |
| 503 | + Message( | |
| 504 | + role=Role.USER, | |
| 505 | + content=( | |
| 506 | + "The active HTML content-quality repair target was updated. " | |
| 507 | + f"Continue directly with the next listed quality target `{second}` " | |
| 508 | + "using one substantial write/edit/patch anchored to current content.\n\n" | |
| 509 | + "Repair focus:\n" | |
| 510 | + f"- Improve `{second}`: thin content (513 text chars, expected at least 1758).\n" | |
| 511 | + f"- Immediate next step: edit `{second}`.\n" | |
| 512 | + "- Continue with one concrete `edit`, `patch`, or `write` call that actually changes the current generated file." | |
| 513 | + ), | |
| 514 | + ) | |
| 515 | + ) | |
| 516 | + | |
| 517 | + content = ( | |
| 518 | + "I've expanded the introduction chapter, so it should now meet the " | |
| 519 | + "minimum quality threshold." | |
| 520 | + ) | |
| 521 | + decision = await runtime.turn_completion.handle_text_response( | |
| 522 | + content=content, | |
| 523 | + response_content=content, | |
| 524 | + task=prepared.task, | |
| 525 | + effective_task=prepared.effective_task, | |
| 526 | + iterations=1, | |
| 527 | + max_iterations=agent.config.max_iterations, | |
| 528 | + actions_taken=[], | |
| 529 | + continuation_count=0, | |
| 530 | + dod=prepared.definition_of_done, | |
| 531 | + emit=capture, | |
| 532 | + summary=prepared.summary, | |
| 533 | + executor=prepared.executor, | |
| 534 | + rollback_plan=prepared.rollback_plan, | |
| 535 | + ) | |
| 536 | + | |
| 537 | + assert decision.action == TurnCompletionAction.CONTINUE | |
| 538 | + assert prepared.summary.completion_decision_code == "pending_quality_repair_continue" | |
| 539 | + assert agent.session.messages[-1].role.value == "user" | |
| 540 | + assert agent.session.messages[-1].content.startswith("[CONTINUE QUALITY REPAIR]") | |
| 541 | + assert str(second.resolve(strict=False)) in agent.session.messages[-1].content | |
| 542 | + assert "one concrete `patch`, `edit`, or `write` tool call" in agent.session.messages[-1].content | |
| 543 | + | |
| 544 | + | |
| 458 | 545 | @pytest.mark.asyncio |
| 459 | 546 | async def test_turn_completion_allows_first_progress_narration_before_any_output_exists( |
| 460 | 547 | temp_dir: Path, |