`df98255`

Continue queued quality repairs

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: df982555614904c8b1a172b95fd72a5d9aa484a4
Parents: d950aff
Tree: e387e09

4 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	9	7
M	`src/loader/runtime/turn_completion.py`	65	0
M	`tests/test_tool_batches.py`	4	0
M	`tests/test_turn_completion.py`	87	0

`src/loader/runtime/tool_batches.py` (modified)

          previously_verified = dod.last_verification_result == "passed"
          record_successful_tool_call(dod, tool_call)
          if tool_call.name == "TodoWrite" and outcome.registry_result is not None:
 -            new_todos = outcome.registry_result.metadata.get("new_todos", [])
 -            if isinstance(new_todos, list):
 -                sync_todos_to_definition_of_done(
 -                    dod,
 -                    new_todos,
 -                    project_root=self.context.project_root,
 -                )
 +            repair = extract_active_repair_context(self.context.session.messages)
 +            if repair is None or not _repair_context_is_html_quality(repair):
 +                new_todos = outcome.registry_result.metadata.get("new_todos", [])
 +                if isinstance(new_todos, list):
 +                    sync_todos_to_definition_of_done(
 +                        dod,
 +                        new_todos,
 +                        project_root=self.context.project_root,
 +                    )
              self._refresh_todowrite_outcome_summary(outcome=outcome, dod=dod)
              self._queue_todowrite_resume_nudge(dod=dod)
          else:

`src/loader/runtime/turn_completion.py` (modified)

              reason_code="completion_gate",
+         )
          progress_messages = list(getattr(self.context.session, "messages", []) or [])
 +        pending_quality_repair = _build_pending_html_quality_repair_continuation(
 +            progress_messages
 +        )
 +        if pending_quality_repair is not None and not _looks_like_progress_intent(
 +            content
 +        ):
 +            assistant_message = Message(role=Role.ASSISTANT, content=response_content)
 +            self.context.session.append(assistant_message)
 +            summary.assistant_messages.append(assistant_message)
 +            self.context.session.append(
 +                Message(role=Role.USER, content=pending_quality_repair.prompt)
 +            )
 +            self._append_completion_trace_entry(
 +                summary=summary,
 +                stage="continuation_check",
 +                outcome="continue",
 +                decision_code="pending_quality_repair_continue",
 +                decision_summary=(
 +                    "continued because a queued quality-repair handoff still "
 +                    "requires a concrete mutation"
 +                ),
 +            )
 +            self._record_completion_decision(
 +                summary=summary,
 +                decision_code="pending_quality_repair_continue",
 +                decision_summary=(
 +                    "continued because a queued quality-repair handoff still "
 +                    "requires a concrete mutation"
 +                ),
 +            )
 +            return TurnCompletionDecision(
 +                action=TurnCompletionAction.CONTINUE,
 +                continuation_count=continuation_count + 1,
 +            )
++
          progress_intent = _build_in_progress_continuation(
              content=content,
              dod=dod,
      return InProgressContinuation(prompt=prompt, target=None)
 +def _build_pending_html_quality_repair_continuation(
 +    messages: list[object],
 +) -> InProgressContinuation | None:
 +    latest_user_content = ""
 +    for message in reversed(messages):
 +        if not isinstance(message, Message) or message.role != Role.USER:
 +            continue
 +        latest_user_content = str(message.content or "")
 +        break
++
 +    if not latest_user_content:
 +        return None
 +    if "Finish with a final response now" in latest_user_content:
 +        return None
 +    lowered = latest_user_content.lower()
 +    has_repair_focus = "repair focus:" in lowered
 +    has_concrete_mutation_cue = "continue with one concrete" in lowered
 +    has_quality_cue = (
 +        "content-quality" in lowered
 +        or "thin content" in lowered
 +        or "insufficient structured content" in lowered
 +    )
 +    if not (
 +        latest_user_content.startswith("[CONTINUE QUALITY REPAIR]")
 +        or (has_repair_focus and has_concrete_mutation_cue and has_quality_cue)
 +    ):
 +        return None
 +    return _build_html_quality_repair_continuation(messages)
++
++
  def _looks_like_progress_intent(content: str) -> bool:
      text = content.lower().strip()
      if not text or "?" in text:

`tests/test_tool_batches.py` (modified)

          ],
          project_root=temp_dir,
+     )
 +    pending_before_todowrite = list(dod.pending_items)
 +    completed_before_todowrite = list(dod.completed_items)
      tool_call = ToolCall(
          id="todo-quality",
      assert result.halted is False
      assert summary.final_response == ""
      assert not emitted_responses
 +    assert dod.pending_items == pending_before_todowrite
 +    assert dod.completed_items == completed_before_todowrite
  @pytest.mark.asyncio

`tests/test_turn_completion.py` (modified)

      assert "Do not rewrite the whole file from memory" in agent.session.messages[-1].content
 +@pytest.mark.asyncio
 +async def test_turn_completion_continues_queued_quality_repair_after_summary(
 +    temp_dir: Path,
 +) -> None:
 +    backend = ScriptedBackend()
 +    config = non_streaming_config()
 +    config.reasoning.completion_check = False
 +    agent = Agent(
 +        backend=backend,
 +        config=config,
 +        project_root=temp_dir,
 +    )
 +    runtime = ConversationRuntime(agent)
 +    events = []
++
 +    async def capture(event) -> None:
 +        events.append(event)
++
 +    prepared = await runtime.turn_preparation.prepare(
 +        task="Repair generated HTML guide quality.",
 +        emit=capture,
 +        requested_mode="execute",
 +        original_task=None,
 +        on_user_question=None,
 +    )
 +    await runtime.phase_tracker.enter(
 +        TurnPhase.ASSISTANT,
 +        capture,
 +        detail="Requesting assistant response",
 +        reason_code="request_assistant_response",
 +    )
++
 +    first = temp_dir / "guides" / "nginx" / "chapters" / "01-introduction.html"
 +    second = temp_dir / "guides" / "nginx" / "chapters" / "02-installation.html"
 +    second.parent.mkdir(parents=True)
 +    first.write_text("<html><body><h1>Intro</h1></body></html>\n")
 +    second.write_text("<html><body><h1>Install</h1></body></html>\n")
 +    prepared.definition_of_done.touched_files.extend(
 +        [
 +            str(first),
 +            str(second),
 +        ]
 +    )
 +    prepared.definition_of_done.mutating_actions.append("edit")
 +    agent.session.append(
 +        Message(
 +            role=Role.USER,
 +            content=(
 +                "The active HTML content-quality repair target was updated. "
 +                f"Continue directly with the next listed quality target `{second}` "
 +                "using one substantial write/edit/patch anchored to current content.\n\n"
 +                "Repair focus:\n"
 +                f"- Improve `{second}`: thin content (513 text chars, expected at least 1758).\n"
 +                f"- Immediate next step: edit `{second}`.\n"
 +                "- Continue with one concrete `edit`, `patch`, or `write` call that actually changes the current generated file."
 +            ),
 +        )
 +    )
++
 +    content = (
 +        "I've expanded the introduction chapter, so it should now meet the "
 +        "minimum quality threshold."
 +    )
 +    decision = await runtime.turn_completion.handle_text_response(
 +        content=content,
 +        response_content=content,
 +        task=prepared.task,
 +        effective_task=prepared.effective_task,
 +        iterations=1,
 +        max_iterations=agent.config.max_iterations,
 +        actions_taken=[],
 +        continuation_count=0,
 +        dod=prepared.definition_of_done,
 +        emit=capture,
 +        summary=prepared.summary,
 +        executor=prepared.executor,
 +        rollback_plan=prepared.rollback_plan,
 +    )
++
 +    assert decision.action == TurnCompletionAction.CONTINUE
 +    assert prepared.summary.completion_decision_code == "pending_quality_repair_continue"
 +    assert agent.session.messages[-1].role.value == "user"
 +    assert agent.session.messages[-1].content.startswith("[CONTINUE QUALITY REPAIR]")
 +    assert str(second.resolve(strict=False)) in agent.session.messages[-1].content
 +    assert "one concrete `patch`, `edit`, or `write` tool call" in agent.session.messages[-1].content
++
++
  @pytest.mark.asyncio
  async def test_turn_completion_allows_first_progress_narration_before_any_output_exists(
      temp_dir: Path,