Soften step continuation
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: fa5d5a82e1461e7cc551537a49859aad8c022db4 (short: fa5d5a8)
- Parents: 2a6f142
- Tree: 6305bf1

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/turn_completion.py | 52 | 15 |
| M | tests/test_turn_completion.py | 85 | 3 |
src/loader/runtime/turn_completion.py (modified)
@@ -261,18 +261,43 @@ class TurnCompletionController: | ||
| 261 | 261 | finalize_reason_summary=continuation_decision.decision_summary, |
| 262 | 262 | ) |
| 263 | 263 | |
| 264 | - progress_intent_prompt = _build_in_progress_continuation_prompt( | |
| 264 | + progress_messages = list(getattr(self.context.session, "messages", []) or []) | |
| 265 | + progress_intent = _build_in_progress_continuation( | |
| 265 | 266 | content=content, |
| 266 | 267 | dod=dod, |
| 267 | 268 | project_root=self.context.project_root, |
| 268 | - messages=list(getattr(self.context.session, "messages", []) or []), | |
| 269 | + messages=progress_messages, | |
| 269 | 270 | ) |
| 270 | - if progress_intent_prompt: | |
| 271 | + if progress_intent is not None: | |
| 271 | 272 | assistant_message = Message(role=Role.ASSISTANT, content=response_content) |
| 272 | 273 | self.context.session.append(assistant_message) |
| 273 | 274 | summary.assistant_messages.append(assistant_message) |
| 275 | + if progress_intent.target is not None and continuation_count == 0: | |
| 276 | + self._append_completion_trace_entry( | |
| 277 | + summary=summary, | |
| 278 | + stage="continuation_check", | |
| 279 | + outcome="continue", | |
| 280 | + decision_code="in_progress_transition_continue", | |
| 281 | + decision_summary=( | |
| 282 | + "continued to let the assistant finish the concrete next " | |
| 283 | + "planned step without interrupting it yet" | |
| 284 | + ), | |
| 285 | + ) | |
| 286 | + self._record_completion_decision( | |
| 287 | + summary=summary, | |
| 288 | + decision_code="in_progress_transition_continue", | |
| 289 | + decision_summary=( | |
| 290 | + "continued to let the assistant finish the concrete next " | |
| 291 | + "planned step without interrupting it yet" | |
| 292 | + ), | |
| 293 | + ) | |
| 294 | + return TurnCompletionDecision( | |
| 295 | + action=TurnCompletionAction.CONTINUE, | |
| 296 | + continuation_count=continuation_count + 1, | |
| 297 | + ) | |
| 298 | + | |
| 274 | 299 | self.context.session.append( |
| 275 | - Message(role=Role.USER, content=progress_intent_prompt) | |
| 300 | + Message(role=Role.USER, content=progress_intent.prompt) | |
| 276 | 301 | ) |
| 277 | 302 | self._append_completion_trace_entry( |
| 278 | 303 | summary=summary, |
@@ -355,13 +380,19 @@ class TurnCompletionController: | ||
| 355 | 380 | ) |
| 356 | 381 | |
| 357 | 382 | |
| 358 | -def _build_in_progress_continuation_prompt( | |
| 383 | +@dataclass(frozen=True, slots=True) | |
| 384 | +class InProgressContinuation: | |
| 385 | + prompt: str | |
| 386 | + target: Path | None | |
| 387 | + | |
| 388 | + | |
| 389 | +def _build_in_progress_continuation( | |
| 359 | 390 | *, |
| 360 | 391 | content: str, |
| 361 | 392 | dod: DefinitionOfDone, |
| 362 | 393 | project_root: Path, |
| 363 | 394 | messages: list[object], |
| 364 | -) -> str | None: | |
| 395 | +) -> InProgressContinuation | None: | |
| 365 | 396 | if not _looks_like_progress_intent(content): |
| 366 | 397 | return None |
| 367 | 398 | |
@@ -386,18 +417,24 @@ def _build_in_progress_continuation_prompt( | ||
| 386 | 417 | messages=messages, |
| 387 | 418 | ) |
| 388 | 419 | if target is not None: |
| 389 | - return ( | |
| 390 | - "[CONTINUE CURRENT STEP]\n" | |
| 391 | - "You just described the next planned step, but the concrete output is not on disk yet. " | |
| 392 | - f"Respond with one concrete `write` or `edit`-style tool call that creates or updates `{target}` now. " | |
| 393 | - "Do not summarize, verify, or restart discovery first." | |
| 420 | + return InProgressContinuation( | |
| 421 | + prompt=( | |
| 422 | + "[CONTINUE CURRENT STEP]\n" | |
| 423 | + "You just described the next planned step, but the concrete output is not on disk yet. " | |
| 424 | + f"Respond with one concrete `write` or `edit`-style tool call that creates or updates `{target}` now. " | |
| 425 | + "Do not summarize, verify, or restart discovery first." | |
| 426 | + ), | |
| 427 | + target=target, | |
| 394 | 428 | ) |
| 395 | 429 | |
| 396 | 430 | if next_pending: |
| 397 | - return ( | |
| 398 | - "[CONTINUE CURRENT STEP]\n" | |
| 399 | - "You just described the next planned step, but it has not been executed yet. " | |
| 400 | - f"Continue with `{next_pending}` now by emitting one concrete tool call instead of another narration, summary, or verification claim." | |
| 431 | + return InProgressContinuation( | |
| 432 | + prompt=( | |
| 433 | + "[CONTINUE CURRENT STEP]\n" | |
| 434 | + "You just described the next planned step, but it has not been executed yet. " | |
| 435 | + f"Continue with `{next_pending}` now by emitting one concrete tool call instead of another narration, summary, or verification claim." | |
| 436 | + ), | |
| 437 | + target=None, | |
| 401 | 438 | ) |
| 402 | 439 | return None |
| 403 | 440 | |
tests/test_turn_completion.py (modified)
@@ -362,9 +362,7 @@ async def test_turn_completion_continues_progress_intent_without_dod_gate_spam( | ||
| 362 | 362 | assert decision.continuation_count == 1 |
| 363 | 363 | assert prepared.summary.completion_decision_code == "in_progress_transition_continue" |
| 364 | 364 | assert prepared.summary.assistant_messages[-1].content == content |
| 365 | - assert agent.session.messages[-1].role.value == "user" | |
| 366 | - assert agent.session.messages[-1].content.startswith("[CONTINUE CURRENT STEP]") | |
| 367 | - assert "02-installation.html" in agent.session.messages[-1].content | |
| 365 | + assert agent.session.messages[-1].role.value == "assistant" | |
| 368 | 366 | assert not any( |
| 369 | 367 | message.role.value == "user" |
| 370 | 368 | and message.content.startswith("[PLANNED ARTIFACTS STILL MISSING]") |
@@ -372,6 +370,90 @@ async def test_turn_completion_continues_progress_intent_without_dod_gate_spam( | ||
| 372 | 370 | ) |
| 373 | 371 | |
| 374 | 372 | |
| 373 | +@pytest.mark.asyncio | |
| 374 | +async def test_turn_completion_interrupts_repeated_concrete_progress_narration( | |
| 375 | + temp_dir: Path, | |
| 376 | +) -> None: | |
| 377 | + backend = ScriptedBackend() | |
| 378 | + config = non_streaming_config() | |
| 379 | + config.reasoning.completion_check = False | |
| 380 | + agent = Agent( | |
| 381 | + backend=backend, | |
| 382 | + config=config, | |
| 383 | + project_root=temp_dir, | |
| 384 | + ) | |
| 385 | + runtime = ConversationRuntime(agent) | |
| 386 | + events = [] | |
| 387 | + | |
| 388 | + async def capture(event) -> None: | |
| 389 | + events.append(event) | |
| 390 | + | |
| 391 | + prepared = await runtime.turn_preparation.prepare( | |
| 392 | + task=( | |
| 393 | + "Create a multi-file nginx guide under ~/Loader/guides/nginx " | |
| 394 | + "with an index and chapter files." | |
| 395 | + ), | |
| 396 | + emit=capture, | |
| 397 | + requested_mode="execute", | |
| 398 | + original_task=None, | |
| 399 | + on_user_question=None, | |
| 400 | + ) | |
| 401 | + await runtime.phase_tracker.enter( | |
| 402 | + TurnPhase.ASSISTANT, | |
| 403 | + capture, | |
| 404 | + detail="Requesting assistant response", | |
| 405 | + reason_code="request_assistant_response", | |
| 406 | + ) | |
| 407 | + | |
| 408 | + implementation_plan = temp_dir / "implementation.md" | |
| 409 | + implementation_plan.write_text( | |
| 410 | + "# Implementation Plan\n\n" | |
| 411 | + "## File Changes\n\n" | |
| 412 | + "1. Create main index.html file:\n" | |
| 413 | + f" - `{temp_dir / 'index.html'}`\n\n" | |
| 414 | + "2. Create chapter files:\n" | |
| 415 | + f" - `{temp_dir / 'chapters' / '01-introduction.html'}`\n" | |
| 416 | + f" - `{temp_dir / 'chapters' / '02-installation.html'}`\n" | |
| 417 | + ) | |
| 418 | + chapters_dir = temp_dir / "chapters" | |
| 419 | + chapters_dir.mkdir() | |
| 420 | + (temp_dir / "index.html").write_text("<h1>NGINX Guide</h1>\n") | |
| 421 | + (chapters_dir / "01-introduction.html").write_text("<h1>Intro</h1>\n") | |
| 422 | + | |
| 423 | + prepared.definition_of_done.implementation_plan = str(implementation_plan) | |
| 424 | + prepared.definition_of_done.mutating_actions.append("write") | |
| 425 | + prepared.definition_of_done.touched_files.extend( | |
| 426 | + [ | |
| 427 | + str(temp_dir / "index.html"), | |
| 428 | + str(chapters_dir / "01-introduction.html"), | |
| 429 | + ] | |
| 430 | + ) | |
| 431 | + prepared.definition_of_done.pending_items.append("Create chapter files for nginx guide") | |
| 432 | + | |
| 433 | + content = "Now I'll create the second chapter file for the nginx guide." | |
| 434 | + decision = await runtime.turn_completion.handle_text_response( | |
| 435 | + content=content, | |
| 436 | + response_content=content, | |
| 437 | + task=prepared.task, | |
| 438 | + effective_task=prepared.effective_task, | |
| 439 | + iterations=1, | |
| 440 | + max_iterations=agent.config.max_iterations, | |
| 441 | + actions_taken=[], | |
| 442 | + continuation_count=1, | |
| 443 | + dod=prepared.definition_of_done, | |
| 444 | + emit=capture, | |
| 445 | + summary=prepared.summary, | |
| 446 | + executor=prepared.executor, | |
| 447 | + rollback_plan=prepared.rollback_plan, | |
| 448 | + ) | |
| 449 | + | |
| 450 | + assert decision.action == TurnCompletionAction.CONTINUE | |
| 451 | + assert decision.continuation_count == 2 | |
| 452 | + assert agent.session.messages[-1].role.value == "user" | |
| 453 | + assert agent.session.messages[-1].content.startswith("[CONTINUE CURRENT STEP]") | |
| 454 | + assert "02-installation.html" in agent.session.messages[-1].content | |
| 455 | + | |
| 456 | + | |
| 375 | 457 | @pytest.mark.asyncio |
| 376 | 458 | async def test_turn_completion_handles_fake_tool_narration_without_reroute( |
| 377 | 459 | temp_dir: Path, |