Add structured runtime logging at turn loop, tool execution, session context, and completion gates
- SHA
06f8c4512bebdffc1042f89c7d786dbd1e498015- Parents
-
67408f6 - Tree
12aabac
06f8c45
06f8c4512bebdffc1042f89c7d786dbd1e49801567408f6
12aabac| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/completion_policy.py
|
14 | 0 |
| M |
src/loader/runtime/conversation.py
|
3 | 0 |
| A |
src/loader/runtime/logging.py
|
150 | 0 |
| M |
src/loader/runtime/session.py
|
8 | 0 |
| M |
src/loader/runtime/tool_batches.py
|
12 | 0 |
| M |
src/loader/runtime/turn_iteration.py
|
10 | 0 |
| M |
src/loader/runtime/turn_loop.py
|
20 | 2 |
src/loader/runtime/completion_policy.pymodified@@ -8,6 +8,7 @@ from dataclasses import dataclass, field | ||
| 8 | 8 | from ..llm.base import Message, Role |
| 9 | 9 | from .context import RuntimeContext |
| 10 | 10 | from .dod import DefinitionOfDone |
| 11 | +from .logging import get_runtime_logger | |
| 11 | 12 | from .events import AgentEvent, TurnSummary |
| 12 | 13 | from .evidence_provenance import EvidenceProvenance |
| 13 | 14 | from .reasoning_types import TaskCompletionCheck |
@@ -62,6 +63,9 @@ class CompletionPolicy: | ||
| 62 | 63 | """Stop the turn when the assistant starts repeating textually.""" |
| 63 | 64 | |
| 64 | 65 | is_text_loop, loop_description = self.context.safeguards.detect_text_loop(content) |
| 66 | + rlog = get_runtime_logger() | |
| 67 | + rlog.completion_check("text_loop", "detected" if is_text_loop else "clear", | |
| 68 | + reason=loop_description if is_text_loop else None) | |
| 65 | 69 | if not is_text_loop: |
| 66 | 70 | return TextLoopDecision( |
| 67 | 71 | should_stop=False, |
@@ -118,6 +122,16 @@ class CompletionPolicy: | ||
| 118 | 122 | ) |
| 119 | 123 | completion_check = assessment.check if assessment is not None else None |
| 120 | 124 | is_premature = bool(completion_check is not None and not completion_check.is_complete) |
| 125 | + rlog = get_runtime_logger() | |
| 126 | + rlog.completion_check( | |
| 127 | + "follow_through", | |
| 128 | + "premature" if is_premature else "accepted", | |
| 129 | + reason=( | |
| 130 | + "; ".join(completion_check.missing_evidence[:2]) | |
| 131 | + if is_premature and completion_check | |
| 132 | + else None | |
| 133 | + ), | |
| 134 | + ) | |
| 121 | 135 | if not is_premature: |
| 122 | 136 | return ContinuationDecision( |
| 123 | 137 | should_continue=False, |
src/loader/runtime/conversation.pymodified@@ -6,6 +6,7 @@ from collections.abc import Awaitable, Callable | ||
| 6 | 6 | |
| 7 | 7 | from .artifact_invalidation import ArtifactInvalidationAssessor |
| 8 | 8 | from .assistant_turns import AssistantTurnRequester |
| 9 | +from .logging import reset_runtime_logger | |
| 9 | 10 | from .bootstrap import ( |
| 10 | 11 | RuntimeBootstrapSource, |
| 11 | 12 | RuntimeBootstrapView, |
@@ -140,6 +141,8 @@ class ConversationRuntime: | ||
| 140 | 141 | ) -> TurnSummary: |
| 141 | 142 | """Run one task turn and return a structured summary.""" |
| 142 | 143 | |
| 144 | + reset_runtime_logger() | |
| 145 | + | |
| 143 | 146 | prepared_turn = await self.turn_preparation.prepare( |
| 144 | 147 | task=task, |
| 145 | 148 | emit=emit, |
src/loader/runtime/logging.pyadded@@ -0,0 +1,150 @@ | ||
| 1 | +"""Structured runtime logging for diagnosing agent behavior.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +import time | |
| 7 | +from dataclasses import asdict, dataclass, field | |
| 8 | +from pathlib import Path | |
| 9 | +from typing import Any | |
| 10 | + | |
| 11 | +_LOG_PATH = Path("/tmp/loader_runtime.log") | |
| 12 | + | |
| 13 | + | |
| 14 | +@dataclass(slots=True) | |
| 15 | +class LogEntry: | |
| 16 | + """One structured log entry.""" | |
| 17 | + | |
| 18 | + ts: float | |
| 19 | + event: str | |
| 20 | + data: dict[str, Any] = field(default_factory=dict) | |
| 21 | + | |
| 22 | + def to_line(self) -> str: | |
| 23 | + compact = {k: v for k, v in self.data.items() if v is not None} | |
| 24 | + return json.dumps({"t": round(self.ts, 2), "e": self.event, **compact}) | |
| 25 | + | |
| 26 | + | |
| 27 | +class RuntimeLogger: | |
| 28 | + """Append-only structured logger for one session.""" | |
| 29 | + | |
| 30 | + def __init__(self, path: Path | str | None = None) -> None: | |
| 31 | + self._path = Path(path) if path else _LOG_PATH | |
| 32 | + self._start = time.monotonic() | |
| 33 | + self._path.write_text("") # truncate on session start | |
| 34 | + | |
| 35 | + def _elapsed(self) -> float: | |
| 36 | + return time.monotonic() - self._start | |
| 37 | + | |
| 38 | + def log(self, event: str, **data: Any) -> None: | |
| 39 | + entry = LogEntry(ts=self._elapsed(), event=event, data=data) | |
| 40 | + try: | |
| 41 | + with self._path.open("a") as f: | |
| 42 | + f.write(entry.to_line() + "\n") | |
| 43 | + except Exception: | |
| 44 | + pass | |
| 45 | + | |
| 46 | + # ── convenience methods ────────────────────────────────────────── | |
| 47 | + | |
| 48 | + def turn_start(self, iteration: int, message_count: int, task: str) -> None: | |
| 49 | + self.log( | |
| 50 | + "turn.start", | |
| 51 | + iteration=iteration, | |
| 52 | + messages=message_count, | |
| 53 | + task=task[:120], | |
| 54 | + ) | |
| 55 | + | |
| 56 | + def turn_response( | |
| 57 | + self, | |
| 58 | + iteration: int, | |
| 59 | + content_len: int, | |
| 60 | + tool_call_count: int, | |
| 61 | + tool_names: list[str], | |
| 62 | + usage: dict[str, int] | None = None, | |
| 63 | + ) -> None: | |
| 64 | + self.log( | |
| 65 | + "turn.response", | |
| 66 | + iteration=iteration, | |
| 67 | + content_len=content_len, | |
| 68 | + tool_calls=tool_call_count, | |
| 69 | + tools=tool_names or None, | |
| 70 | + usage=usage or None, | |
| 71 | + ) | |
| 72 | + | |
| 73 | + def turn_decision( | |
| 74 | + self, | |
| 75 | + iteration: int, | |
| 76 | + action: str, | |
| 77 | + continuation_count: int, | |
| 78 | + consecutive_errors: int, | |
| 79 | + reason: str | None = None, | |
| 80 | + ) -> None: | |
| 81 | + self.log( | |
| 82 | + "turn.decision", | |
| 83 | + iteration=iteration, | |
| 84 | + action=action, | |
| 85 | + continuations=continuation_count, | |
| 86 | + errors=consecutive_errors, | |
| 87 | + reason=reason, | |
| 88 | + ) | |
| 89 | + | |
| 90 | + def tool_exec( | |
| 91 | + self, | |
| 92 | + name: str, | |
| 93 | + state: str, | |
| 94 | + is_error: bool, | |
| 95 | + result_preview: str, | |
| 96 | + appended_to_session: bool, | |
| 97 | + ) -> None: | |
| 98 | + self.log( | |
| 99 | + "tool.exec", | |
| 100 | + name=name, | |
| 101 | + state=state, | |
| 102 | + error=is_error or None, | |
| 103 | + result=result_preview[:200], | |
| 104 | + in_session=appended_to_session, | |
| 105 | + ) | |
| 106 | + | |
| 107 | + def verification_gate(self, tool_name: str, should_continue: bool) -> None: | |
| 108 | + self.log( | |
| 109 | + "tool.verify_gate", | |
| 110 | + tool=tool_name, | |
| 111 | + continue_loop=should_continue, | |
| 112 | + ) | |
| 113 | + | |
| 114 | + def completion_check( | |
| 115 | + self, | |
| 116 | + stage: str, | |
| 117 | + outcome: str, | |
| 118 | + reason: str | None = None, | |
| 119 | + ) -> None: | |
| 120 | + self.log("completion", stage=stage, outcome=outcome, reason=reason) | |
| 121 | + | |
| 122 | + def session_context(self, message_count: int, roles: dict[str, int]) -> None: | |
| 123 | + self.log("session.context", messages=message_count, roles=roles) | |
| 124 | + | |
| 125 | + def loop_exit(self, iterations: int, reason_code: str, reason: str) -> None: | |
| 126 | + self.log( | |
| 127 | + "loop.exit", | |
| 128 | + iterations=iterations, | |
| 129 | + reason_code=reason_code, | |
| 130 | + reason=reason[:200], | |
| 131 | + ) | |
| 132 | + | |
| 133 | + | |
| 134 | +# Module-level singleton, initialized lazily by the runtime. | |
| 135 | +_logger: RuntimeLogger | None = None | |
| 136 | + | |
| 137 | + | |
| 138 | +def get_runtime_logger() -> RuntimeLogger: | |
| 139 | + """Return the active runtime logger, creating one if needed.""" | |
| 140 | + global _logger | |
| 141 | + if _logger is None: | |
| 142 | + _logger = RuntimeLogger() | |
| 143 | + return _logger | |
| 144 | + | |
| 145 | + | |
| 146 | +def reset_runtime_logger(path: Path | str | None = None) -> RuntimeLogger: | |
| 147 | + """Create a fresh logger (call once per session start).""" | |
| 148 | + global _logger | |
| 149 | + _logger = RuntimeLogger(path) | |
| 150 | + return _logger | |
src/loader/runtime/session.pymodified@@ -533,6 +533,14 @@ class ConversationSession: | ||
| 533 | 533 | if len(self.messages) <= 2: |
| 534 | 534 | request_messages.extend(self.few_shot_factory()) |
| 535 | 535 | request_messages.extend(self.messages) |
| 536 | + | |
| 537 | + from .logging import get_runtime_logger | |
| 538 | + rlog = get_runtime_logger() | |
| 539 | + roles: dict[str, int] = {} | |
| 540 | + for msg in request_messages: | |
| 541 | + roles[msg.role.value] = roles.get(msg.role.value, 0) + 1 | |
| 542 | + rlog.session_context(message_count=len(request_messages), roles=roles) | |
| 543 | + | |
| 536 | 544 | return request_messages |
| 537 | 545 | |
| 538 | 546 | def append(self, message: Message) -> None: |
src/loader/runtime/tool_batches.pymodified@@ -7,6 +7,7 @@ from dataclasses import dataclass, field | ||
| 7 | 7 | |
| 8 | 8 | from ..llm.base import ToolCall |
| 9 | 9 | from .context import RuntimeContext |
| 10 | +from .logging import get_runtime_logger | |
| 10 | 11 | from .dod import ( |
| 11 | 12 | DefinitionOfDone, |
| 12 | 13 | DefinitionOfDoneStore, |
@@ -176,7 +177,18 @@ class ToolBatchRunner: | ||
| 176 | 177 | outcome=outcome, |
| 177 | 178 | emit=emit, |
| 178 | 179 | ) |
| 180 | + | |
| 181 | + rlog = get_runtime_logger() | |
| 182 | + appended = not should_continue | |
| 183 | + rlog.tool_exec( | |
| 184 | + name=tool_call.name, | |
| 185 | + state=outcome.state.value, | |
| 186 | + is_error=outcome.is_error, | |
| 187 | + result_preview=outcome.event_content, | |
| 188 | + appended_to_session=appended, | |
| 189 | + ) | |
| 179 | 190 | if should_continue: |
| 191 | + rlog.verification_gate(tool_call.name, should_continue=True) | |
| 180 | 192 | continue |
| 181 | 193 | |
| 182 | 194 | self.context.session.append(outcome.message) |
src/loader/runtime/turn_iteration.pymodified@@ -10,6 +10,7 @@ from ..llm.base import Message, Role | ||
| 10 | 10 | from .assistant_turns import AssistantTurnRequester |
| 11 | 11 | from .context import RuntimeContext |
| 12 | 12 | from .dod import DefinitionOfDone |
| 13 | +from .logging import get_runtime_logger | |
| 13 | 14 | from .events import AgentEvent, TurnSummary |
| 14 | 15 | from .executor import ToolExecutor |
| 15 | 16 | from .finalization import merge_usage |
@@ -113,6 +114,15 @@ class TurnIterationController: | ||
| 113 | 114 | tool_calls = list(assistant_turn.tool_calls) |
| 114 | 115 | pending_tool_calls_seen = set(assistant_turn.pending_tool_calls_seen) |
| 115 | 116 | |
| 117 | + rlog = get_runtime_logger() | |
| 118 | + rlog.turn_response( | |
| 119 | + iteration=iterations, | |
| 120 | + content_len=len(assistant_turn.content), | |
| 121 | + tool_call_count=len(tool_calls), | |
| 122 | + tool_names=[tc.name for tc in tool_calls], | |
| 123 | + usage=assistant_turn.usage, | |
| 124 | + ) | |
| 125 | + | |
| 116 | 126 | if not assistant_turn.content.strip(): |
| 117 | 127 | return await self._handle_empty_response( |
| 118 | 128 | task=task, |
src/loader/runtime/turn_loop.pymodified@@ -9,6 +9,7 @@ from .context import RuntimeContext | ||
| 9 | 9 | from .dod import DefinitionOfDone |
| 10 | 10 | from .events import AgentEvent, TurnSummary |
| 11 | 11 | from .executor import ToolExecutor |
| 12 | +from .logging import get_runtime_logger | |
| 12 | 13 | from .rollback import RollbackPlan |
| 13 | 14 | from .turn_iteration import TurnIterationAction, TurnIterationController |
| 14 | 15 | from .turn_preamble import TurnPreludeController |
@@ -73,8 +74,14 @@ class TurnLoopController: | ||
| 73 | 74 | """Run the bounded main turn loop and report how it finished.""" |
| 74 | 75 | |
| 75 | 76 | state = TurnLoopState() |
| 77 | + rlog = get_runtime_logger() | |
| 76 | 78 | while state.iterations < self.context.config.max_iterations: |
| 77 | 79 | state.iterations += 1 |
| 80 | + rlog.turn_start( | |
| 81 | + iteration=state.iterations, | |
| 82 | + message_count=len(self.context.session.messages), | |
| 83 | + task=effective_task, | |
| 84 | + ) | |
| 78 | 85 | prelude_decision = await self.turn_preamble.prepare_iteration( |
| 79 | 86 | task=task, |
| 80 | 87 | original_task=original_task, |
@@ -116,18 +123,29 @@ class TurnLoopController: | ||
| 116 | 123 | state.extracted_iterations = iteration_decision.extracted_iterations |
| 117 | 124 | state.consecutive_errors = iteration_decision.consecutive_errors |
| 118 | 125 | state.actions_taken.extend(iteration_decision.new_actions_taken) |
| 126 | + rlog.turn_decision( | |
| 127 | + iteration=state.iterations, | |
| 128 | + action=iteration_decision.action.value, | |
| 129 | + continuation_count=state.continuation_count, | |
| 130 | + consecutive_errors=state.consecutive_errors, | |
| 131 | + reason=iteration_decision.finalize_reason_code, | |
| 132 | + ) | |
| 119 | 133 | if iteration_decision.action == TurnIterationAction.CONTINUE: |
| 120 | 134 | continue |
| 121 | 135 | if iteration_decision.action == TurnIterationAction.FINALIZE: |
| 122 | - return TurnLoopExit( | |
| 136 | + exit = TurnLoopExit( | |
| 123 | 137 | reason_code=iteration_decision.finalize_reason_code |
| 124 | 138 | or "turn_complete", |
| 125 | 139 | reason_summary=iteration_decision.finalize_reason_summary |
| 126 | 140 | or "Finalizing completed turn", |
| 127 | 141 | ) |
| 142 | + rlog.loop_exit(state.iterations, exit.reason_code, exit.reason_summary) | |
| 143 | + return exit | |
| 128 | 144 | break |
| 129 | 145 | |
| 130 | - return TurnLoopExit( | |
| 146 | + exit = TurnLoopExit( | |
| 131 | 147 | reason_code="turn_complete", |
| 132 | 148 | reason_summary="Finalizing completed turn", |
| 133 | 149 | ) |
| 150 | + rlog.loop_exit(state.iterations, exit.reason_code, exit.reason_summary) | |
| 151 | + return exit | |