`06f8c45`

Add structured runtime logging at turn loop, tool execution, session context, and completion gates

Authored by

espadonne 1 month ago

SHA: 06f8c4512bebdffc1042f89c7d786dbd1e498015
Parents: 67408f6
Tree: 12aabac

7 changed files

Status	File	+	-
M	`src/loader/runtime/completion_policy.py`	14	0
M	`src/loader/runtime/conversation.py`	3	0
A	`src/loader/runtime/logging.py`	150	0
M	`src/loader/runtime/session.py`	8	0
M	`src/loader/runtime/tool_batches.py`	12	0
M	`src/loader/runtime/turn_iteration.py`	10	0
M	`src/loader/runtime/turn_loop.py`	20	2

src/loader/runtime/completion_policy.py (modified)

  from ..llm.base import Message, Role
  from .context import RuntimeContext
  from .dod import DefinitionOfDone
 +from .logging import get_runtime_logger
  from .events import AgentEvent, TurnSummary
  from .evidence_provenance import EvidenceProvenance
  from .reasoning_types import TaskCompletionCheck
          """Stop the turn when the assistant starts repeating textually."""
          is_text_loop, loop_description = self.context.safeguards.detect_text_loop(content)
 +        rlog = get_runtime_logger()
 +        rlog.completion_check("text_loop", "detected" if is_text_loop else "clear",
 +                              reason=loop_description if is_text_loop else None)
          if not is_text_loop:
              return TextLoopDecision(
                  should_stop=False,
+         )
          completion_check = assessment.check if assessment is not None else None
          is_premature = bool(completion_check is not None and not completion_check.is_complete)
 +        rlog = get_runtime_logger()
 +        rlog.completion_check(
 +            "follow_through",
 +            "premature" if is_premature else "accepted",
 +            reason=(
 +                "; ".join(completion_check.missing_evidence[:2])
 +                if is_premature and completion_check
 +                else None
 +            ),
 +        )
          if not is_premature:
              return ContinuationDecision(
                  should_continue=False,

src/loader/runtime/conversation.py (modified)

  from .artifact_invalidation import ArtifactInvalidationAssessor
  from .assistant_turns import AssistantTurnRequester
 +from .logging import reset_runtime_logger
  from .bootstrap import (
      RuntimeBootstrapSource,
      RuntimeBootstrapView,
      ) -> TurnSummary:
          """Run one task turn and return a structured summary."""
 +        reset_runtime_logger()
++
          prepared_turn = await self.turn_preparation.prepare(
              task=task,
              emit=emit,

src/loader/runtime/logging.py (added)

 +"""Structured runtime logging for diagnosing agent behavior."""
++
 +from __future__ import annotations
++
 +import json
 +import time
 +from dataclasses import asdict, dataclass, field
 +from pathlib import Path
 +from typing import Any
++
 +_LOG_PATH = Path("/tmp/loader_runtime.log")
++
++
 +@dataclass(slots=True)
 +class LogEntry:
 +    """One structured log entry."""
++
 +    ts: float
 +    event: str
 +    data: dict[str, Any] = field(default_factory=dict)
++
 +    def to_line(self) -> str:
 +        compact = {k: v for k, v in self.data.items() if v is not None}
 +        return json.dumps({"t": round(self.ts, 2), "e": self.event, **compact})
++
++
 +class RuntimeLogger:
 +    """Append-only structured logger for one session."""
++
 +    def __init__(self, path: Path | str | None = None) -> None:
 +        self._path = Path(path) if path else _LOG_PATH
 +        self._start = time.monotonic()
 +        self._path.write_text("")  # truncate on session start
++
 +    def _elapsed(self) -> float:
 +        return time.monotonic() - self._start
++
 +    def log(self, event: str, **data: Any) -> None:
 +        entry = LogEntry(ts=self._elapsed(), event=event, data=data)
 +        try:
 +            with self._path.open("a") as f:
 +                f.write(entry.to_line() + "\n")
 +        except Exception:
 +            pass
++
 +    # ── convenience methods ──────────────────────────────────────────
++
 +    def turn_start(self, iteration: int, message_count: int, task: str) -> None:
 +        self.log(
 +            "turn.start",
 +            iteration=iteration,
 +            messages=message_count,
 +            task=task[:120],
 +        )
++
 +    def turn_response(
 +        self,
 +        iteration: int,
 +        content_len: int,
 +        tool_call_count: int,
 +        tool_names: list[str],
 +        usage: dict[str, int] | None = None,
 +    ) -> None:
 +        self.log(
 +            "turn.response",
 +            iteration=iteration,
 +            content_len=content_len,
 +            tool_calls=tool_call_count,
 +            tools=tool_names or None,
 +            usage=usage or None,
 +        )
++
 +    def turn_decision(
 +        self,
 +        iteration: int,
 +        action: str,
 +        continuation_count: int,
 +        consecutive_errors: int,
 +        reason: str | None = None,
 +    ) -> None:
 +        self.log(
 +            "turn.decision",
 +            iteration=iteration,
 +            action=action,
 +            continuations=continuation_count,
 +            errors=consecutive_errors,
 +            reason=reason,
 +        )
++
 +    def tool_exec(
 +        self,
 +        name: str,
 +        state: str,
 +        is_error: bool,
 +        result_preview: str,
 +        appended_to_session: bool,
 +    ) -> None:
 +        self.log(
 +            "tool.exec",
 +            name=name,
 +            state=state,
 +            error=is_error or None,
 +            result=result_preview[:200],
 +            in_session=appended_to_session,
 +        )
++
 +    def verification_gate(self, tool_name: str, should_continue: bool) -> None:
 +        self.log(
 +            "tool.verify_gate",
 +            tool=tool_name,
 +            continue_loop=should_continue,
 +        )
++
 +    def completion_check(
 +        self,
 +        stage: str,
 +        outcome: str,
 +        reason: str | None = None,
 +    ) -> None:
 +        self.log("completion", stage=stage, outcome=outcome, reason=reason)
++
 +    def session_context(self, message_count: int, roles: dict[str, int]) -> None:
 +        self.log("session.context", messages=message_count, roles=roles)
++
 +    def loop_exit(self, iterations: int, reason_code: str, reason: str) -> None:
 +        self.log(
 +            "loop.exit",
 +            iterations=iterations,
 +            reason_code=reason_code,
 +            reason=reason[:200],
 +        )
++
++
 +# Module-level singleton, initialized lazily by the runtime.
 +_logger: RuntimeLogger | None = None
++
++
 +def get_runtime_logger() -> RuntimeLogger:
 +    """Return the active runtime logger, creating one if needed."""
 +    global _logger
 +    if _logger is None:
 +        _logger = RuntimeLogger()
 +    return _logger
++
++
 +def reset_runtime_logger(path: Path | str | None = None) -> RuntimeLogger:
 +    """Create a fresh logger (call once per session start)."""
 +    global _logger
 +    _logger = RuntimeLogger(path)
 +    return _logger

src/loader/runtime/session.py (modified)

          if len(self.messages) <= 2:
              request_messages.extend(self.few_shot_factory())
          request_messages.extend(self.messages)
++
 +        from .logging import get_runtime_logger
 +        rlog = get_runtime_logger()
 +        roles: dict[str, int] = {}
 +        for msg in request_messages:
 +            roles[msg.role.value] = roles.get(msg.role.value, 0) + 1
 +        rlog.session_context(message_count=len(request_messages), roles=roles)
++
          return request_messages
      def append(self, message: Message) -> None:

src/loader/runtime/tool_batches.py (modified)

  from ..llm.base import ToolCall
  from .context import RuntimeContext
 +from .logging import get_runtime_logger
  from .dod import (
      DefinitionOfDone,
      DefinitionOfDoneStore,
                  outcome=outcome,
                  emit=emit,
+             )
++
 +            rlog = get_runtime_logger()
 +            appended = not should_continue
 +            rlog.tool_exec(
 +                name=tool_call.name,
 +                state=outcome.state.value,
 +                is_error=outcome.is_error,
 +                result_preview=outcome.event_content,
 +                appended_to_session=appended,
 +            )
              if should_continue:
 +                rlog.verification_gate(tool_call.name, should_continue=True)
                  continue
              self.context.session.append(outcome.message)

src/loader/runtime/turn_iteration.py (modified)

  from .assistant_turns import AssistantTurnRequester
  from .context import RuntimeContext
  from .dod import DefinitionOfDone
 +from .logging import get_runtime_logger
  from .events import AgentEvent, TurnSummary
  from .executor import ToolExecutor
  from .finalization import merge_usage
          tool_calls = list(assistant_turn.tool_calls)
          pending_tool_calls_seen = set(assistant_turn.pending_tool_calls_seen)
 +        rlog = get_runtime_logger()
 +        rlog.turn_response(
 +            iteration=iterations,
 +            content_len=len(assistant_turn.content),
 +            tool_call_count=len(tool_calls),
 +            tool_names=[tc.name for tc in tool_calls],
 +            usage=assistant_turn.usage,
 +        )
++
          if not assistant_turn.content.strip():
              return await self._handle_empty_response(
                  task=task,

src/loader/runtime/turn_loop.py (modified)

  from .dod import DefinitionOfDone
  from .events import AgentEvent, TurnSummary
  from .executor import ToolExecutor
 +from .logging import get_runtime_logger
  from .rollback import RollbackPlan
  from .turn_iteration import TurnIterationAction, TurnIterationController
  from .turn_preamble import TurnPreludeController
          """Run the bounded main turn loop and report how it finished."""
          state = TurnLoopState()
 +        rlog = get_runtime_logger()
          while state.iterations < self.context.config.max_iterations:
              state.iterations += 1
 +            rlog.turn_start(
 +                iteration=state.iterations,
 +                message_count=len(self.context.session.messages),
 +                task=effective_task,
 +            )
              prelude_decision = await self.turn_preamble.prepare_iteration(
                  task=task,
                  original_task=original_task,
              state.extracted_iterations = iteration_decision.extracted_iterations
              state.consecutive_errors = iteration_decision.consecutive_errors
              state.actions_taken.extend(iteration_decision.new_actions_taken)
 +            rlog.turn_decision(
 +                iteration=state.iterations,
 +                action=iteration_decision.action.value,
 +                continuation_count=state.continuation_count,
 +                consecutive_errors=state.consecutive_errors,
 +                reason=iteration_decision.finalize_reason_code,
 +            )
              if iteration_decision.action == TurnIterationAction.CONTINUE:
                  continue
              if iteration_decision.action == TurnIterationAction.FINALIZE:
 -                return TurnLoopExit(
 +                exit = TurnLoopExit(
                      reason_code=iteration_decision.finalize_reason_code
                      or "turn_complete",
                      reason_summary=iteration_decision.finalize_reason_summary
                      or "Finalizing completed turn",
+                 )
 +                rlog.loop_exit(state.iterations, exit.reason_code, exit.reason_summary)
 +                return exit
              break
 -        return TurnLoopExit(
 +        exit = TurnLoopExit(
              reason_code="turn_complete",
              reason_summary="Finalizing completed turn",
+         )
 +        rlog.loop_exit(state.iterations, exit.reason_code, exit.reason_summary)
 +        return exit