tenseleyflow/loader / 06f8c45

Browse files

Add structured runtime logging at turn loop, tool execution, session context, and completion gates

Authored by espadonne
SHA
06f8c4512bebdffc1042f89c7d786dbd1e498015
Parents
67408f6
Tree
12aabac

7 changed files

Status  File  +  -
M src/loader/runtime/completion_policy.py 14 0
M src/loader/runtime/conversation.py 3 0
A src/loader/runtime/logging.py 150 0
M src/loader/runtime/session.py 8 0
M src/loader/runtime/tool_batches.py 12 0
M src/loader/runtime/turn_iteration.py 10 0
M src/loader/runtime/turn_loop.py 20 2
src/loader/runtime/completion_policy.py (modified)
@@ -8,6 +8,7 @@ from dataclasses import dataclass, field
88
 from ..llm.base import Message, Role
99
 from .context import RuntimeContext
1010
 from .dod import DefinitionOfDone
11
+from .logging import get_runtime_logger
1112
 from .events import AgentEvent, TurnSummary
1213
 from .evidence_provenance import EvidenceProvenance
1314
 from .reasoning_types import TaskCompletionCheck
@@ -62,6 +63,9 @@ class CompletionPolicy:
6263
         """Stop the turn when the assistant starts repeating textually."""
6364
 
6465
         is_text_loop, loop_description = self.context.safeguards.detect_text_loop(content)
66
+        rlog = get_runtime_logger()
67
+        rlog.completion_check("text_loop", "detected" if is_text_loop else "clear",
68
+                              reason=loop_description if is_text_loop else None)
6569
         if not is_text_loop:
6670
             return TextLoopDecision(
6771
                 should_stop=False,
@@ -118,6 +122,16 @@ class CompletionPolicy:
118122
         )
119123
         completion_check = assessment.check if assessment is not None else None
120124
         is_premature = bool(completion_check is not None and not completion_check.is_complete)
125
+        rlog = get_runtime_logger()
126
+        rlog.completion_check(
127
+            "follow_through",
128
+            "premature" if is_premature else "accepted",
129
+            reason=(
130
+                "; ".join(completion_check.missing_evidence[:2])
131
+                if is_premature and completion_check
132
+                else None
133
+            ),
134
+        )
121135
         if not is_premature:
122136
             return ContinuationDecision(
123137
                 should_continue=False,
src/loader/runtime/conversation.py (modified)
@@ -6,6 +6,7 @@ from collections.abc import Awaitable, Callable
66
 
77
 from .artifact_invalidation import ArtifactInvalidationAssessor
88
 from .assistant_turns import AssistantTurnRequester
9
+from .logging import reset_runtime_logger
910
 from .bootstrap import (
1011
     RuntimeBootstrapSource,
1112
     RuntimeBootstrapView,
@@ -140,6 +141,8 @@ class ConversationRuntime:
140141
     ) -> TurnSummary:
141142
         """Run one task turn and return a structured summary."""
142143
 
144
+        reset_runtime_logger()
145
+
143146
         prepared_turn = await self.turn_preparation.prepare(
144147
             task=task,
145148
             emit=emit,
src/loader/runtime/logging.py (added)
@@ -0,0 +1,150 @@
1
+"""Structured runtime logging for diagnosing agent behavior."""
2
+
3
+from __future__ import annotations
4
+
5
+import json
6
+import time
7
+from dataclasses import asdict, dataclass, field
8
+from pathlib import Path
9
+from typing import Any
10
+
11
+_LOG_PATH = Path("/tmp/loader_runtime.log")
12
+
13
+
14
+@dataclass(slots=True)
15
+class LogEntry:
16
+    """One structured log entry."""
17
+
18
+    ts: float
19
+    event: str
20
+    data: dict[str, Any] = field(default_factory=dict)
21
+
22
+    def to_line(self) -> str:
23
+        compact = {k: v for k, v in self.data.items() if v is not None}
24
+        return json.dumps({"t": round(self.ts, 2), "e": self.event, **compact})
25
+
26
+
27
class RuntimeLogger:
    """Append-only structured logger for one session.

    Every event is appended to a file as one JSON line. Logging is
    best-effort throughout: failures to truncate, serialize, or write are
    swallowed so that diagnostics never interrupt the code being diagnosed.
    """

    def __init__(self, path: Path | str | None = None) -> None:
        """Start a fresh log at *path*, or at the module default when falsy.

        Existing file content is discarded. If the file cannot be truncated
        the logger is still constructed and later writes remain best-effort.
        """
        self._path = Path(path) if path else _LOG_PATH
        self._start = time.monotonic()
        try:
            self._path.write_text("")  # truncate on session start
        except OSError:
            # Same contract as log(): an unwritable log location (missing
            # directory, file owned by another user, ...) must not raise
            # into the caller.
            pass

    def _elapsed(self) -> float:
        """Return seconds elapsed since this logger was created."""
        return time.monotonic() - self._start

    def log(self, event: str, **data: Any) -> None:
        """Append one entry named *event* carrying *data* as extra fields.

        Never raises for serialization or I/O problems; the entry is dropped
        instead.
        """
        try:
            line = LogEntry(ts=self._elapsed(), event=event, data=data).to_line()
            with self._path.open("a", encoding="utf-8") as f:
                f.write(line + "\n")
        except Exception:
            # Deliberate best-effort: diagnostics must not break the runtime.
            pass

    # ── convenience methods ──────────────────────────────────────────

    def turn_start(self, iteration: int, message_count: int, task: str) -> None:
        """Log a ``turn.start`` event; *task* is cut to 120 characters."""
        self.log(
            "turn.start",
            iteration=iteration,
            messages=message_count,
            task=task[:120],
        )

    def turn_response(
        self,
        iteration: int,
        content_len: int,
        tool_call_count: int,
        tool_names: list[str],
        usage: dict[str, int] | None = None,
    ) -> None:
        """Log a ``turn.response`` event.

        An empty *tool_names* list or empty/missing *usage* is passed on as
        ``None``, so the field is left out of the written line.
        """
        self.log(
            "turn.response",
            iteration=iteration,
            content_len=content_len,
            tool_calls=tool_call_count,
            tools=tool_names or None,
            usage=usage or None,
        )

    def turn_decision(
        self,
        iteration: int,
        action: str,
        continuation_count: int,
        consecutive_errors: int,
        reason: str | None = None,
    ) -> None:
        """Log a ``turn.decision`` event with the action and loop counters."""
        self.log(
            "turn.decision",
            iteration=iteration,
            action=action,
            continuations=continuation_count,
            errors=consecutive_errors,
            reason=reason,
        )

    def tool_exec(
        self,
        name: str,
        state: str,
        is_error: bool,
        result_preview: str,
        appended_to_session: bool,
    ) -> None:
        """Log a ``tool.exec`` event.

        *result_preview* is cut to 200 characters. The ``error`` field is
        only written when *is_error* is truthy.
        """
        self.log(
            "tool.exec",
            name=name,
            state=state,
            error=is_error or None,
            result=result_preview[:200],
            in_session=appended_to_session,
        )

    def verification_gate(self, tool_name: str, should_continue: bool) -> None:
        """Log a ``tool.verify_gate`` event for *tool_name*."""
        self.log(
            "tool.verify_gate",
            tool=tool_name,
            continue_loop=should_continue,
        )

    def completion_check(
        self,
        stage: str,
        outcome: str,
        reason: str | None = None,
    ) -> None:
        """Log a ``completion`` event for one completion-check *stage*."""
        self.log("completion", stage=stage, outcome=outcome, reason=reason)

    def session_context(self, message_count: int, roles: dict[str, int]) -> None:
        """Log a ``session.context`` event with the per-role message counts."""
        self.log("session.context", messages=message_count, roles=roles)

    def loop_exit(self, iterations: int, reason_code: str, reason: str) -> None:
        """Log a ``loop.exit`` event; *reason* is cut to 200 characters."""
        self.log(
            "loop.exit",
            iterations=iterations,
            reason_code=reason_code,
            reason=reason[:200],
        )
132
+
133
+
134
+# Module-level singleton, initialized lazily by the runtime.
135
+_logger: RuntimeLogger | None = None
136
+
137
+
138
def get_runtime_logger() -> RuntimeLogger:
    """Fetch the module-wide logger, building a default one on first use."""
    global _logger
    current = _logger
    if current is not None:
        return current
    # No logger yet: create one at the default path and remember it.
    current = RuntimeLogger()
    _logger = current
    return current
144
+
145
+
146
def reset_runtime_logger(path: Path | str | None = None) -> RuntimeLogger:
    """Install and return a brand-new logger, replacing any active one.

    *path* is forwarded to ``RuntimeLogger``; constructing the logger
    truncates the log file, so each call starts the log from empty.
    """
    global _logger
    fresh = RuntimeLogger(path)
    _logger = fresh
    return fresh
src/loader/runtime/session.py (modified)
@@ -533,6 +533,14 @@ class ConversationSession:
533533
         if len(self.messages) <= 2:
534534
             request_messages.extend(self.few_shot_factory())
535535
         request_messages.extend(self.messages)
536
+
537
+        from .logging import get_runtime_logger
538
+        rlog = get_runtime_logger()
539
+        roles: dict[str, int] = {}
540
+        for msg in request_messages:
541
+            roles[msg.role.value] = roles.get(msg.role.value, 0) + 1
542
+        rlog.session_context(message_count=len(request_messages), roles=roles)
543
+
536544
         return request_messages
537545
 
538546
     def append(self, message: Message) -> None:
src/loader/runtime/tool_batches.py (modified)
@@ -7,6 +7,7 @@ from dataclasses import dataclass, field
77
 
88
 from ..llm.base import ToolCall
99
 from .context import RuntimeContext
10
+from .logging import get_runtime_logger
1011
 from .dod import (
1112
     DefinitionOfDone,
1213
     DefinitionOfDoneStore,
@@ -176,7 +177,18 @@ class ToolBatchRunner:
176177
                 outcome=outcome,
177178
                 emit=emit,
178179
             )
180
+
181
+            rlog = get_runtime_logger()
182
+            appended = not should_continue
183
+            rlog.tool_exec(
184
+                name=tool_call.name,
185
+                state=outcome.state.value,
186
+                is_error=outcome.is_error,
187
+                result_preview=outcome.event_content,
188
+                appended_to_session=appended,
189
+            )
179190
             if should_continue:
191
+                rlog.verification_gate(tool_call.name, should_continue=True)
180192
                 continue
181193
 
182194
             self.context.session.append(outcome.message)
src/loader/runtime/turn_iteration.py (modified)
@@ -10,6 +10,7 @@ from ..llm.base import Message, Role
1010
 from .assistant_turns import AssistantTurnRequester
1111
 from .context import RuntimeContext
1212
 from .dod import DefinitionOfDone
13
+from .logging import get_runtime_logger
1314
 from .events import AgentEvent, TurnSummary
1415
 from .executor import ToolExecutor
1516
 from .finalization import merge_usage
@@ -113,6 +114,15 @@ class TurnIterationController:
113114
         tool_calls = list(assistant_turn.tool_calls)
114115
         pending_tool_calls_seen = set(assistant_turn.pending_tool_calls_seen)
115116
 
117
+        rlog = get_runtime_logger()
118
+        rlog.turn_response(
119
+            iteration=iterations,
120
+            content_len=len(assistant_turn.content),
121
+            tool_call_count=len(tool_calls),
122
+            tool_names=[tc.name for tc in tool_calls],
123
+            usage=assistant_turn.usage,
124
+        )
125
+
116126
         if not assistant_turn.content.strip():
117127
             return await self._handle_empty_response(
118128
                 task=task,
src/loader/runtime/turn_loop.py (modified)
@@ -9,6 +9,7 @@ from .context import RuntimeContext
99
 from .dod import DefinitionOfDone
1010
 from .events import AgentEvent, TurnSummary
1111
 from .executor import ToolExecutor
12
+from .logging import get_runtime_logger
1213
 from .rollback import RollbackPlan
1314
 from .turn_iteration import TurnIterationAction, TurnIterationController
1415
 from .turn_preamble import TurnPreludeController
@@ -73,8 +74,14 @@ class TurnLoopController:
7374
         """Run the bounded main turn loop and report how it finished."""
7475
 
7576
         state = TurnLoopState()
77
+        rlog = get_runtime_logger()
7678
         while state.iterations < self.context.config.max_iterations:
7779
             state.iterations += 1
80
+            rlog.turn_start(
81
+                iteration=state.iterations,
82
+                message_count=len(self.context.session.messages),
83
+                task=effective_task,
84
+            )
7885
             prelude_decision = await self.turn_preamble.prepare_iteration(
7986
                 task=task,
8087
                 original_task=original_task,
@@ -116,18 +123,29 @@ class TurnLoopController:
116123
             state.extracted_iterations = iteration_decision.extracted_iterations
117124
             state.consecutive_errors = iteration_decision.consecutive_errors
118125
             state.actions_taken.extend(iteration_decision.new_actions_taken)
126
+            rlog.turn_decision(
127
+                iteration=state.iterations,
128
+                action=iteration_decision.action.value,
129
+                continuation_count=state.continuation_count,
130
+                consecutive_errors=state.consecutive_errors,
131
+                reason=iteration_decision.finalize_reason_code,
132
+            )
119133
             if iteration_decision.action == TurnIterationAction.CONTINUE:
120134
                 continue
121135
             if iteration_decision.action == TurnIterationAction.FINALIZE:
122
-                return TurnLoopExit(
136
+                exit = TurnLoopExit(
123137
                     reason_code=iteration_decision.finalize_reason_code
124138
                     or "turn_complete",
125139
                     reason_summary=iteration_decision.finalize_reason_summary
126140
                     or "Finalizing completed turn",
127141
                 )
142
+                rlog.loop_exit(state.iterations, exit.reason_code, exit.reason_summary)
143
+                return exit
128144
             break
129145
 
130
-        return TurnLoopExit(
146
+        exit = TurnLoopExit(
131147
             reason_code="turn_complete",
132148
             reason_summary="Finalizing completed turn",
133149
         )
150
+        rlog.loop_exit(state.iterations, exit.reason_code, exit.reason_summary)
151
+        return exit