tenseleyflow/loader / 20e3ff4


Restore model-aware compaction headroom

Authored by espadonne
SHA: 20e3ff4dad06c9a4d32d1626e423f3c7608fa331
Parents: e779c1b
Tree: f125e48

10 changed files

Status  File  +  -
M src/loader/agent/loop.py 1 1
M src/loader/llm/ollama.py 2 0
M src/loader/runtime/capabilities.py 54 2
M src/loader/runtime/compaction.py 37 4
M src/loader/runtime/public_shell.py 16 2
M src/loader/runtime/safeguard_services.py 3 5
M tests/test_capabilities.py 14 0
M tests/test_compaction.py 41 0
M tests/test_runtime_public_shell.py 31 0
M tests/test_safeguard_services.py 9 0
src/loader/agent/loop.py modified
@@ -90,7 +90,7 @@ class AgentConfig:
     workflow_mode_override: str | None = None
     stream: bool = True  # Stream LLM responses for real-time output
     session_rotate_after_bytes: int = 256 * 1024
-    session_auto_compaction_input_tokens_threshold: int = 12_000  # ~75% of default 16K context
+    session_auto_compaction_input_tokens_threshold: int = 100_000  # Upper bound; runtime clamps to model capacity
     session_compaction_keep_last_messages: int = 4

     # Reasoning stages configuration
src/loader/llm/ollama.py modified
@@ -168,9 +168,11 @@ class OllamaBackend(LLMBackend):
             response.raise_for_status()
             self._model_details_cache = response.json()
             self._model_details_loaded_for = self.model
+            self._capability_profile = None
         except Exception:
             self._model_details_cache = None
             self._model_details_loaded_for = self.model
+            self._capability_profile = None

         return self._model_details_cache

src/loader/runtime/capabilities.py modified
@@ -193,6 +193,43 @@ def _any_prefix_match(tokens: set[str], family_set: set[str]) -> bool:
     return False


+def _coerce_positive_int(value: Any) -> int | None:
+    """Return one positive integer when the input looks numeric."""
+
+    try:
+        number = int(value)
+    except (TypeError, ValueError):
+        return None
+    if number <= 0:
+        return None
+    return number
+
+
+def _infer_context_window(model_details: dict[str, Any] | None) -> int | None:
+    """Infer one model context window from Ollama model metadata."""
+
+    if not isinstance(model_details, dict):
+        return None
+
+    candidates: list[int] = []
+
+    details = model_details.get("details")
+    if isinstance(details, dict):
+        context_length = _coerce_positive_int(details.get("context_length"))
+        if context_length is not None:
+            candidates.append(context_length)
+
+    model_info = model_details.get("model_info")
+    if isinstance(model_info, dict):
+        for key, value in model_info.items():
+            if str(key).endswith(".context_length"):
+                context_length = _coerce_positive_int(value)
+                if context_length is not None:
+                    candidates.append(context_length)
+
+    return max(candidates) if candidates else None
+
+
 def resolve_capability_profile(
     model_name: str,
     *,
@@ -207,8 +244,20 @@ def resolve_capability_profile(
     3. heuristic fallback using model details / family tokens
     """

+    inferred_context_window = _infer_context_window(model_details)
+
     if override is not None:
-        return override
+        if inferred_context_window is None:
+            return override
+        return CapabilityProfile(
+            model_name=override.model_name,
+            supports_native_tools=override.supports_native_tools,
+            supports_streaming=override.supports_streaming,
+            context_window=inferred_context_window,
+            preferred_tool_call_format=override.preferred_tool_call_format,
+            verification_strictness=override.verification_strictness,
+            notes=list(override.notes),
+        )

     normalized = model_name.lower().strip()
     # Try full name first, then without :tag (e.g. "deepseek-r1:14b" -> "deepseek-r1")
@@ -219,7 +268,7 @@ def resolve_capability_profile(
                 model_name=model_name,
                 supports_native_tools=known.supports_native_tools,
                 supports_streaming=known.supports_streaming,
-                context_window=known.context_window,
+                context_window=inferred_context_window or known.context_window,
                 preferred_tool_call_format=known.preferred_tool_call_format,
                 verification_strictness=known.verification_strictness,
                 notes=list(known.notes),
@@ -231,6 +280,7 @@ def resolve_capability_profile(
         return _profile(
             model_name,
             supports_native_tools=True,
+            context_window=inferred_context_window or 8192,
             preferred_tool_call_format="native",
             verification_strictness="standard",
             notes=["Resolved from model family heuristic."],
@@ -240,6 +290,7 @@ def resolve_capability_profile(
         return _profile(
             model_name,
             supports_native_tools=False,
+            context_window=inferred_context_window or 8192,
             preferred_tool_call_format="json_tag",
             verification_strictness="standard",
             notes=["Resolved from conservative no-native-tools heuristic."],
@@ -248,6 +299,7 @@ def resolve_capability_profile(
     return _profile(
         model_name,
         supports_native_tools=False,
+        context_window=inferred_context_window or 8192,
         preferred_tool_call_format="json_tag",
         verification_strictness="standard",
         notes=["Unknown model family; defaulting to safe ReAct-style tool use."],
src/loader/runtime/compaction.py modified
@@ -9,6 +9,7 @@ from dataclasses import dataclass
 from ..llm.base import Message, Role

 DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 100_000
+MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 12_000
 DEFAULT_COMPACTION_KEEP_LAST_MESSAGES = 4
 DEFAULT_MAX_CHARS = 1_200
 DEFAULT_MAX_LINES = 24
@@ -63,6 +64,25 @@ def estimate_message_tokens(messages: list[Message]) -> int:
     return max(1, total_chars // 4)


+def resolve_auto_compaction_input_tokens_threshold(
+    configured_threshold: int,
+    *,
+    context_window: int | None = None,
+) -> int:
+    """Resolve one compaction threshold from config and model context."""
+
+    threshold = max(1, int(configured_threshold))
+    if context_window is None or context_window <= 0:
+        return threshold
+
+    context_bound = max(
+        MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD,
+        int(context_window * 0.75),
+    )
+    context_bound = min(DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD, context_bound)
+    return min(threshold, context_bound)
+
+
 def compress_summary(
     summary: str,
     budget: SummaryCompressionBudget | None = None,
@@ -197,12 +217,20 @@ def build_session_summary(
     user_messages = [
         _collapse_inline_whitespace(message.content)
         for message in messages
-        if message.role == Role.USER and message.content.strip()
+        if (
+            message.role == Role.USER
+            and message.content.strip()
+            and not _is_compacted_context_message(message.content)
+        )
     ]
     assistant_messages = [
         _collapse_inline_whitespace(message.content)
         for message in messages
-        if message.role == Role.ASSISTANT and message.content.strip()
+        if (
+            message.role == Role.ASSISTANT
+            and message.content.strip()
+            and not _is_compacted_context_message(message.content)
+        )
     ]
     tool_names = [
         tool_call.name
@@ -229,8 +257,7 @@ def build_session_summary(
         f"- Recent user requests: {recent_requests}",
     ]
     if previous_summary:
-        previous_line = _collapse_inline_whitespace(previous_summary.splitlines()[0])
-        lines.append(f"- Previously compacted context: {previous_line}")
+        lines.append("- Previously compacted context retained.")
     lines.extend(
         [
             f"- Newly compacted context: {len(messages)} earlier message(s) summarized.",
@@ -247,6 +274,8 @@ def _extract_key_files(messages: list[Message]) -> list[str]:
     pattern = re.compile(r"(?:/|\.{1,2}/|[A-Za-z0-9_.-]+/)[A-Za-z0-9_./-]+\.[A-Za-z0-9]+")
     files: list[str] = []
     for message in messages:
+        if _is_compacted_context_message(message.content):
+            continue
         for match in pattern.findall(message.content):
             if match not in files:
                 files.append(match)
@@ -262,6 +291,10 @@ def _collapse_inline_whitespace(line: str) -> str:
     return " ".join(line.split())


+def _is_compacted_context_message(content: str) -> bool:
+    return content.lstrip().startswith("[COMPACTED CONTEXT]")
+
+
 def _truncate_line(line: str, max_chars: int) -> str:
     if max_chars <= 0 or len(line) <= max_chars:
         return line
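Worked through, the new resolver computes min(configured, clamp(int(0.75 * context_window), 12_000, 100_000)), falling back to the configured value when no context window is known. A quick sketch; the first two values mirror the new tests, the pass-through case follows directly from the code above:

    from loader.runtime.compaction import (
        resolve_auto_compaction_input_tokens_threshold,
    )

    # 128K model: 75% headroom (98_304) undercuts the configured 100_000.
    assert resolve_auto_compaction_input_tokens_threshold(
        100_000, context_window=131_072
    ) == 98_304
    # 8K model: 75% would be 6_144, so the 12_000 floor applies.
    assert resolve_auto_compaction_input_tokens_threshold(
        100_000, context_window=8_192
    ) == 12_000
    # No model metadata: the configured threshold passes through unchanged.
    assert resolve_auto_compaction_input_tokens_threshold(100_000) == 100_000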
src/loader/runtime/public_shell.py modified
@@ -15,6 +15,7 @@ from ..context.project import ProjectContext
 from ..llm.base import Message, Role
 from ..tools.base import ToolRegistry
 from .capabilities import CapabilityProfile, resolve_backend_capability_profile
+from .compaction import resolve_auto_compaction_input_tokens_threshold
 from .dod import DefinitionOfDoneStore
 from .events import AgentEvent, TurnSummary
 from .launcher import build_runtime_launcher
@@ -303,7 +304,7 @@ def build_fresh_runtime_session_install(
         runtime_owner_path=owner_metadata["owner_path"],
         rotate_after_bytes=owner.config.session_rotate_after_bytes,
         auto_compaction_input_tokens_threshold=(
-            owner.config.session_auto_compaction_input_tokens_threshold
+            _resolve_owner_auto_compaction_threshold(owner)
         ),
         compaction_keep_last_messages=owner.config.session_compaction_keep_last_messages,
         system_message_factory=owner._get_system_message,
@@ -399,7 +400,7 @@ def resume_runtime_shell_session(
         session_id=session_id,
         rotate_after_bytes=owner.config.session_rotate_after_bytes,
         auto_compaction_input_tokens_threshold=(
-            owner.config.session_auto_compaction_input_tokens_threshold
+            _resolve_owner_auto_compaction_threshold(owner)
        ),
         compaction_keep_last_messages=owner.config.session_compaction_keep_last_messages,
     )
@@ -614,9 +615,22 @@ def refresh_runtime_shell_capability_profile(
     if refresh.prompt_reset_required:
         owner._system_message = None
     owner._use_react = None
+    if hasattr(owner, "session") and owner.session is not None:
+        owner.session.auto_compaction_input_tokens_threshold = (
+            _resolve_owner_auto_compaction_threshold(owner)
+        )
     return refresh


+def _resolve_owner_auto_compaction_threshold(owner: RuntimeShellOwner) -> int:
+    """Clamp one owner's compaction threshold to the active model context."""
+
+    return resolve_auto_compaction_input_tokens_threshold(
+        owner.config.session_auto_compaction_input_tokens_threshold,
+        context_window=owner.capability_profile.context_window,
+    )
+
+
 def build_runtime_system_message(
     *,
     registry: ToolRegistry,
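The new helper reads only two attributes off the owner, so its effect can be sketched with a stub; the SimpleNamespace stand-ins below are hypothetical, not the real RuntimeShellOwner, and rely on the helper being duck-typed at runtime:

    from types import SimpleNamespace

    from loader.runtime.public_shell import _resolve_owner_auto_compaction_threshold

    # Hypothetical stub exposing only the attributes the helper touches.
    owner = SimpleNamespace(
        config=SimpleNamespace(session_auto_compaction_input_tokens_threshold=100_000),
        capability_profile=SimpleNamespace(context_window=131_072),
    )
    assert _resolve_owner_auto_compaction_threshold(owner) == 98_304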
src/loader/runtime/safeguard_services.py modified
@@ -111,11 +111,9 @@ class ActionTracker:
             if isinstance(hunks, list) and self.would_duplicate_patch(file_path, hunks):
                 return True, f"Same patch already applied to: {file_path}"

-        elif tool_name == "bash":
-            command = arguments.get("command", "")
-            if self.would_duplicate_command(command):
-                return True, f"Command already executed: {command[:50]}..."
-
+        # Bash commands intentionally skip exact-command dedupe here.
+        # Re-running the same shell probe after a filesystem change is often valid,
+        # and higher-level loop detection is a safer backstop than blocking `ls`.
         return False, ""

     def record_tool_call(self, tool_name: str, arguments: dict) -> None:
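After this change the tracker waves identical shell probes through while still deduplicating patches. A quick sketch mirroring the new test below:

    from loader.runtime.safeguard_services import ActionTracker

    tracker = ActionTracker()
    arguments = {"command": "ls -la"}

    tracker.record_tool_call("bash", arguments)
    # Repeating the same bash command is no longer flagged as a duplicate.
    assert tracker.check_tool_call("bash", arguments) == (False, "")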
tests/test_capabilities.py modified
@@ -41,6 +41,20 @@ def test_family_heuristic_resolution_uses_model_details() -> None:
     assert "heuristic" in resolved.notes[0].lower()


+def test_model_details_context_window_overrides_registry_default() -> None:
+    resolved = resolve_capability_profile(
+        "gpt-oss:20b",
+        model_details={
+            "model_info": {
+                "gptoss.context_length": 131072,
+            }
+        },
+    )
+
+    assert resolved.context_window == 131072
+    assert resolved.supports_native_tools
+
+
 def test_unknown_models_default_to_safe_react_profile() -> None:
     resolved = resolve_capability_profile("mystery-model")

tests/test_compaction.py modified
@@ -5,8 +5,10 @@ from __future__ import annotations
 from loader.llm.base import Message, Role
 from loader.runtime.compaction import (
     SummaryCompressionBudget,
+    build_session_summary,
     compact_session_messages,
     compress_summary,
+    resolve_auto_compaction_input_tokens_threshold,
 )


@@ -51,3 +53,42 @@ def test_compact_session_messages_preserves_recent_messages() -> None:
     ]
     assert result.messages[0].content.startswith("[COMPACTED CONTEXT]")
     assert "Continuation instructions:" in result.messages[0].content
+
+
+def test_build_session_summary_skips_nested_compacted_context_content() -> None:
+    messages = [
+        Message(
+            role=Role.USER,
+            content=(
+                "[COMPACTED CONTEXT]\nConversation summary:\n"
+                "- Scope: older work\n- Current work: old state"
+            ),
+        ),
+        Message(role=Role.ASSISTANT, content="Read the chapter index."),
+        Message(role=Role.USER, content="Update the chapter links."),
+    ]
+
+    summary = build_session_summary(
+        messages,
+        previous_summary="[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work",
+        current_task="Repair the table of contents links",
+    )
+
+    assert "Recent user requests: [COMPACTED CONTEXT]" not in summary
+    assert "Pending work: [COMPACTED CONTEXT]" not in summary
+    assert "- Previously compacted context retained." in summary
+
+
+def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None:
+    assert resolve_auto_compaction_input_tokens_threshold(
+        100_000,
+        context_window=131_072,
+    ) == 98_304
+    assert resolve_auto_compaction_input_tokens_threshold(
+        100_000,
+        context_window=262_144,
+    ) == 100_000
+    assert resolve_auto_compaction_input_tokens_threshold(
+        100_000,
+        context_window=8_192,
+    ) == 12_000
tests/test_runtime_public_shell.py modified
@@ -9,6 +9,7 @@ import pytest

 from loader.agent.loop import AgentConfig
 from loader.llm.base import CompletionResponse, Message, Role, StreamChunk
+from loader.runtime.capabilities import CapabilityProfile
 from loader.runtime.completion_trace import CompletionTraceEntry
 from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
 from loader.runtime.public_shell import (
@@ -366,6 +367,36 @@ def test_refresh_runtime_shell_capability_profile_updates_owner_cache_state(
     assert handle._use_react is None


+def test_refresh_runtime_shell_capability_profile_reclamps_session_threshold(
+    temp_dir: Path,
+) -> None:
+    class ProfiledBackend(ScriptedBackend):
+        def __init__(self) -> None:
+            super().__init__(supports_native_tools=True)
+            self.context_window = 8192
+
+        def capability_profile(self) -> CapabilityProfile:
+            return CapabilityProfile(
+                model_name="qwen3-coder:30b",
+                supports_native_tools=True,
+                supports_streaming=True,
+                context_window=self.context_window,
+                preferred_tool_call_format="native",
+                verification_strictness="standard",
+                notes=["scripted"],
+            )
+
+    backend = ProfiledBackend()
+    handle = _runtime_handle(temp_dir, backend=backend)
+
+    assert handle.session.auto_compaction_input_tokens_threshold == 12_000
+
+    backend.context_window = 131_072
+    refresh_runtime_shell_capability_profile(handle)
+
+    assert handle.session.auto_compaction_input_tokens_threshold == 98_304
+
+
 def test_create_runtime_session_install_builds_restored_shell_state(
     temp_dir: Path,
 ) -> None:
tests/test_safeguard_services.py modified
@@ -41,6 +41,15 @@ def test_action_tracker_preserves_loop_description_format() -> None:
     assert description == "Repeating pattern detected (2x): read → grep"


+def test_action_tracker_allows_repeated_bash_commands() -> None:
+    tracker = ActionTracker()
+    arguments = {"command": "ls -la ~/Loader/guides/fortran/chapters/"}
+
+    tracker.record_tool_call("bash", arguments)
+
+    assert tracker.check_tool_call("bash", arguments) == (False, "")
+
+
 def test_pre_action_validator_blocks_patch_without_hunks() -> None:
     validator = PreActionValidator()
4655