Restore model-aware compaction headroom

- SHA: 20e3ff4dad06c9a4d32d1626e423f3c7608fa331
- Parent: e779c1b
- Tree: f125e48

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/agent/loop.py | 1 | 1 |
| M | src/loader/llm/ollama.py | 2 | 0 |
| M | src/loader/runtime/capabilities.py | 54 | 2 |
| M | src/loader/runtime/compaction.py | 37 | 4 |
| M | src/loader/runtime/public_shell.py | 16 | 2 |
| M | src/loader/runtime/safeguard_services.py | 3 | 5 |
| M | tests/test_capabilities.py | 14 | 0 |
| M | tests/test_compaction.py | 41 | 0 |
| M | tests/test_runtime_public_shell.py | 31 | 0 |
| M | tests/test_safeguard_services.py | 9 | 0 |
src/loader/agent/loop.py (modified) — @@ -90,7 +90,7 @@ class AgentConfig:
| 90 | 90 | workflow_mode_override: str | None = None |
| 91 | 91 | stream: bool = True # Stream LLM responses for real-time output |
| 92 | 92 | session_rotate_after_bytes: int = 256 * 1024 |
| 93 | - session_auto_compaction_input_tokens_threshold: int = 12_000 # ~75% of default 16K context | |
| 93 | + session_auto_compaction_input_tokens_threshold: int = 100_000 # Upper bound; runtime clamps to model capacity | |
| 94 | 94 | session_compaction_keep_last_messages: int = 4 |
| 95 | 95 | |
| 96 | 96 | # Reasoning stages configuration |
src/loader/llm/ollama.py (modified) — @@ -168,9 +168,11 @@ class OllamaBackend(LLMBackend):
| 168 | 168 | response.raise_for_status() |
| 169 | 169 | self._model_details_cache = response.json() |
| 170 | 170 | self._model_details_loaded_for = self.model |
| 171 | + self._capability_profile = None | |
| 171 | 172 | except Exception: |
| 172 | 173 | self._model_details_cache = None |
| 173 | 174 | self._model_details_loaded_for = self.model |
| 175 | + self._capability_profile = None | |
| 174 | 176 | |
| 175 | 177 | return self._model_details_cache |
| 176 | 178 | |
src/loader/runtime/capabilities.py (modified) — @@ -193,6 +193,43 @@ def _any_prefix_match(tokens: set[str], family_set: set[str]) -> bool:
| 193 | 193 | return False |
| 194 | 194 | |
| 195 | 195 | |
| 196 | +def _coerce_positive_int(value: Any) -> int | None: | |
| 197 | + """Return one positive integer when the input looks numeric.""" | |
| 198 | + | |
| 199 | + try: | |
| 200 | + number = int(value) | |
| 201 | + except (TypeError, ValueError): | |
| 202 | + return None | |
| 203 | + if number <= 0: | |
| 204 | + return None | |
| 205 | + return number | |
| 206 | + | |
| 207 | + | |
| 208 | +def _infer_context_window(model_details: dict[str, Any] | None) -> int | None: | |
| 209 | + """Infer one model context window from Ollama model metadata.""" | |
| 210 | + | |
| 211 | + if not isinstance(model_details, dict): | |
| 212 | + return None | |
| 213 | + | |
| 214 | + candidates: list[int] = [] | |
| 215 | + | |
| 216 | + details = model_details.get("details") | |
| 217 | + if isinstance(details, dict): | |
| 218 | + context_length = _coerce_positive_int(details.get("context_length")) | |
| 219 | + if context_length is not None: | |
| 220 | + candidates.append(context_length) | |
| 221 | + | |
| 222 | + model_info = model_details.get("model_info") | |
| 223 | + if isinstance(model_info, dict): | |
| 224 | + for key, value in model_info.items(): | |
| 225 | + if str(key).endswith(".context_length"): | |
| 226 | + context_length = _coerce_positive_int(value) | |
| 227 | + if context_length is not None: | |
| 228 | + candidates.append(context_length) | |
| 229 | + | |
| 230 | + return max(candidates) if candidates else None | |
| 231 | + | |
| 232 | + | |
| 196 | 233 | def resolve_capability_profile( |
| 197 | 234 | model_name: str, |
| 198 | 235 | *, |
@@ -207,8 +244,20 @@ def resolve_capability_profile(
| 207 | 244 | 3. heuristic fallback using model details / family tokens |
| 208 | 245 | """ |
| 209 | 246 | |
| 247 | + inferred_context_window = _infer_context_window(model_details) | |
| 248 | + | |
| 210 | 249 | if override is not None: |
| 211 | - return override | |
| 250 | + if inferred_context_window is None: | |
| 251 | + return override | |
| 252 | + return CapabilityProfile( | |
| 253 | + model_name=override.model_name, | |
| 254 | + supports_native_tools=override.supports_native_tools, | |
| 255 | + supports_streaming=override.supports_streaming, | |
| 256 | + context_window=inferred_context_window, | |
| 257 | + preferred_tool_call_format=override.preferred_tool_call_format, | |
| 258 | + verification_strictness=override.verification_strictness, | |
| 259 | + notes=list(override.notes), | |
| 260 | + ) | |
| 212 | 261 | |
| 213 | 262 | normalized = model_name.lower().strip() |
| 214 | 263 | # Try full name first, then without :tag (e.g. "deepseek-r1:14b" -> "deepseek-r1") |
@@ -219,7 +268,7 @@ def resolve_capability_profile( | ||
| 219 | 268 | model_name=model_name, |
| 220 | 269 | supports_native_tools=known.supports_native_tools, |
| 221 | 270 | supports_streaming=known.supports_streaming, |
| 222 | - context_window=known.context_window, | |
| 271 | + context_window=inferred_context_window or known.context_window, | |
| 223 | 272 | preferred_tool_call_format=known.preferred_tool_call_format, |
| 224 | 273 | verification_strictness=known.verification_strictness, |
| 225 | 274 | notes=list(known.notes), |
@@ -231,6 +280,7 @@ def resolve_capability_profile( | ||
| 231 | 280 | return _profile( |
| 232 | 281 | model_name, |
| 233 | 282 | supports_native_tools=True, |
| 283 | + context_window=inferred_context_window or 8192, | |
| 234 | 284 | preferred_tool_call_format="native", |
| 235 | 285 | verification_strictness="standard", |
| 236 | 286 | notes=["Resolved from model family heuristic."], |
@@ -240,6 +290,7 @@ def resolve_capability_profile( | ||
| 240 | 290 | return _profile( |
| 241 | 291 | model_name, |
| 242 | 292 | supports_native_tools=False, |
| 293 | + context_window=inferred_context_window or 8192, | |
| 243 | 294 | preferred_tool_call_format="json_tag", |
| 244 | 295 | verification_strictness="standard", |
| 245 | 296 | notes=["Resolved from conservative no-native-tools heuristic."], |
@@ -248,6 +299,7 @@ def resolve_capability_profile( | ||
| 248 | 299 | return _profile( |
| 249 | 300 | model_name, |
| 250 | 301 | supports_native_tools=False, |
| 302 | + context_window=inferred_context_window or 8192, | |
| 251 | 303 | preferred_tool_call_format="json_tag", |
| 252 | 304 | verification_strictness="standard", |
| 253 | 305 | notes=["Unknown model family; defaulting to safe ReAct-style tool use."], |
src/loader/runtime/compaction.py (modified) — @@ -9,6 +9,7 @@ from dataclasses import dataclass
| 9 | 9 | from ..llm.base import Message, Role |
| 10 | 10 | |
| 11 | 11 | DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 100_000 |
| 12 | +MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 12_000 | |
| 12 | 13 | DEFAULT_COMPACTION_KEEP_LAST_MESSAGES = 4 |
| 13 | 14 | DEFAULT_MAX_CHARS = 1_200 |
| 14 | 15 | DEFAULT_MAX_LINES = 24 |
@@ -63,6 +64,25 @@ def estimate_message_tokens(messages: list[Message]) -> int: | ||
| 63 | 64 | return max(1, total_chars // 4) |
| 64 | 65 | |
| 65 | 66 | |
| 67 | +def resolve_auto_compaction_input_tokens_threshold( | |
| 68 | + configured_threshold: int, | |
| 69 | + *, | |
| 70 | + context_window: int | None = None, | |
| 71 | +) -> int: | |
| 72 | + """Resolve one compaction threshold from config and model context.""" | |
| 73 | + | |
| 74 | + threshold = max(1, int(configured_threshold)) | |
| 75 | + if context_window is None or context_window <= 0: | |
| 76 | + return threshold | |
| 77 | + | |
| 78 | + context_bound = max( | |
| 79 | + MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD, | |
| 80 | + int(context_window * 0.75), | |
| 81 | + ) | |
| 82 | + context_bound = min(DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD, context_bound) | |
| 83 | + return min(threshold, context_bound) | |
| 84 | + | |
| 85 | + | |
| 66 | 86 | def compress_summary( |
| 67 | 87 | summary: str, |
| 68 | 88 | budget: SummaryCompressionBudget | None = None, |
@@ -197,12 +217,20 @@ def build_session_summary( | ||
| 197 | 217 | user_messages = [ |
| 198 | 218 | _collapse_inline_whitespace(message.content) |
| 199 | 219 | for message in messages |
| 200 | - if message.role == Role.USER and message.content.strip() | |
| 220 | + if ( | |
| 221 | + message.role == Role.USER | |
| 222 | + and message.content.strip() | |
| 223 | + and not _is_compacted_context_message(message.content) | |
| 224 | + ) | |
| 201 | 225 | ] |
| 202 | 226 | assistant_messages = [ |
| 203 | 227 | _collapse_inline_whitespace(message.content) |
| 204 | 228 | for message in messages |
| 205 | - if message.role == Role.ASSISTANT and message.content.strip() | |
| 229 | + if ( | |
| 230 | + message.role == Role.ASSISTANT | |
| 231 | + and message.content.strip() | |
| 232 | + and not _is_compacted_context_message(message.content) | |
| 233 | + ) | |
| 206 | 234 | ] |
| 207 | 235 | tool_names = [ |
| 208 | 236 | tool_call.name |
@@ -229,8 +257,7 @@ def build_session_summary( | ||
| 229 | 257 | f"- Recent user requests: {recent_requests}", |
| 230 | 258 | ] |
| 231 | 259 | if previous_summary: |
| 232 | - previous_line = _collapse_inline_whitespace(previous_summary.splitlines()[0]) | |
| 233 | - lines.append(f"- Previously compacted context: {previous_line}") | |
| 260 | + lines.append("- Previously compacted context retained.") | |
| 234 | 261 | lines.extend( |
| 235 | 262 | [ |
| 236 | 263 | f"- Newly compacted context: {len(messages)} earlier message(s) summarized.", |
@@ -247,6 +274,8 @@ def _extract_key_files(messages: list[Message]) -> list[str]: | ||
| 247 | 274 | pattern = re.compile(r"(?:/|\.{1,2}/|[A-Za-z0-9_.-]+/)[A-Za-z0-9_./-]+\.[A-Za-z0-9]+") |
| 248 | 275 | files: list[str] = [] |
| 249 | 276 | for message in messages: |
| 277 | + if _is_compacted_context_message(message.content): | |
| 278 | + continue | |
| 250 | 279 | for match in pattern.findall(message.content): |
| 251 | 280 | if match not in files: |
| 252 | 281 | files.append(match) |
@@ -262,6 +291,10 @@ def _collapse_inline_whitespace(line: str) -> str: | ||
| 262 | 291 | return " ".join(line.split()) |
| 263 | 292 | |
| 264 | 293 | |
| 294 | +def _is_compacted_context_message(content: str) -> bool: | |
| 295 | + return content.lstrip().startswith("[COMPACTED CONTEXT]") | |
| 296 | + | |
| 297 | + | |
| 265 | 298 | def _truncate_line(line: str, max_chars: int) -> str: |
| 266 | 299 | if max_chars <= 0 or len(line) <= max_chars: |
| 267 | 300 | return line |
src/loader/runtime/public_shell.py (modified) — @@ -15,6 +15,7 @@ from ..context.project import ProjectContext
| 15 | 15 | from ..llm.base import Message, Role |
| 16 | 16 | from ..tools.base import ToolRegistry |
| 17 | 17 | from .capabilities import CapabilityProfile, resolve_backend_capability_profile |
| 18 | +from .compaction import resolve_auto_compaction_input_tokens_threshold | |
| 18 | 19 | from .dod import DefinitionOfDoneStore |
| 19 | 20 | from .events import AgentEvent, TurnSummary |
| 20 | 21 | from .launcher import build_runtime_launcher |
@@ -303,7 +304,7 @@ def build_fresh_runtime_session_install( | ||
| 303 | 304 | runtime_owner_path=owner_metadata["owner_path"], |
| 304 | 305 | rotate_after_bytes=owner.config.session_rotate_after_bytes, |
| 305 | 306 | auto_compaction_input_tokens_threshold=( |
| 306 | - owner.config.session_auto_compaction_input_tokens_threshold | |
| 307 | + _resolve_owner_auto_compaction_threshold(owner) | |
| 307 | 308 | ), |
| 308 | 309 | compaction_keep_last_messages=owner.config.session_compaction_keep_last_messages, |
| 309 | 310 | system_message_factory=owner._get_system_message, |
@@ -399,7 +400,7 @@ def resume_runtime_shell_session( | ||
| 399 | 400 | session_id=session_id, |
| 400 | 401 | rotate_after_bytes=owner.config.session_rotate_after_bytes, |
| 401 | 402 | auto_compaction_input_tokens_threshold=( |
| 402 | - owner.config.session_auto_compaction_input_tokens_threshold | |
| 403 | + _resolve_owner_auto_compaction_threshold(owner) | |
| 403 | 404 | ), |
| 404 | 405 | compaction_keep_last_messages=owner.config.session_compaction_keep_last_messages, |
| 405 | 406 | ) |
@@ -614,9 +615,22 @@ def refresh_runtime_shell_capability_profile( | ||
| 614 | 615 | if refresh.prompt_reset_required: |
| 615 | 616 | owner._system_message = None |
| 616 | 617 | owner._use_react = None |
| 618 | + if hasattr(owner, "session") and owner.session is not None: | |
| 619 | + owner.session.auto_compaction_input_tokens_threshold = ( | |
| 620 | + _resolve_owner_auto_compaction_threshold(owner) | |
| 621 | + ) | |
| 617 | 622 | return refresh |
| 618 | 623 | |
| 619 | 624 | |
| 625 | +def _resolve_owner_auto_compaction_threshold(owner: RuntimeShellOwner) -> int: | |
| 626 | + """Clamp one owner's compaction threshold to the active model context.""" | |
| 627 | + | |
| 628 | + return resolve_auto_compaction_input_tokens_threshold( | |
| 629 | + owner.config.session_auto_compaction_input_tokens_threshold, | |
| 630 | + context_window=owner.capability_profile.context_window, | |
| 631 | + ) | |
| 632 | + | |
| 633 | + | |
| 620 | 634 | def build_runtime_system_message( |
| 621 | 635 | *, |
| 622 | 636 | registry: ToolRegistry, |
src/loader/runtime/safeguard_services.py (modified) — @@ -111,11 +111,9 @@ class ActionTracker:
| 111 | 111 | if isinstance(hunks, list) and self.would_duplicate_patch(file_path, hunks): |
| 112 | 112 | return True, f"Same patch already applied to: {file_path}" |
| 113 | 113 | |
| 114 | - elif tool_name == "bash": | |
| 115 | - command = arguments.get("command", "") | |
| 116 | - if self.would_duplicate_command(command): | |
| 117 | - return True, f"Command already executed: {command[:50]}..." | |
| 118 | - | |
| 114 | + # Bash commands intentionally skip exact-command dedupe here. | |
| 115 | + # Re-running the same shell probe after a filesystem change is often valid, | |
| 116 | + # and higher-level loop detection is a safer backstop than blocking `ls`. | |
| 119 | 117 | return False, "" |
| 120 | 118 | |
| 121 | 119 | def record_tool_call(self, tool_name: str, arguments: dict) -> None: |
tests/test_capabilities.py (modified) — @@ -41,6 +41,20 @@ def test_family_heuristic_resolution_uses_model_details() -> None:
| 41 | 41 | assert "heuristic" in resolved.notes[0].lower() |
| 42 | 42 | |
| 43 | 43 | |
| 44 | +def test_model_details_context_window_overrides_registry_default() -> None: | |
| 45 | + resolved = resolve_capability_profile( | |
| 46 | + "gpt-oss:20b", | |
| 47 | + model_details={ | |
| 48 | + "model_info": { | |
| 49 | + "gptoss.context_length": 131072, | |
| 50 | + } | |
| 51 | + }, | |
| 52 | + ) | |
| 53 | + | |
| 54 | + assert resolved.context_window == 131072 | |
| 55 | + assert resolved.supports_native_tools | |
| 56 | + | |
| 57 | + | |
| 44 | 58 | def test_unknown_models_default_to_safe_react_profile() -> None: |
| 45 | 59 | resolved = resolve_capability_profile("mystery-model") |
| 46 | 60 | |
tests/test_compaction.py (modified) — @@ -5,8 +5,10 @@ from __future__ import annotations
| 5 | 5 | from loader.llm.base import Message, Role |
| 6 | 6 | from loader.runtime.compaction import ( |
| 7 | 7 | SummaryCompressionBudget, |
| 8 | + build_session_summary, | |
| 8 | 9 | compact_session_messages, |
| 9 | 10 | compress_summary, |
| 11 | + resolve_auto_compaction_input_tokens_threshold, | |
| 10 | 12 | ) |
| 11 | 13 | |
| 12 | 14 | |
@@ -51,3 +53,42 @@ def test_compact_session_messages_preserves_recent_messages() -> None: | ||
| 51 | 53 | ] |
| 52 | 54 | assert result.messages[0].content.startswith("[COMPACTED CONTEXT]") |
| 53 | 55 | assert "Continuation instructions:" in result.messages[0].content |
| 56 | + | |
| 57 | + | |
| 58 | +def test_build_session_summary_skips_nested_compacted_context_content() -> None: | |
| 59 | + messages = [ | |
| 60 | + Message( | |
| 61 | + role=Role.USER, | |
| 62 | + content=( | |
| 63 | + "[COMPACTED CONTEXT]\nConversation summary:\n" | |
| 64 | + "- Scope: older work\n- Current work: old state" | |
| 65 | + ), | |
| 66 | + ), | |
| 67 | + Message(role=Role.ASSISTANT, content="Read the chapter index."), | |
| 68 | + Message(role=Role.USER, content="Update the chapter links."), | |
| 69 | + ] | |
| 70 | + | |
| 71 | + summary = build_session_summary( | |
| 72 | + messages, | |
| 73 | + previous_summary="[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work", | |
| 74 | + current_task="Repair the table of contents links", | |
| 75 | + ) | |
| 76 | + | |
| 77 | + assert "Recent user requests: [COMPACTED CONTEXT]" not in summary | |
| 78 | + assert "Pending work: [COMPACTED CONTEXT]" not in summary | |
| 79 | + assert "- Previously compacted context retained." in summary | |
| 80 | + | |
| 81 | + | |
| 82 | +def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None: | |
| 83 | + assert resolve_auto_compaction_input_tokens_threshold( | |
| 84 | + 100_000, | |
| 85 | + context_window=131_072, | |
| 86 | + ) == 98_304 | |
| 87 | + assert resolve_auto_compaction_input_tokens_threshold( | |
| 88 | + 100_000, | |
| 89 | + context_window=262_144, | |
| 90 | + ) == 100_000 | |
| 91 | + assert resolve_auto_compaction_input_tokens_threshold( | |
| 92 | + 100_000, | |
| 93 | + context_window=8_192, | |
| 94 | + ) == 12_000 | |
tests/test_runtime_public_shell.py (modified) — @@ -9,6 +9,7 @@ import pytest
| 9 | 9 | |
| 10 | 10 | from loader.agent.loop import AgentConfig |
| 11 | 11 | from loader.llm.base import CompletionResponse, Message, Role, StreamChunk |
| 12 | +from loader.runtime.capabilities import CapabilityProfile | |
| 12 | 13 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 13 | 14 | from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done |
| 14 | 15 | from loader.runtime.public_shell import ( |
@@ -366,6 +367,36 @@ def test_refresh_runtime_shell_capability_profile_updates_owner_cache_state( | ||
| 366 | 367 | assert handle._use_react is None |
| 367 | 368 | |
| 368 | 369 | |
| 370 | +def test_refresh_runtime_shell_capability_profile_reclamps_session_threshold( | |
| 371 | + temp_dir: Path, | |
| 372 | +) -> None: | |
| 373 | + class ProfiledBackend(ScriptedBackend): | |
| 374 | + def __init__(self) -> None: | |
| 375 | + super().__init__(supports_native_tools=True) | |
| 376 | + self.context_window = 8192 | |
| 377 | + | |
| 378 | + def capability_profile(self) -> CapabilityProfile: | |
| 379 | + return CapabilityProfile( | |
| 380 | + model_name="qwen3-coder:30b", | |
| 381 | + supports_native_tools=True, | |
| 382 | + supports_streaming=True, | |
| 383 | + context_window=self.context_window, | |
| 384 | + preferred_tool_call_format="native", | |
| 385 | + verification_strictness="standard", | |
| 386 | + notes=["scripted"], | |
| 387 | + ) | |
| 388 | + | |
| 389 | + backend = ProfiledBackend() | |
| 390 | + handle = _runtime_handle(temp_dir, backend=backend) | |
| 391 | + | |
| 392 | + assert handle.session.auto_compaction_input_tokens_threshold == 12_000 | |
| 393 | + | |
| 394 | + backend.context_window = 131_072 | |
| 395 | + refresh_runtime_shell_capability_profile(handle) | |
| 396 | + | |
| 397 | + assert handle.session.auto_compaction_input_tokens_threshold == 98_304 | |
| 398 | + | |
| 399 | + | |
| 369 | 400 | def test_create_runtime_session_install_builds_restored_shell_state( |
| 370 | 401 | temp_dir: Path, |
| 371 | 402 | ) -> None: |
tests/test_safeguard_services.py (modified) — @@ -41,6 +41,15 @@ def test_action_tracker_preserves_loop_description_format() -> None:
| 41 | 41 | assert description == "Repeating pattern detected (2x): read → grep" |
| 42 | 42 | |
| 43 | 43 | |
| 44 | +def test_action_tracker_allows_repeated_bash_commands() -> None: | |
| 45 | + tracker = ActionTracker() | |
| 46 | + arguments = {"command": "ls -la ~/Loader/guides/fortran/chapters/"} | |
| 47 | + | |
| 48 | + tracker.record_tool_call("bash", arguments) | |
| 49 | + | |
| 50 | + assert tracker.check_tool_call("bash", arguments) == (False, "") | |
| 51 | + | |
| 52 | + | |
| 44 | 53 | def test_pre_action_validator_blocks_patch_without_hunks() -> None: |
| 45 | 54 | validator = PreActionValidator() |
| 46 | 55 | |