tenseleyflow/loader / ac30b95

Raise num_ctx to 16K and lower compaction threshold to 12K to prevent context overflow in multi-turn sessions

Authored by espadonne
SHA      ac30b951de2b0d99220b3582db92b77063278371
Parents  984bfec
Tree     d66c5ad

2 changed files

Status  File                       +  -
M       src/loader/agent/loop.py   1  1
M       src/loader/llm/ollama.py   1  1
src/loader/agent/loop.py  (modified)

@@ -89,7 +89,7 @@ class AgentConfig:
     workflow_mode_override: str | None = None
     stream: bool = True  # Stream LLM responses for real-time output
     session_rotate_after_bytes: int = 256 * 1024
-    session_auto_compaction_input_tokens_threshold: int = 100_000
+    session_auto_compaction_input_tokens_threshold: int = 12_000  # ~75% of default 16K context
     session_compaction_keep_last_messages: int = 4

     # Reasoning stages configuration
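
The compaction logic that consumes this threshold is not part of this diff. As a rough sketch of the intended interaction, assuming a character-based token estimate and a placeholder summarization step (both illustrative, not this repository's implementation):

def estimate_tokens(text: str) -> int:
    # Rough heuristic: ~4 characters per token; a real loop would use the model's tokenizer.
    return len(text) // 4

def maybe_compact(messages: list[dict], threshold: int = 12_000, keep_last: int = 4) -> list[dict]:
    # Defaults mirror session_auto_compaction_input_tokens_threshold and
    # session_compaction_keep_last_messages from AgentConfig above.
    total = sum(estimate_tokens(m["content"]) for m in messages)
    if total <= threshold:
        return messages  # still under the ~12K budget, nothing to do
    head, tail = messages[:-keep_last], messages[-keep_last:]
    # Placeholder summary: a real implementation would ask the LLM to condense `head`.
    summary = "Earlier conversation (condensed): " + " | ".join(m["content"][:60] for m in head)
    return [{"role": "system", "content": summary}, *tail]

With a 16K context window, compacting at ~12K leaves roughly 4K tokens of headroom for the model's reply and the next user turn before the window overflows.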
src/loader/llm/ollama.py  (modified)

@@ -26,7 +26,7 @@ class OllamaBackend(LLMBackend):
         base_url: str = "http://localhost:11434",
         timeout: float | None = None,
         force_react: bool = False,
-        num_ctx: int = 8192,  # Reasonable context, not too slow
+        num_ctx: int = 16384,  # 16K context; most models support 32K+
         num_gpu: int = -1,  # Use all GPU layers by default (fast)
     ):
         self.model = model
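
This diff does not show how num_ctx reaches the server, but Ollama's HTTP API accepts it under the request's options field. A minimal sketch of a non-streaming chat call with an explicit context window, using the requests library (function name and payload shape here are illustrative, not this backend's code):

import requests

def ollama_chat(model: str, messages: list[dict], num_ctx: int = 16_384,
                base_url: str = "http://localhost:11434") -> str:
    # options.num_ctx overrides the model's default context length for this request.
    resp = requests.post(
        f"{base_url}/api/chat",
        json={
            "model": model,
            "messages": messages,
            "stream": False,
            "options": {"num_ctx": num_ctx},
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json()["message"]["content"]

A larger num_ctx increases KV-cache memory use, which is the usual reason defaults stay conservative; the old 8192 default traded context length for speed, as its comment noted.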