tenseleyflow/loader / 6e4f880

Browse files

Fixes to agent behavior, plus assorted related changes

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6e4f880d2032c3e49e6642c143ba919fdb197fb0
Parents
833086d
Tree
f3fed0c

11 changed files

StatusFile+-
A package-lock.json 56 0
A package.json 30 0
M src/loader/agent/loop.py 61 20
M src/loader/agent/parsing.py 50 0
M src/loader/agent/reasoning.py 67 88
M src/loader/llm/base.py 3 0
M src/loader/llm/ollama.py 105 17
M src/loader/ui/adapter.py 78 11
M src/loader/ui/app.py 27 0
M src/loader/ui/widgets/streaming.py 15 1
M tests/test_parsing.py 31 0
package-lock.jsonadded
@@ -0,0 +1,56 @@
1
+{
2
+  "name": "loader",
3
+  "version": "1.0.0",
4
+  "lockfileVersion": 3,
5
+  "requires": true,
6
+  "packages": {
7
+    "": {
8
+      "name": "loader",
9
+      "version": "1.0.0",
10
+      "license": "ISC",
11
+      "dependencies": {
12
+        "react": "^19.2.3",
13
+        "react-dom": "^19.2.3",
14
+        "react-parallax-tilt": "^1.7.315"
15
+      },
16
+      "devDependencies": {}
17
+    },
18
+    "node_modules/react": {
19
+      "version": "19.2.3",
20
+      "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz",
21
+      "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==",
22
+      "license": "MIT",
23
+      "engines": {
24
+        "node": ">=0.10.0"
25
+      }
26
+    },
27
+    "node_modules/react-dom": {
28
+      "version": "19.2.3",
29
+      "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz",
30
+      "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==",
31
+      "license": "MIT",
32
+      "dependencies": {
33
+        "scheduler": "^0.27.0"
34
+      },
35
+      "peerDependencies": {
36
+        "react": "^19.2.3"
37
+      }
38
+    },
39
+    "node_modules/react-parallax-tilt": {
40
+      "version": "1.7.315",
41
+      "resolved": "https://registry.npmjs.org/react-parallax-tilt/-/react-parallax-tilt-1.7.315.tgz",
42
+      "integrity": "sha512-m0I2yPEmzEC+qGelF+8P+L60lH/S50OJE+pz1bVmurnkKNMyd2Q4qhtAi8zRibNkwFd6oOGvA8qEqAySBbAOJg==",
43
+      "license": "MIT",
44
+      "peerDependencies": {
45
+        "react": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
46
+        "react-dom": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
47
+      }
48
+    },
49
+    "node_modules/scheduler": {
50
+      "version": "0.27.0",
51
+      "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz",
52
+      "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==",
53
+      "license": "MIT"
54
+    }
55
+  }
56
+}
package.jsonadded
@@ -0,0 +1,30 @@
1
+{
2
+  "dependencies": {
3
+    "react": "^19.2.3",
4
+    "react-dom": "^19.2.3",
5
+    "react-parallax-tilt": "^1.7.315"
6
+  },
7
+  "name": "loader",
8
+  "version": "1.0.0",
9
+  "description": "Local agentic coding assistant. Runs on your hardware with local LLMs.",
10
+  "main": "index.js",
11
+  "directories": {
12
+    "doc": "docs",
13
+    "test": "tests"
14
+  },
15
+  "scripts": {
16
+    "test": "echo \"Error: no test specified\" && exit 1"
17
+  },
18
+  "repository": {
19
+    "type": "git",
20
+    "url": "git+https://github.com/tenseleyFlow/loader.git"
21
+  },
22
+  "keywords": [],
23
+  "author": "",
24
+  "license": "ISC",
25
+  "type": "commonjs",
26
+  "bugs": {
27
+    "url": "https://github.com/tenseleyFlow/loader/issues"
28
+  },
29
+  "homepage": "https://github.com/tenseleyFlow/loader#readme"
30
+}
src/loader/agent/loop.pymodified
@@ -194,7 +194,14 @@ class Agent:
194
 
194
 
195
         # Check if backend supports native tools
195
         # Check if backend supports native tools
196
         if hasattr(self.backend, "supports_native_tools"):
196
         if hasattr(self.backend, "supports_native_tools"):
197
-            self._use_react = not self.backend.supports_native_tools()
197
+            supports_native = self.backend.supports_native_tools()
198
+            self._use_react = not supports_native
199
+            # Debug log
200
+            try:
201
+                with open("/tmp/loader_debug.log", "a") as f:
202
+                    f.write(f"[loop] use_react: supports_native={supports_native}, use_react={self._use_react}\n")
203
+            except Exception:
204
+                pass
198
         else:
205
         else:
199
             # Default to ReAct for unknown backends
206
             # Default to ReAct for unknown backends
200
             self._use_react = True
207
             self._use_react = True
@@ -586,6 +593,7 @@ class Agent:
586
             tools = None if self.use_react else self.registry.get_schemas()
593
             tools = None if self.use_react else self.registry.get_schemas()
587
 
594
 
588
             # Use streaming or regular completion
595
             # Use streaming or regular completion
596
+            pending_tool_calls_seen: set[str] = set()  # Track IDs of pending tool calls shown
589
             if self.config.stream:
597
             if self.config.stream:
590
                 full_content = ""
598
                 full_content = ""
591
                 tool_calls: list[ToolCall] = []
599
                 tool_calls: list[ToolCall] = []
@@ -596,15 +604,30 @@ class Agent:
596
                     temperature=self.config.temperature,
604
                     temperature=self.config.temperature,
597
                     max_tokens=effective_max_tokens,
605
                     max_tokens=effective_max_tokens,
598
                 ):
606
                 ):
599
-                    if chunk.content:
607
+                    # Emit stream events for content OR for final chunk (to signal end)
608
+                    if chunk.content or chunk.is_done:
600
                         await emit(AgentEvent(
609
                         await emit(AgentEvent(
601
                             type="stream",
610
                             type="stream",
602
                             content=chunk.content,
611
                             content=chunk.content,
603
                             is_stream_end=chunk.is_done,
612
                             is_stream_end=chunk.is_done,
604
                         ))
613
                         ))
614
+                    # Show pending tool calls as they're detected (ReAct mode interleaving)
615
+                    if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen:
616
+                        pending_tool_calls_seen.add(chunk.pending_tool_call.id)
617
+                        await emit(AgentEvent(
618
+                            type="tool_call",
619
+                            tool_name=chunk.pending_tool_call.name,
620
+                            tool_args=chunk.pending_tool_call.arguments,
621
+                        ))
605
                     if chunk.is_done:
622
                     if chunk.is_done:
606
                         full_content = chunk.full_content or full_content
623
                         full_content = chunk.full_content or full_content
607
                         tool_calls = chunk.tool_calls
624
                         tool_calls = chunk.tool_calls
625
+                        # Debug log
626
+                        try:
627
+                            with open("/tmp/loader_debug.log", "a") as f:
628
+                                f.write(f"[loop] chunk.is_done: got {len(tool_calls)} tool_calls\n")
629
+                        except Exception:
630
+                            pass
608
 
631
 
609
                 content = full_content
632
                 content = full_content
610
                 response_content = full_content
633
                 response_content = full_content
@@ -638,6 +661,15 @@ class Agent:
638
 
661
 
639
             # If there are tool calls, execute them
662
             # If there are tool calls, execute them
640
             if tool_calls:
663
             if tool_calls:
664
+                # Debug log
665
+                try:
666
+                    with open("/tmp/loader_debug.log", "a") as f:
667
+                        f.write(f"[loop] executing {len(tool_calls)} tool_calls\n")
668
+                        for tc in tool_calls:
669
+                            f.write(f"[loop]   - {tc.name}: id={tc.id}, args_keys={list(tc.arguments.keys())}\n")
670
+                except Exception:
671
+                    pass
672
+
641
                 # Add assistant message with tool calls
673
                 # Add assistant message with tool calls
642
                 self.messages.append(Message(
674
                 self.messages.append(Message(
643
                     role=Role.ASSISTANT,
675
                     role=Role.ASSISTANT,
@@ -682,11 +714,24 @@ class Agent:
682
                             ))
714
                             ))
683
                             continue  # Skip this tool call, let LLM reconsider
715
                             continue  # Skip this tool call, let LLM reconsider
684
 
716
 
717
+                    # Only emit tool_call if not already shown during streaming
718
+                    if tool_call.id not in pending_tool_calls_seen:
719
+                        try:
720
+                            with open("/tmp/loader_debug.log", "a") as f:
721
+                                f.write(f"[loop] emitting tool_call event for {tool_call.name}\n")
722
+                        except Exception:
723
+                            pass
685
                         await emit(AgentEvent(
724
                         await emit(AgentEvent(
686
                             type="tool_call",
725
                             type="tool_call",
687
                             tool_name=tool_call.name,
726
                             tool_name=tool_call.name,
688
                             tool_args=tool_call.arguments,
727
                             tool_args=tool_call.arguments,
689
                         ))
728
                         ))
729
+                    else:
730
+                        try:
731
+                            with open("/tmp/loader_debug.log", "a") as f:
732
+                                f.write(f"[loop] SKIPPING tool_call event for {tool_call.name} (already in pending_seen)\n")
733
+                        except Exception:
734
+                            pass
690
 
735
 
691
                     # Track this action for completion checking
736
                     # Track this action for completion checking
692
                     action_desc = f"{tool_call.name}: {str(tool_call.arguments)[:100]}"
737
                     action_desc = f"{tool_call.name}: {str(tool_call.arguments)[:100]}"
@@ -904,25 +949,21 @@ class Agent:
904
                 ))
949
                 ))
905
                 continue
950
                 continue
906
 
951
 
907
-            # No tool calls and early in the task - likely giving up too soon
952
+            # No tool calls and early in the task - MAY be giving up too soon
908
-            # This catches native mode models that stop without using tools
953
+            # But only intervene if we haven't done ANY work yet
909
-            if not self.use_react and len(actions_taken) < 5 and iterations < self.config.max_iterations - 2:
954
+            if not self.use_react and len(actions_taken) == 0 and iterations < self.config.max_iterations - 2:
910
-                # Check if response looks like a stopping point but we haven't done much
955
+                # Check if response looks like deflection without having done anything
911
-                stopping_phrases = [
956
+                deflection_phrases = ["you can", "you should", "you could", "try running"]
912
-                    "let me know", "feel free", "hope this", "happy to help",
957
+                looks_like_deflection = any(p in content.lower() for p in deflection_phrases)
913
-                    "anything else", "is there", "that's", "all done", "complete",
914
-                ]
915
-                looks_like_stopping = any(p in content.lower() for p in stopping_phrases)
916
 
958
 
917
-                if looks_like_stopping or len(content) < 150:
959
+                if looks_like_deflection:
918
                     self.messages.append(Message(
960
                     self.messages.append(Message(
919
                         role=Role.ASSISTANT,
961
                         role=Role.ASSISTANT,
920
                         content=response_content,
962
                         content=response_content,
921
                     ))
963
                     ))
922
                     self.messages.append(Message(
964
                     self.messages.append(Message(
923
                         role=Role.USER,
965
                         role=Role.USER,
924
-                        content="You stopped without completing the task. Continue executing - "
966
+                        content="Please use your tools to execute the task rather than telling me what to do.",
925
-                                "use your tools to finish the job. Don't describe what to do, DO IT.",
926
                     ))
967
                     ))
927
                     continue
968
                     continue
928
 
969
 
src/loader/agent/parsing.pymodified
@@ -26,6 +26,38 @@ def _extract_arguments(data: dict) -> dict:
26
     return {}
26
     return {}
27
 
27
 
28
 
28
 
29
+def _parse_bracket_args(args_str: str) -> dict:
30
+    """Parse arguments from bracketed tool call format.
31
+
32
+    Handles formats like:
33
+        file_path=/tmp/test.txt, content="hello world"
34
+        command="ls -la"
35
+        file_path="test.py", old_string="foo", new_string="bar"
36
+
37
+    Args:
38
+        args_str: The arguments string (everything after "tool with:" or "tool:")
39
+
40
+    Returns:
41
+        Dictionary of parsed arguments
42
+    """
43
+    args = {}
44
+
45
+    # Pattern to match key=value pairs where value can be:
46
+    # - quoted string (single or double quotes)
47
+    # - unquoted value (until comma or end)
48
+    pattern = r'(\w+)\s*=\s*(?:"([^"]*?)"|\'([^\']*?)\'|([^,\]]+?))\s*(?:,|$)'
49
+
50
+    for match in re.finditer(pattern, args_str):
51
+        key = match.group(1)
52
+        # Value is in one of the capture groups (2=double quoted, 3=single quoted, 4=unquoted)
53
+        value = match.group(2) or match.group(3) or match.group(4)
54
+        if value is not None:
55
+            value = value.strip()
56
+            args[key] = value
57
+
58
+    return args
59
+
60
+
29
 def parse_tool_calls(text: str) -> ParsedResponse:
61
 def parse_tool_calls(text: str) -> ParsedResponse:
30
     """Parse tool calls from LLM text output.
62
     """Parse tool calls from LLM text output.
31
 
63
 
@@ -96,6 +128,24 @@ def parse_tool_calls(text: str) -> ParsedResponse:
96
         if tool_calls:
128
         if tool_calls:
97
             content = re.sub(bare_json_pattern, "", content)
129
             content = re.sub(bare_json_pattern, "", content)
98
 
130
 
131
+    # Pattern 3: Bracketed format [calls/USE tool with/: key=value, ...]
132
+    # Examples:
133
+    #   [calls write tool with: file_path=/tmp/test.txt, content="hello"]
134
+    #   [USE bash tool: command="ls -la"]
135
+    if not tool_calls:
136
+        bracket_pattern = r'\[(?:calls|USE)\s+(\w+)\s+tool(?:\s+with)?[:\s]+([^\]]+)\]'
137
+        for i, (name, args_str) in enumerate(re.findall(bracket_pattern, text, re.IGNORECASE)):
138
+            args = _parse_bracket_args(args_str)
139
+            if args:
140
+                tool_calls.append(ToolCall(
141
+                    id=f"call_{i}",
142
+                    name=name.lower(),
143
+                    arguments=args,
144
+                ))
145
+        # Remove bracketed tool calls from content
146
+        if tool_calls:
147
+            content = re.sub(bracket_pattern, "", content, flags=re.IGNORECASE)
148
+
99
     # Clean up content
149
     # Clean up content
100
     content = content.strip()
150
     content = content.strip()
101
 
151
 
src/loader/agent/reasoning.pymodified
@@ -722,46 +722,52 @@ def detect_premature_completion(task: str, response: str, actions_taken: list[st
722
     """Quick heuristic to detect if agent is stopping too early.
722
     """Quick heuristic to detect if agent is stopping too early.
723
 
723
 
724
     Returns True if the agent might be giving up prematurely.
724
     Returns True if the agent might be giving up prematurely.
725
+    This should be CONSERVATIVE - only trigger when really needed,
726
+    not for simple tasks that are genuinely complete.
725
     """
727
     """
726
     task_lower = task.lower()
728
     task_lower = task.lower()
727
     response_lower = response.lower()
729
     response_lower = response.lower()
728
 
730
 
729
-    # Keywords that suggest the task should involve multiple steps
731
+    # If no actions taken at all and task requires action, that's premature
730
-    multi_step_indicators = [
732
+    if not actions_taken:
731
-        "create a", "build a", "make a", "set up", "setup",
733
+        # But only if this looks like an actionable task
732
-        "initialize", "scaffold", "generate", "implement",
734
+        action_verbs = ["create", "write", "make", "edit", "fix", "add", "delete", "run"]
733
-        "project", "application", "app", "website", "api",
735
+        if any(verb in task_lower for verb in action_verbs):
734
-        "add", "write", "develop", "design", "help me",
736
+            return True
735
-    ]
737
+        return False  # Informational/conversational tasks don't need actions
736
 
738
 
737
-    # Keywords that suggest testing/verification should happen
739
+    # If we took actions and got successful results, trust that we're done
738
-    verification_indicators = [
740
+    # Check for success indicators in response
739
-        "test", "run", "start", "launch", "verify", "check",
741
+    success_indicators = [
740
-        "demo", "show", "demonstrate", "work", "function",
742
+        "successfully", "created", "written", "done", "completed",
743
+        "file now contains", "has been updated", "installed",
741
     ]
744
     ]
745
+    if any(ind in response_lower for ind in success_indicators) and len(actions_taken) >= 1:
746
+        return False  # Likely actually done
742
 
747
 
743
-    # Keywords in response that suggest premature completion
748
+    # Keywords that suggest COMPLEX multi-step tasks (not simple ones)
744
-    premature_phrases = [
749
+    complex_indicators = [
745
-        "i've created", "i created", "file has been created",
750
+        "set up a project", "create a project", "build a complete",
746
-        "here's the", "i've set up the basic", "i've written",
751
+        "scaffold", "initialize a new", "create a full",
747
-        "you can now", "you should now", "you can run",
752
+        "implement a full", "develop a complete",
748
-        "that's it", "all done", "complete", "finished",
749
-        "let me know", "feel free to", "hope this helps",
750
-        "is there anything else",
751
     ]
753
     ]
754
+    is_complex = any(ind in task_lower for ind in complex_indicators)
752
 
755
 
753
-    # Check if this looks like a multi-step task
756
+    # Simple creation tasks don't need follow-up
754
-    is_multi_step = any(ind in task_lower for ind in multi_step_indicators)
757
+    simple_creation = [
755
-
758
+        "create a file", "write a file", "make a file",
756
-    # Check if verification was expected but not done
759
+        "add a function", "edit the", "fix the", "update the",
757
-    expects_verification = any(ind in task_lower for ind in verification_indicators)
760
+        "read the", "show me", "list",
761
+    ]
762
+    is_simple = any(ind in task_lower for ind in simple_creation)
758
 
763
 
759
-    # Check for premature completion phrases
764
+    # If it's a simple task with at least one action, it's probably done
760
-    has_premature_phrase = any(phrase in response_lower for phrase in premature_phrases)
765
+    if is_simple and len(actions_taken) >= 1:
766
+        return False
761
 
767
 
762
-    # Action count thresholds
768
+    # Explicit verification requests need bash
763
-    few_actions = len(actions_taken) < 3
769
+    explicit_verification = ["and test", "and run", "and verify", "make sure it works"]
764
-    very_few_actions = len(actions_taken) < 2
770
+    needs_verification = any(ind in task_lower for ind in explicit_verification)
765
 
771
 
766
     # Categorize what actions were taken
772
     # Categorize what actions were taken
767
     action_types = set()
773
     action_types = set()
@@ -778,33 +784,19 @@ def detect_premature_completion(task: str, response: str, actions_taken: list[st
778
         elif "glob" in action_lower or "grep" in action_lower:
784
         elif "glob" in action_lower or "grep" in action_lower:
779
             action_types.add("search")
785
             action_types.add("search")
780
 
786
 
781
-    # More aggressive detection:
787
+    # Detection rules (more conservative):
782
-
783
-    # 1. Multi-step task with premature phrases and few actions
784
-    if is_multi_step and has_premature_phrase and few_actions:
785
-        return True
786
-
787
-    # 2. Multi-step task with very few actions (regardless of phrases)
788
-    if is_multi_step and very_few_actions:
789
-        return True
790
 
788
 
791
-    # 3. Only wrote/edited files but never ran/tested anything
789
+    # 1. Complex project tasks with very few actions
792
-    if action_types and action_types <= {"write", "edit", "read"} and few_actions:
790
+    if is_complex and len(actions_taken) < 3:
793
-        # Wrote files but never executed bash to test
794
-        if "write" in action_types or "edit" in action_types:
795
         return True
791
         return True
796
 
792
 
797
-    # 4. Verification expected but no bash commands run
793
+    # 2. Explicitly requested verification but no bash run
798
-    if expects_verification and "bash" not in action_types:
794
+    if needs_verification and "bash" not in action_types:
799
         return True
795
         return True
800
 
796
 
801
-    # 5. Response has chatbot-style "let me know" phrases
797
+    # 3. Chatbot-style deflection with no real work done
802
-    chatbot_phrases = ["let me know", "feel free", "hope this", "happy to help"]
798
+    deflection_phrases = ["you can now", "you should", "you can run", "you can use"]
803
-    if any(phrase in response_lower for phrase in chatbot_phrases):
799
+    if any(phrase in response_lower for phrase in deflection_phrases) and len(actions_taken) < 2:
804
-        return True
805
-
806
-    # 6. Response is very short but task seems substantial
807
-    if len(response) < 200 and is_multi_step and len(actions_taken) > 0:
808
         return True
800
         return True
809
 
801
 
810
     return False
802
     return False
@@ -814,6 +806,7 @@ def get_continuation_prompt(task: str, actions_taken: list[str], response: str)
814
     """Generate a prompt to encourage the agent to continue.
806
     """Generate a prompt to encourage the agent to continue.
815
 
807
 
816
     Returns a prompt that nudges the agent to follow through.
808
     Returns a prompt that nudges the agent to follow through.
809
+    Should be helpful, not aggressive.
817
     """
810
     """
818
     task_lower = task.lower()
811
     task_lower = task.lower()
819
     actions_str = ", ".join(a.split(":")[0] for a in actions_taken[-5:]) if actions_taken else "none"
812
     actions_str = ", ".join(a.split(":")[0] for a in actions_taken[-5:]) if actions_taken else "none"
@@ -821,51 +814,37 @@ def get_continuation_prompt(task: str, actions_taken: list[str], response: str)
821
     # Determine what type of follow-up is needed
814
     # Determine what type of follow-up is needed
822
     follow_ups = []
815
     follow_ups = []
823
 
816
 
824
-    # Project setup tasks should initialize
817
+    # Only suggest package install if explicitly mentioned in task
825
-    if any(kw in task_lower for kw in ["node", "npm", "javascript", "react", "vue", "next"]):
818
+    if any(kw in task_lower for kw in ["install", "dependencies", "set up project"]):
819
+        if "node" in task_lower or "npm" in task_lower:
826
             if not any("npm" in a for a in actions_taken):
820
             if not any("npm" in a for a in actions_taken):
827
                 follow_ups.append("Run `npm install` to install dependencies")
821
                 follow_ups.append("Run `npm install` to install dependencies")
828
-            follow_ups.append("Start the development server to verify it works")
822
+        if "python" in task_lower or "pip" in task_lower:
829
-
830
-    if any(kw in task_lower for kw in ["python", "pip", "django", "flask", "fastapi"]):
831
             if not any("pip" in a or "uv" in a for a in actions_taken):
823
             if not any("pip" in a or "uv" in a for a in actions_taken):
832
-            follow_ups.append("Install dependencies with pip/uv")
824
+                follow_ups.append("Install dependencies")
833
-            follow_ups.append("Run the application to verify it works")
834
 
825
 
835
-    # Test tasks should run tests
826
+    # Only suggest running tests if "test" is explicitly in task
836
-    if "test" in task_lower:
827
+    if "test" in task_lower and "run" in task_lower:
837
         if not any("test" in a or "pytest" in a or "jest" in a for a in actions_taken):
828
         if not any("test" in a or "pytest" in a or "jest" in a for a in actions_taken):
838
-            follow_ups.append("Run the tests to verify they pass")
829
+            follow_ups.append("Run the tests")
839
-
840
-    # Build tasks should verify build
841
-    if "build" in task_lower or "compile" in task_lower:
842
-        if not any("build" in a or "compile" in a for a in actions_taken):
843
-            follow_ups.append("Run the build to verify it succeeds")
844
 
830
 
845
-    # Generic follow-ups for creation tasks
831
+    # If task explicitly asks to run/verify, remind to do so
846
-    if any(kw in task_lower for kw in ["create", "make", "build", "set up"]):
832
+    if any(kw in task_lower for kw in ["and run", "and test", "and verify", "make sure it works"]):
847
-        if len(actions_taken) < 3:
833
+        follow_ups.append("Execute what was created to verify it works")
848
-            follow_ups.append("Verify the creation was successful")
849
-            follow_ups.append("Demonstrate that it works as expected")
850
 
834
 
851
     if follow_ups:
835
     if follow_ups:
852
-        steps = "\n".join(f"- {step}" for step in follow_ups[:3])
836
+        steps = "\n".join(f"- {step}" for step in follow_ups[:2])
853
         return (
837
         return (
854
-            f"STOP - You are NOT done. The task was: \"{task}\"\n\n"
838
+            f"The task was: \"{task}\"\n\n"
855
-            f"Actions so far: {actions_str}\n"
839
+            f"You may need to also:\n{steps}\n\n"
856
-            f"You MUST also:\n{steps}\n\n"
840
+            f"If the task is actually complete, just confirm what was done."
857
-            f"DO NOT respond with text. USE YOUR TOOLS NOW to complete these steps."
858
         )
841
         )
859
 
842
 
860
-    # Generic continuation - be forceful
843
+    # Generic - be gentle
861
     return (
844
     return (
862
-        f"INCOMPLETE. Task: \"{task}\"\n"
845
+        f"Task: \"{task}\"\n"
863
-        f"Actions taken: {actions_str} ({len(actions_taken)} total)\n\n"
846
+        f"You took {len(actions_taken)} action(s). "
864
-        f"You stopped too early. What about:\n"
847
+        f"If there's more to do, continue. Otherwise, confirm completion."
865
-        f"- Testing/verifying the result?\n"
866
-        f"- Running what you created?\n"
867
-        f"- Installing dependencies?\n\n"
868
-        f"USE YOUR TOOLS to continue. Do not just describe - EXECUTE."
869
     )
848
     )
870
 
849
 
871
 
850
 
src/loader/llm/base.pymodified
@@ -59,6 +59,9 @@ class StreamChunk:
59
     full_content: str = ""  # Accumulated full content (only set when is_done=True)
59
     full_content: str = ""  # Accumulated full content (only set when is_done=True)
60
     tool_calls: list[ToolCall] = field(default_factory=list)
60
     tool_calls: list[ToolCall] = field(default_factory=list)
61
     is_done: bool = False
61
     is_done: bool = False
62
+    # Pending tool call detected during streaming (ReAct mode)
63
+    # This allows showing tool widgets as they're detected, before streaming ends
64
+    pending_tool_call: ToolCall | None = None
62
 
65
 
63
 
66
 
64
 @dataclass
67
 @dataclass
src/loader/llm/ollama.pymodified
@@ -286,10 +286,17 @@ class OllamaBackend(LLMBackend):
286
         if "tool_calls" in message:
286
         if "tool_calls" in message:
287
             for i, tc in enumerate(message["tool_calls"]):
287
             for i, tc in enumerate(message["tool_calls"]):
288
                 func = tc.get("function", {})
288
                 func = tc.get("function", {})
289
+                # Arguments may be a JSON string or dict
290
+                args = func.get("arguments", {})
291
+                if isinstance(args, str):
292
+                    try:
293
+                        args = json.loads(args)
294
+                    except json.JSONDecodeError:
295
+                        args = {}
289
                 tool_calls.append(ToolCall(
296
                 tool_calls.append(ToolCall(
290
                     id=tc.get("id", f"call_{i}"),
297
                     id=tc.get("id", f"call_{i}"),
291
                     name=func.get("name", ""),
298
                     name=func.get("name", ""),
292
-                    arguments=func.get("arguments", {}),
299
+                    arguments=args,
293
                 ))
300
                 ))
294
         else:
301
         else:
295
             # Try to parse tool calls from text
302
             # Try to parse tool calls from text
@@ -362,6 +369,14 @@ class OllamaBackend(LLMBackend):
362
             async for chunk in self._stream_response(response):
369
             async for chunk in self._stream_response(response):
363
                 yield chunk
370
                 yield chunk
364
 
371
 
372
    def _debug_log(self, message: str) -> None:
        """Append a debug message to the shared debug log file.

        Args:
            message: Text to record; written with an "[ollama]" prefix.
        """
        # NOTE(review): the /tmp path is hardcoded and duplicated by the
        # ad-hoc debug writes in agent/loop.py — the two must stay in sync.
        # Not portable to Windows; consider routing through `logging` and
        # removing these debug hooks before release.
        try:
            with open("/tmp/loader_debug.log", "a") as f:
                f.write(f"[ollama] {message}\n")
        except Exception:
            # Best-effort only: a failed debug write must never interrupt
            # the streaming path.
            pass
379
+
365
     async def _stream_response(self, response) -> AsyncIterator[StreamChunk]:
380
     async def _stream_response(self, response) -> AsyncIterator[StreamChunk]:
366
         """Internal helper to stream response chunks."""
381
         """Internal helper to stream response chunks."""
367
         import re
382
         import re
@@ -369,9 +384,12 @@ class OllamaBackend(LLMBackend):
369
         full_content = ""
384
         full_content = ""
370
         display_content = ""  # Content to show (filtered)
385
         display_content = ""  # Content to show (filtered)
371
         json_buffer = ""  # Buffer for potential tool call JSON
386
         json_buffer = ""  # Buffer for potential tool call JSON
387
+        tool_call_buffer = ""  # Buffer for <tool_call> block content
372
         in_json_block = False
388
         in_json_block = False
373
         in_think_block = False  # For reasoning models like deepseek-r1
389
         in_think_block = False  # For reasoning models like deepseek-r1
374
         in_tool_call_block = False  # For ReAct <tool_call> tags
390
         in_tool_call_block = False  # For ReAct <tool_call> tags
391
+        detected_tool_calls: list[ToolCall] = []  # Track tool calls found during streaming
392
+        tool_call_counter = 0
375
 
393
 
376
         async for line in response.aiter_lines():
394
         async for line in response.aiter_lines():
377
             if not line:
395
             if not line:
@@ -392,18 +410,34 @@ class OllamaBackend(LLMBackend):
392
                 tool_calls = []
410
                 tool_calls = []
393
                 # Check for native tool calls first
411
                 # Check for native tool calls first
394
                 if "tool_calls" in message:
412
                 if "tool_calls" in message:
413
+                    self._debug_log(f"is_done: found native tool_calls in message: {len(message['tool_calls'])}")
395
                     for i, tc in enumerate(message["tool_calls"]):
414
                     for i, tc in enumerate(message["tool_calls"]):
396
                         func = tc.get("function", {})
415
                         func = tc.get("function", {})
416
+                        # Arguments may be a JSON string or dict
417
+                        args = func.get("arguments", {})
418
+                        if isinstance(args, str):
419
+                            try:
420
+                                args = json.loads(args)
421
+                            except json.JSONDecodeError:
422
+                                args = {}
397
                         tool_calls.append(ToolCall(
423
                         tool_calls.append(ToolCall(
398
                             id=tc.get("id", f"call_{i}"),
424
                             id=tc.get("id", f"call_{i}"),
399
                             name=func.get("name", ""),
425
                             name=func.get("name", ""),
400
-                            arguments=func.get("arguments", {}),
426
+                            arguments=args,
401
                         ))
427
                         ))
402
                 else:
428
                 else:
403
-                    # Try to parse tool calls from text
429
+                    # Use detected tool calls from streaming, or parse from text
430
+                    if detected_tool_calls:
431
+                        self._debug_log(f"is_done: using {len(detected_tool_calls)} detected_tool_calls from streaming")
432
+                        tool_calls = detected_tool_calls
433
+                    else:
434
+                        self._debug_log(f"is_done: parsing tool calls from text (len={len(full_content)})")
435
+                        self._debug_log(f"is_done: full_content = {repr(full_content[:500])}")
404
                         clean_content, tool_calls = self._parse_tool_calls(full_content)
436
                         clean_content, tool_calls = self._parse_tool_calls(full_content)
437
+                        self._debug_log(f"is_done: parsed {len(tool_calls)} tool calls")
405
                         display_content = clean_content
438
                         display_content = clean_content
406
 
439
 
440
+                self._debug_log(f"is_done: yielding final chunk with {len(tool_calls)} tool_calls")
407
                 yield StreamChunk(
441
                 yield StreamChunk(
408
                     content="",  # Don't emit final chunk content (already streamed)
442
                     content="",  # Don't emit final chunk content (already streamed)
409
                     full_content=display_content or full_content,
443
                     full_content=display_content or full_content,
@@ -431,27 +465,69 @@ class OllamaBackend(LLMBackend):
431
                     # Skip content inside think block
465
                     # Skip content inside think block
432
                     continue
466
                     continue
433
 
467
 
434
-                # Filter out <tool_call> blocks from ReAct mode
468
+                # Filter out <tool_call> blocks from ReAct mode - but parse them!
435
                 if "<tool_call>" in chunk_content:
469
                 if "<tool_call>" in chunk_content:
436
                     in_tool_call_block = True
470
                     in_tool_call_block = True
471
+                    tool_call_buffer = ""  # Reset buffer
437
                     # Keep content before <tool_call>
472
                     # Keep content before <tool_call>
438
                     before = chunk_content.split("<tool_call>")[0]
473
                     before = chunk_content.split("<tool_call>")[0]
439
                     if before:
474
                     if before:
440
                         display_content += before
475
                         display_content += before
441
                         yield StreamChunk(content=before)
476
                         yield StreamChunk(content=before)
477
+                    # Start buffering the tool call content
478
+                    after_tag = chunk_content.split("<tool_call>", 1)[-1]
479
+                    if "</tool_call>" in after_tag:
480
+                        # Complete tool call in same chunk
481
+                        tool_json = after_tag.split("</tool_call>")[0]
482
+                        after_close = after_tag.split("</tool_call>", 1)[-1]
483
+                        in_tool_call_block = False
484
+                        # Parse and yield the tool call
485
+                        try:
486
+                            tc_data = json.loads(tool_json.strip())
487
+                            tc = ToolCall(
488
+                                id=f"call_{tool_call_counter}",
489
+                                name=tc_data.get("name", ""),
490
+                                arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
491
+                            )
492
+                            tool_call_counter += 1
493
+                            detected_tool_calls.append(tc)
494
+                            yield StreamChunk(content="", pending_tool_call=tc)
495
+                        except (json.JSONDecodeError, KeyError):
496
+                            pass
497
+                        if after_close.strip():
498
+                            display_content += after_close
499
+                            yield StreamChunk(content=after_close)
500
+                    else:
501
+                        tool_call_buffer = after_tag
442
                     continue
502
                     continue
443
                 elif in_tool_call_block:
503
                 elif in_tool_call_block:
444
                     if "</tool_call>" in chunk_content:
504
                     if "</tool_call>" in chunk_content:
445
                         in_tool_call_block = False
505
                         in_tool_call_block = False
446
-                        # Keep content after </tool_call>
506
+                        # Complete the tool call buffer
447
-                        after = chunk_content.split("</tool_call>")[-1]
507
+                        tool_json = tool_call_buffer + chunk_content.split("</tool_call>")[0]
448
-                        if after:
508
+                        after_close = chunk_content.split("</tool_call>", 1)[-1]
449
-                            display_content += after
509
+                        # Parse and yield the tool call
450
-                            yield StreamChunk(content=after)
510
+                        try:
451
-                    # Skip content inside tool_call block
511
+                            tc_data = json.loads(tool_json.strip())
512
+                            tc = ToolCall(
513
+                                id=f"call_{tool_call_counter}",
514
+                                name=tc_data.get("name", ""),
515
+                                arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
516
+                            )
517
+                            tool_call_counter += 1
518
+                            detected_tool_calls.append(tc)
519
+                            yield StreamChunk(content="", pending_tool_call=tc)
520
+                        except (json.JSONDecodeError, KeyError):
521
+                            pass
522
+                        if after_close.strip():
523
+                            display_content += after_close
524
+                            yield StreamChunk(content=after_close)
525
+                    else:
526
+                        # Still accumulating tool call content
527
+                        tool_call_buffer += chunk_content
452
                     continue
528
                     continue
453
 
529
 
454
-                # Filter out tool call JSON from display
530
+                # Filter out tool call JSON from display (bare JSON without tags)
455
                 # Detect start of JSON tool call
531
                 # Detect start of JSON tool call
456
                 if not in_json_block and '{"name"' in chunk_content:
532
                 if not in_json_block and '{"name"' in chunk_content:
457
                     in_json_block = True
533
                     in_json_block = True
@@ -467,17 +543,29 @@ class OllamaBackend(LLMBackend):
467
                     open_braces = json_buffer.count('{')
543
                     open_braces = json_buffer.count('{')
468
                     close_braces = json_buffer.count('}')
544
                     close_braces = json_buffer.count('}')
469
                     if close_braces >= open_braces and open_braces > 0:
545
                     if close_braces >= open_braces and open_braces > 0:
470
-                        # JSON block complete, don't display it
546
+                        # JSON block complete, try to parse it
471
                         in_json_block = False
547
                         in_json_block = False
472
-                        # Check for content after the JSON
473
                         try:
548
                         try:
474
                             # Find where JSON ends
549
                             # Find where JSON ends
475
                             last_brace = json_buffer.rfind('}')
550
                             last_brace = json_buffer.rfind('}')
551
+                            json_str = json_buffer[:last_brace + 1]
476
                             after_json = json_buffer[last_brace + 1:]
552
                             after_json = json_buffer[last_brace + 1:]
553
+                            # Try to parse as tool call
554
+                            tc_data = json.loads(json_str)
555
+                            if "name" in tc_data:
556
+                                tc = ToolCall(
557
+                                    id=f"call_{tool_call_counter}",
558
+                                    name=tc_data.get("name", ""),
559
+                                    arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
560
+                                )
561
+                                tool_call_counter += 1
562
+                                detected_tool_calls.append(tc)
563
+                                yield StreamChunk(content="", pending_tool_call=tc)
477
                             if after_json.strip():
564
                             if after_json.strip():
478
                                 display_content += after_json
565
                                 display_content += after_json
479
                                 yield StreamChunk(content=after_json)
566
                                 yield StreamChunk(content=after_json)
480
-                        except Exception:
567
+                        except (json.JSONDecodeError, KeyError):
568
+                            # Not valid JSON, just discard
481
                             pass
569
                             pass
482
                         json_buffer = ""
570
                         json_buffer = ""
483
                 else:
571
                 else:
src/loader/ui/adapter.pymodified
@@ -178,12 +178,29 @@ class RollbackSummary(Message):
178
 class EventAdapter:
178
 class EventAdapter:
179
     """Adapts Agent callback events to Textual messages."""
179
     """Adapts Agent callback events to Textual messages."""
180
 
180
 
181
+    DEBUG_LOG_FILE = "/tmp/loader_debug.log"
182
+
181
     def __init__(self, app: "LoaderApp") -> None:  # noqa: F821
183
     def __init__(self, app: "LoaderApp") -> None:  # noqa: F821
182
         self.app = app
184
         self.app = app
183
         self._tool_args_queue: list[tuple[str, dict]] = []  # Queue of (tool_name, args)
185
         self._tool_args_queue: list[tuple[str, dict]] = []  # Queue of (tool_name, args)
186
+        # Clear debug log on start
187
+        try:
188
+            with open(self.DEBUG_LOG_FILE, "w") as f:
189
+                f.write("=== Loader Debug Log ===\n")
190
+        except Exception:
191
+            pass
192
+
193
+    def _debug_log(self, message: str) -> None:
194
+        """Write debug message to log file."""
195
+        try:
196
+            with open(self.DEBUG_LOG_FILE, "a") as f:
197
+                f.write(f"{message}\n")
198
+        except Exception:
199
+            pass
184
 
200
 
185
     def handle_event(self, event: AgentEvent) -> None:
201
     def handle_event(self, event: AgentEvent) -> None:
186
         """Convert AgentEvent to appropriate Textual message and post it."""
202
         """Convert AgentEvent to appropriate Textual message and post it."""
203
+        self._debug_log(f"handle_event: type={event.type}")
187
         match event.type:
204
         match event.type:
188
             case "thinking":
205
             case "thinking":
189
                 self.app.post_message(ThinkingStarted())
206
                 self.app.post_message(ThinkingStarted())
@@ -201,11 +218,22 @@ class EventAdapter:
201
 
218
 
202
             case "tool_call":
219
             case "tool_call":
203
                 # Queue args for matching with result (FIFO)
220
                 # Queue args for matching with result (FIFO)
204
-                self._tool_args_queue.append((event.tool_name or "", event.tool_args or {}))
221
+                tool_name = event.tool_name or ""
222
+                tool_args = event.tool_args or {}
223
+                self._tool_args_queue.append((tool_name, tool_args))
224
+
225
+                # Debug: log tool args for edit/write (helps diagnose diff view issues)
226
+                self._debug_log(f"tool_call '{tool_name}': queued, keys={list(tool_args.keys())}")
227
+                if tool_name == "write":
228
+                    content = tool_args.get("content", "")
229
+                    self._debug_log(f"  write content: {len(content) if content else 0} chars")
230
+                elif tool_name == "edit":
231
+                    self._debug_log(f"  edit old_string: {bool(tool_args.get('old_string'))}, new_string: {bool(tool_args.get('new_string'))}")
232
+
205
                 self.app.post_message(
233
                 self.app.post_message(
206
                     ToolCallStarted(
234
                     ToolCallStarted(
207
-                        tool_name=event.tool_name or "",
235
+                        tool_name=tool_name,
208
-                        tool_args=event.tool_args or {},
236
+                        tool_args=tool_args,
209
                     )
237
                     )
210
                 )
238
                 )
211
 
239
 
@@ -221,24 +249,63 @@ class EventAdapter:
221
                         if queued_name == tool_name:
249
                         if queued_name == tool_name:
222
                             tool_args = queued_args
250
                             tool_args = queued_args
223
                             self._tool_args_queue.pop(i)
251
                             self._tool_args_queue.pop(i)
252
+                            self._debug_log(f"tool_result '{tool_name}': matched in queue, keys={list(tool_args.keys())}")
224
                             break
253
                             break
225
                     else:
254
                     else:
226
                         # No match found, use FIFO
255
                         # No match found, use FIFO
227
-                        _, tool_args = self._tool_args_queue.pop(0)
256
+                        popped_name, tool_args = self._tool_args_queue.pop(0)
257
+                        self._debug_log(f"tool_result '{tool_name}': no match, used FIFO (got '{popped_name}'), keys={list(tool_args.keys())}")
258
+                else:
259
+                    self._debug_log(f"tool_result '{tool_name}': queue was EMPTY!")
228
 
260
 
229
                 # Extract diff info for edit/write tools
261
                 # Extract diff info for edit/write tools
230
                 old_string = None
262
                 old_string = None
231
                 new_string = None
263
                 new_string = None
232
                 file_path = None
264
                 file_path = None
233
 
265
 
234
-                if tool_name == "edit" and tool_args:
266
+                if tool_name == "edit":
235
-                    old_string = tool_args.get("old_string")
267
+                    if tool_args:
236
-                    new_string = tool_args.get("new_string")
268
+                        # Try multiple key names that models might use
237
-                    file_path = tool_args.get("file_path")
269
+                        old_string = (
238
-                elif tool_name == "write" and tool_args:
270
+                            tool_args.get("old_string")
271
+                            or tool_args.get("old")
272
+                            or tool_args.get("original")
273
+                            or tool_args.get("search")
274
+                            or tool_args.get("find")
275
+                        )
276
+                        new_string = (
277
+                            tool_args.get("new_string")
278
+                            or tool_args.get("new")
279
+                            or tool_args.get("replacement")
280
+                            or tool_args.get("replace")
281
+                        )
282
+                        file_path = (
283
+                            tool_args.get("file_path")
284
+                            or tool_args.get("path")
285
+                            or tool_args.get("filename")
286
+                            or tool_args.get("file")
287
+                        )
288
+                        self._debug_log(f"  edit extracted: old={bool(old_string)} ({len(old_string) if old_string else 0} chars), new={bool(new_string)} ({len(new_string) if new_string else 0} chars), path={file_path}")
289
+                    else:
290
+                        self._debug_log(f"  edit: tool_args was empty!")
291
+                elif tool_name == "write":
239
                     # For writes, content is the new file content
292
                     # For writes, content is the new file content
240
-                    new_string = tool_args.get("content")
293
+                    # Try multiple key names that models might use
241
-                    file_path = tool_args.get("file_path")
294
+                    if tool_args:
295
+                        new_string = (
296
+                            tool_args.get("content")
297
+                            or tool_args.get("contents")
298
+                            or tool_args.get("text")
299
+                            or tool_args.get("data")
300
+                        )
301
+                        file_path = (
302
+                            tool_args.get("file_path")
303
+                            or tool_args.get("path")
304
+                            or tool_args.get("filename")
305
+                        )
306
+                        self._debug_log(f"  write extracted: new={bool(new_string)} ({len(new_string) if new_string else 0} chars), path={file_path}")
307
+                    else:
308
+                        self._debug_log(f"  write: tool_args was empty!")
242
 
309
 
243
                 self.app.post_message(
310
                 self.app.post_message(
244
                     ToolCallCompleted(
311
                     ToolCallCompleted(
src/loader/ui/app.pymodified
@@ -68,6 +68,14 @@ class LoaderApp(App):
68
         self._tool_widget_queue: list[ToolCallWidget] = []  # Queue of pending tool widgets
68
         self._tool_widget_queue: list[ToolCallWidget] = []  # Queue of pending tool widgets
69
         self._timer_handle = None
69
         self._timer_handle = None
70
 
70
 
71
+    def _debug_log(self, message: str) -> None:
72
+        """Write debug message to log file."""
73
+        try:
74
+            with open("/tmp/loader_debug.log", "a") as f:
75
+                f.write(f"{message}\n")
76
+        except Exception:
77
+            pass
78
+
71
     def compose(self) -> ComposeResult:
79
     def compose(self) -> ComposeResult:
72
         yield Container(
80
         yield Container(
73
             ScrollableContainer(id="message-area"),
81
             ScrollableContainer(id="message-area"),
@@ -137,6 +145,10 @@ class LoaderApp(App):
137
 
145
 
138
         # If agent is running, this is a steering message
146
         # If agent is running, this is a steering message
139
         if self.is_generating and self.agent.is_running:
147
         if self.is_generating and self.agent.is_running:
148
+            # Finalize current streaming so new content appears below user's message
149
+            if self._current_streaming is not None:
150
+                self._current_streaming.stop_streaming()
151
+                self._current_streaming = None
140
             self._add_steering_message(user_input)
152
             self._add_steering_message(user_input)
141
             self.agent.steer(user_input)
153
             self.agent.steer(user_input)
142
             return
154
             return
@@ -258,6 +270,11 @@ class LoaderApp(App):
258
         """Handle tool call start."""
270
         """Handle tool call start."""
259
         msg_area = self.query_one("#message-area", ScrollableContainer)
271
         msg_area = self.query_one("#message-area", ScrollableContainer)
260
 
272
 
273
+        # Finalize any ongoing streaming - tool calls interrupt thinking
274
+        if self._current_streaming is not None:
275
+            self._current_streaming.stop_streaming()
276
+            self._current_streaming = None
277
+
261
         # Create tool widget
278
         # Create tool widget
262
         widget = ToolCallWidget(
279
         widget = ToolCallWidget(
263
             tool_name=message.tool_name,
280
             tool_name=message.tool_name,
@@ -272,12 +289,20 @@ class LoaderApp(App):
272
         """Handle tool call completion."""
289
         """Handle tool call completion."""
273
         msg_area = self.query_one("#message-area", ScrollableContainer)
290
         msg_area = self.query_one("#message-area", ScrollableContainer)
274
 
291
 
292
+        # Debug: log what we received
293
+        try:
294
+            with open("/tmp/loader_debug.log", "a") as f:
295
+                f.write(f"on_tool_call_completed: tool={message.tool_name}, new_string={bool(message.new_string)}, old_string={bool(message.old_string)}, file_path={message.file_path}\n")
296
+        except Exception:
297
+            pass
298
+
275
         # Get the corresponding tool widget from queue (FIFO)
299
         # Get the corresponding tool widget from queue (FIFO)
276
         tool_widget = self._tool_widget_queue.pop(0) if self._tool_widget_queue else None
300
         tool_widget = self._tool_widget_queue.pop(0) if self._tool_widget_queue else None
277
 
301
 
278
         # Check if this is an edit tool with diff info
302
         # Check if this is an edit tool with diff info
279
         if message.tool_name == "edit" and message.old_string and message.new_string:
303
         if message.tool_name == "edit" and message.old_string and message.new_string:
280
             # Replace tool widget with diff widget
304
             # Replace tool widget with diff widget
305
+            self._debug_log("  -> showing EDIT diff widget")
281
             if tool_widget:
306
             if tool_widget:
282
                 tool_widget.remove()
307
                 tool_widget.remove()
283
 
308
 
@@ -289,6 +314,7 @@ class LoaderApp(App):
289
             msg_area.mount(diff_widget)
314
             msg_area.mount(diff_widget)
290
         # Check if this is a write tool - show as diff (new file)
315
         # Check if this is a write tool - show as diff (new file)
291
         elif message.tool_name == "write" and message.new_string:
316
         elif message.tool_name == "write" and message.new_string:
317
+            self._debug_log("  -> showing WRITE diff widget")
292
             if tool_widget:
318
             if tool_widget:
293
                 tool_widget.remove()
319
                 tool_widget.remove()
294
 
320
 
@@ -300,6 +326,7 @@ class LoaderApp(App):
300
             msg_area.mount(diff_widget)
326
             msg_area.mount(diff_widget)
301
         elif tool_widget:
327
         elif tool_widget:
302
             # Update existing tool widget with result
328
             # Update existing tool widget with result
329
+            self._debug_log("  -> showing regular tool widget result")
303
             tool_widget.set_result(
330
             tool_widget.set_result(
304
                 message.content, is_error=message.is_error
331
                 message.content, is_error=message.is_error
305
             )
332
             )
src/loader/ui/widgets/streaming.pymodified
@@ -19,11 +19,25 @@ class StreamingText(Static):
19
     def render(self) -> Text:
19
     def render(self) -> Text:
20
         """Render the content with optional cursor."""
20
         """Render the content with optional cursor."""
21
         # Use Text object to avoid markup interpretation of LLM output
21
         # Use Text object to avoid markup interpretation of LLM output
22
-        text = Text(self._content_buffer)
22
+        # Clean any tool_call tags that slipped through filtering
23
+        content = self._clean_tool_tags(self._content_buffer)
24
+        text = Text(content)
23
         if self.is_streaming:
25
         if self.is_streaming:
24
             text.append("|", style="dim")  # Cursor indicator
26
             text.append("|", style="dim")  # Cursor indicator
25
         return text
27
         return text
26
 
28
 
29
+    def _clean_tool_tags(self, content: str) -> str:
30
+        """Remove any tool_call/think tags that weren't filtered during streaming."""
31
+        import re
32
+        # Remove <tool_call>...</tool_call> blocks
33
+        content = re.sub(r'<tool_call>.*?</tool_call>', '', content, flags=re.DOTALL | re.IGNORECASE)
34
+        # Remove orphaned tags
35
+        content = re.sub(r'</?tool_call>', '', content, flags=re.IGNORECASE)
36
+        content = re.sub(r'</?think>', '', content, flags=re.IGNORECASE)
37
+        # Clean up excess newlines from removed blocks
38
+        content = re.sub(r'\n{3,}', '\n\n', content)
39
+        return content
40
+
27
     def append(self, chunk: str) -> None:
41
     def append(self, chunk: str) -> None:
28
         """Append a chunk to the content."""
42
         """Append a chunk to the content."""
29
         self._content_buffer += chunk
43
         self._content_buffer += chunk
tests/test_parsing.pymodified
@@ -102,6 +102,37 @@ Action: <tool_call>
102
         assert "<tool_call>" not in result.content
102
         assert "<tool_call>" not in result.content
103
         assert "</tool_call>" not in result.content
103
         assert "</tool_call>" not in result.content
104
 
104
 
105
+    def test_parse_bracketed_calls_format(self):
106
+        """Test parsing [calls tool with: key=value] format."""
107
+        text = '''I'll create the file now.
108
+[calls write tool with: file_path=/tmp/test.txt, content="hello world"]
109
+Created the file.'''
110
+        result = parse_tool_calls(text)
111
+        assert len(result.tool_calls) == 1
112
+        assert result.tool_calls[0].name == "write"
113
+        assert result.tool_calls[0].arguments["file_path"] == "/tmp/test.txt"
114
+        assert result.tool_calls[0].arguments["content"] == "hello world"
115
+        # Bracketed call should be removed from content
116
+        assert "[calls" not in result.content
117
+
118
+    def test_parse_bracketed_use_format(self):
119
+        """Test parsing [USE tool: key=value] format."""
120
+        text = '[USE bash tool: command="ls -la"]'
121
+        result = parse_tool_calls(text)
122
+        assert len(result.tool_calls) == 1
123
+        assert result.tool_calls[0].name == "bash"
124
+        assert result.tool_calls[0].arguments["command"] == "ls -la"
125
+
126
+    def test_parse_bracketed_edit_format(self):
127
+        """Test parsing bracketed format with edit tool."""
128
+        text = '[calls edit tool with: file_path="test.py", old_string="foo", new_string="bar"]'
129
+        result = parse_tool_calls(text)
130
+        assert len(result.tool_calls) == 1
131
+        assert result.tool_calls[0].name == "edit"
132
+        assert result.tool_calls[0].arguments["file_path"] == "test.py"
133
+        assert result.tool_calls[0].arguments["old_string"] == "foo"
134
+        assert result.tool_calls[0].arguments["new_string"] == "bar"
135
+
105
 
136
 
106
 class TestFormatToolResult:
137
 class TestFormatToolResult:
107
     """Tests for format_tool_result function."""
138
     """Tests for format_tool_result function."""