tenseleyflow/loader / 6e4f880

fixes to agent behavior and more i forget

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: 6e4f880d2032c3e49e6642c143ba919fdb197fb0
Parents: 833086d
Tree: f3fed0c

11 changed files

Status  File  +  -
A package-lock.json 56 0
A package.json 30 0
M src/loader/agent/loop.py 61 20
M src/loader/agent/parsing.py 50 0
M src/loader/agent/reasoning.py 67 88
M src/loader/llm/base.py 3 0
M src/loader/llm/ollama.py 105 17
M src/loader/ui/adapter.py 78 11
M src/loader/ui/app.py 27 0
M src/loader/ui/widgets/streaming.py 15 1
M tests/test_parsing.py 31 0
package-lock.json (added)
@@ -0,0 +1,56 @@
+{
+  "name": "loader",
+  "version": "1.0.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "loader",
+      "version": "1.0.0",
+      "license": "ISC",
+      "dependencies": {
+        "react": "^19.2.3",
+        "react-dom": "^19.2.3",
+        "react-parallax-tilt": "^1.7.315"
+      },
+      "devDependencies": {}
+    },
+    "node_modules/react": {
+      "version": "19.2.3",
+      "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz",
+      "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/react-dom": {
+      "version": "19.2.3",
+      "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz",
+      "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==",
+      "license": "MIT",
+      "dependencies": {
+        "scheduler": "^0.27.0"
+      },
+      "peerDependencies": {
+        "react": "^19.2.3"
+      }
+    },
+    "node_modules/react-parallax-tilt": {
+      "version": "1.7.315",
+      "resolved": "https://registry.npmjs.org/react-parallax-tilt/-/react-parallax-tilt-1.7.315.tgz",
+      "integrity": "sha512-m0I2yPEmzEC+qGelF+8P+L60lH/S50OJE+pz1bVmurnkKNMyd2Q4qhtAi8zRibNkwFd6oOGvA8qEqAySBbAOJg==",
+      "license": "MIT",
+      "peerDependencies": {
+        "react": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
+        "react-dom": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
+      }
+    },
+    "node_modules/scheduler": {
+      "version": "0.27.0",
+      "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz",
+      "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==",
+      "license": "MIT"
+    }
+  }
+}
package.json (added)
@@ -0,0 +1,30 @@
+{
+  "dependencies": {
+    "react": "^19.2.3",
+    "react-dom": "^19.2.3",
+    "react-parallax-tilt": "^1.7.315"
+  },
+  "name": "loader",
+  "version": "1.0.0",
+  "description": "Local agentic coding assistant. Runs on your hardware with local LLMs.",
+  "main": "index.js",
+  "directories": {
+    "doc": "docs",
+    "test": "tests"
+  },
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/tenseleyFlow/loader.git"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "type": "commonjs",
+  "bugs": {
+    "url": "https://github.com/tenseleyFlow/loader/issues"
+  },
+  "homepage": "https://github.com/tenseleyFlow/loader#readme"
+}
src/loader/agent/loop.py (modified)
@@ -194,7 +194,14 @@ class Agent:
 
         # Check if backend supports native tools
         if hasattr(self.backend, "supports_native_tools"):
-            self._use_react = not self.backend.supports_native_tools()
+            supports_native = self.backend.supports_native_tools()
+            self._use_react = not supports_native
+            # Debug log
+            try:
+                with open("/tmp/loader_debug.log", "a") as f:
+                    f.write(f"[loop] use_react: supports_native={supports_native}, use_react={self._use_react}\n")
+            except Exception:
+                pass
         else:
             # Default to ReAct for unknown backends
             self._use_react = True
@@ -586,6 +593,7 @@ class Agent:
             tools = None if self.use_react else self.registry.get_schemas()
 
             # Use streaming or regular completion
+            pending_tool_calls_seen: set[str] = set()  # Track IDs of pending tool calls shown
             if self.config.stream:
                 full_content = ""
                 tool_calls: list[ToolCall] = []
@@ -596,15 +604,30 @@ class Agent:
                     temperature=self.config.temperature,
                     max_tokens=effective_max_tokens,
                 ):
-                    if chunk.content:
+                    # Emit stream events for content OR for final chunk (to signal end)
+                    if chunk.content or chunk.is_done:
                         await emit(AgentEvent(
                             type="stream",
                             content=chunk.content,
                             is_stream_end=chunk.is_done,
                         ))
+                    # Show pending tool calls as they're detected (ReAct mode interleaving)
+                    if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen:
+                        pending_tool_calls_seen.add(chunk.pending_tool_call.id)
+                        await emit(AgentEvent(
+                            type="tool_call",
+                            tool_name=chunk.pending_tool_call.name,
+                            tool_args=chunk.pending_tool_call.arguments,
+                        ))
                     if chunk.is_done:
                         full_content = chunk.full_content or full_content
                         tool_calls = chunk.tool_calls
+                        # Debug log
+                        try:
+                            with open("/tmp/loader_debug.log", "a") as f:
+                                f.write(f"[loop] chunk.is_done: got {len(tool_calls)} tool_calls\n")
+                        except Exception:
+                            pass
 
                 content = full_content
                 response_content = full_content
@@ -638,6 +661,15 @@ class Agent:
 
             # If there are tool calls, execute them
             if tool_calls:
+                # Debug log
+                try:
+                    with open("/tmp/loader_debug.log", "a") as f:
+                        f.write(f"[loop] executing {len(tool_calls)} tool_calls\n")
+                        for tc in tool_calls:
+                            f.write(f"[loop]   - {tc.name}: id={tc.id}, args_keys={list(tc.arguments.keys())}\n")
+                except Exception:
+                    pass
+
                 # Add assistant message with tool calls
                 self.messages.append(Message(
                     role=Role.ASSISTANT,
@@ -682,11 +714,24 @@ class Agent:
                             ))
                             continue  # Skip this tool call, let LLM reconsider
 
-                    await emit(AgentEvent(
-                        type="tool_call",
-                        tool_name=tool_call.name,
-                        tool_args=tool_call.arguments,
-                    ))
+                    # Only emit tool_call if not already shown during streaming
+                    if tool_call.id not in pending_tool_calls_seen:
+                        try:
+                            with open("/tmp/loader_debug.log", "a") as f:
+                                f.write(f"[loop] emitting tool_call event for {tool_call.name}\n")
+                        except Exception:
+                            pass
+                        await emit(AgentEvent(
+                            type="tool_call",
+                            tool_name=tool_call.name,
+                            tool_args=tool_call.arguments,
+                        ))
+                    else:
+                        try:
+                            with open("/tmp/loader_debug.log", "a") as f:
+                                f.write(f"[loop] SKIPPING tool_call event for {tool_call.name} (already in pending_seen)\n")
+                        except Exception:
+                            pass
 
                     # Track this action for completion checking
                     action_desc = f"{tool_call.name}: {str(tool_call.arguments)[:100]}"
@@ -904,25 +949,21 @@ class Agent:
                 ))
                 continue
 
-            # No tool calls and early in the task - likely giving up too soon
-            # This catches native mode models that stop without using tools
-            if not self.use_react and len(actions_taken) < 5 and iterations < self.config.max_iterations - 2:
-                # Check if response looks like a stopping point but we haven't done much
-                stopping_phrases = [
-                    "let me know", "feel free", "hope this", "happy to help",
-                    "anything else", "is there", "that's", "all done", "complete",
-                ]
-                looks_like_stopping = any(p in content.lower() for p in stopping_phrases)
-
-                if looks_like_stopping or len(content) < 150:
+            # No tool calls and early in the task - MAY be giving up too soon
+            # But only intervene if we haven't done ANY work yet
+            if not self.use_react and len(actions_taken) == 0 and iterations < self.config.max_iterations - 2:
+                # Check if response looks like deflection without having done anything
+                deflection_phrases = ["you can", "you should", "you could", "try running"]
+                looks_like_deflection = any(p in content.lower() for p in deflection_phrases)
+
+                if looks_like_deflection:
                     self.messages.append(Message(
                         role=Role.ASSISTANT,
                         content=response_content,
                     ))
                     self.messages.append(Message(
                         role=Role.USER,
-                        content="You stopped without completing the task. Continue executing - "
-                                "use your tools to finish the job. Don't describe what to do, DO IT.",
+                        content="Please use your tools to execute the task rather than telling me what to do.",
                     ))
                     continue
 
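A small sketch (not part of this commit) of the dedup the new pending_tool_calls_seen set provides across the two emit sites above; the names mirror the diff.

    pending_tool_calls_seen: set[str] = set()

    # During streaming: a pending tool call is announced once and its id remembered.
    pending_tool_calls_seen.add("call_0")

    # At execution time: the same id is skipped, a new one is still announced.
    "call_0" not in pending_tool_calls_seen   # False -> no duplicate tool_call event
    "call_1" not in pending_tool_calls_seen   # True  -> emitted as before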
src/loader/agent/parsing.py (modified)
@@ -26,6 +26,38 @@ def _extract_arguments(data: dict) -> dict:
     return {}
 
 
+def _parse_bracket_args(args_str: str) -> dict:
+    """Parse arguments from bracketed tool call format.
+
+    Handles formats like:
+        file_path=/tmp/test.txt, content="hello world"
+        command="ls -la"
+        file_path="test.py", old_string="foo", new_string="bar"
+
+    Args:
+        args_str: The arguments string (everything after "tool with:" or "tool:")
+
+    Returns:
+        Dictionary of parsed arguments
+    """
+    args = {}
+
+    # Pattern to match key=value pairs where value can be:
+    # - quoted string (single or double quotes)
+    # - unquoted value (until comma or end)
+    pattern = r'(\w+)\s*=\s*(?:"([^"]*?)"|\'([^\']*?)\'|([^,\]]+?))\s*(?:,|$)'
+
+    for match in re.finditer(pattern, args_str):
+        key = match.group(1)
+        # Value is in one of the capture groups (2=double quoted, 3=single quoted, 4=unquoted)
+        value = match.group(2) or match.group(3) or match.group(4)
+        if value is not None:
+            value = value.strip()
+            args[key] = value
+
+    return args
+
+
 def parse_tool_calls(text: str) -> ParsedResponse:
     """Parse tool calls from LLM text output.
 
@@ -96,6 +128,24 @@ def parse_tool_calls(text: str) -> ParsedResponse:
         if tool_calls:
             content = re.sub(bare_json_pattern, "", content)
 
+    # Pattern 3: Bracketed format [calls/USE tool with/: key=value, ...]
+    # Examples:
+    #   [calls write tool with: file_path=/tmp/test.txt, content="hello"]
+    #   [USE bash tool: command="ls -la"]
+    if not tool_calls:
+        bracket_pattern = r'\[(?:calls|USE)\s+(\w+)\s+tool(?:\s+with)?[:\s]+([^\]]+)\]'
+        for i, (name, args_str) in enumerate(re.findall(bracket_pattern, text, re.IGNORECASE)):
+            args = _parse_bracket_args(args_str)
+            if args:
+                tool_calls.append(ToolCall(
+                    id=f"call_{i}",
+                    name=name.lower(),
+                    arguments=args,
+                ))
+        # Remove bracketed tool calls from content
+        if tool_calls:
+            content = re.sub(bracket_pattern, "", content, flags=re.IGNORECASE)
+
     # Clean up content
     content = content.strip()
 
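A minimal usage sketch of the new bracketed format, matching the assertions added in tests/test_parsing.py further down; the loader.agent.parsing import path is an assumption from the src/ layout.

    from loader.agent.parsing import parse_tool_calls  # assumed import path for src/loader/agent/parsing.py

    result = parse_tool_calls('[calls write tool with: file_path=/tmp/test.txt, content="hello world"]')
    # Pattern 3 matches, producing one ToolCall:
    #   result.tool_calls[0].name == "write"
    #   result.tool_calls[0].arguments == {"file_path": "/tmp/test.txt", "content": "hello world"}
    # and the bracketed call is stripped from result.content.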
src/loader/agent/reasoning.py (modified)
@@ -722,46 +722,52 @@ def detect_premature_completion(task: str, response: str, actions_taken: list[st
     """Quick heuristic to detect if agent is stopping too early.
 
     Returns True if the agent might be giving up prematurely.
+    This should be CONSERVATIVE - only trigger when really needed,
+    not for simple tasks that are genuinely complete.
     """
     task_lower = task.lower()
     response_lower = response.lower()
 
-    # Keywords that suggest the task should involve multiple steps
-    multi_step_indicators = [
-        "create a", "build a", "make a", "set up", "setup",
-        "initialize", "scaffold", "generate", "implement",
-        "project", "application", "app", "website", "api",
-        "add", "write", "develop", "design", "help me",
-    ]
+    # If no actions taken at all and task requires action, that's premature
+    if not actions_taken:
+        # But only if this looks like an actionable task
+        action_verbs = ["create", "write", "make", "edit", "fix", "add", "delete", "run"]
+        if any(verb in task_lower for verb in action_verbs):
+            return True
+        return False  # Informational/conversational tasks don't need actions
 
-    # Keywords that suggest testing/verification should happen
-    verification_indicators = [
-        "test", "run", "start", "launch", "verify", "check",
-        "demo", "show", "demonstrate", "work", "function",
+    # If we took actions and got successful results, trust that we're done
+    # Check for success indicators in response
+    success_indicators = [
+        "successfully", "created", "written", "done", "completed",
+        "file now contains", "has been updated", "installed",
    ]
+    if any(ind in response_lower for ind in success_indicators) and len(actions_taken) >= 1:
+        return False  # Likely actually done
 
-    # Keywords in response that suggest premature completion
-    premature_phrases = [
-        "i've created", "i created", "file has been created",
-        "here's the", "i've set up the basic", "i've written",
-        "you can now", "you should now", "you can run",
-        "that's it", "all done", "complete", "finished",
-        "let me know", "feel free to", "hope this helps",
-        "is there anything else",
+    # Keywords that suggest COMPLEX multi-step tasks (not simple ones)
+    complex_indicators = [
+        "set up a project", "create a project", "build a complete",
+        "scaffold", "initialize a new", "create a full",
+        "implement a full", "develop a complete",
    ]
+    is_complex = any(ind in task_lower for ind in complex_indicators)
 
-    # Check if this looks like a multi-step task
-    is_multi_step = any(ind in task_lower for ind in multi_step_indicators)
-
-    # Check if verification was expected but not done
-    expects_verification = any(ind in task_lower for ind in verification_indicators)
+    # Simple creation tasks don't need follow-up
+    simple_creation = [
+        "create a file", "write a file", "make a file",
+        "add a function", "edit the", "fix the", "update the",
+        "read the", "show me", "list",
+    ]
+    is_simple = any(ind in task_lower for ind in simple_creation)
 
-    # Check for premature completion phrases
-    has_premature_phrase = any(phrase in response_lower for phrase in premature_phrases)
+    # If it's a simple task with at least one action, it's probably done
+    if is_simple and len(actions_taken) >= 1:
+        return False
 
-    # Action count thresholds
-    few_actions = len(actions_taken) < 3
-    very_few_actions = len(actions_taken) < 2
+    # Explicit verification requests need bash
+    explicit_verification = ["and test", "and run", "and verify", "make sure it works"]
+    needs_verification = any(ind in task_lower for ind in explicit_verification)
 
     # Categorize what actions were taken
     action_types = set()
@@ -778,33 +784,19 @@ def detect_premature_completion(task: str, response: str, actions_taken: list[st
         elif "glob" in action_lower or "grep" in action_lower:
             action_types.add("search")
 
-    # More aggressive detection:
+    # Detection rules (more conservative):
 
-    # 1. Multi-step task with premature phrases and few actions
-    if is_multi_step and has_premature_phrase and few_actions:
+    # 1. Complex project tasks with very few actions
+    if is_complex and len(actions_taken) < 3:
         return True
 
-    # 2. Multi-step task with very few actions (regardless of phrases)
-    if is_multi_step and very_few_actions:
+    # 2. Explicitly requested verification but no bash run
+    if needs_verification and "bash" not in action_types:
         return True
 
-    # 3. Only wrote/edited files but never ran/tested anything
-    if action_types and action_types <= {"write", "edit", "read"} and few_actions:
-        # Wrote files but never executed bash to test
-        if "write" in action_types or "edit" in action_types:
-            return True
-
-    # 4. Verification expected but no bash commands run
-    if expects_verification and "bash" not in action_types:
-        return True
-
-    # 5. Response has chatbot-style "let me know" phrases
-    chatbot_phrases = ["let me know", "feel free", "hope this", "happy to help"]
-    if any(phrase in response_lower for phrase in chatbot_phrases):
-        return True
-
-    # 6. Response is very short but task seems substantial
-    if len(response) < 200 and is_multi_step and len(actions_taken) > 0:
+    # 3. Chatbot-style deflection with no real work done
+    deflection_phrases = ["you can now", "you should", "you can run", "you can use"]
+    if any(phrase in response_lower for phrase in deflection_phrases) and len(actions_taken) < 2:
         return True
 
     return False
@@ -814,6 +806,7 @@ def get_continuation_prompt(task: str, actions_taken: list[str], response: str)
     """Generate a prompt to encourage the agent to continue.
 
     Returns a prompt that nudges the agent to follow through.
+    Should be helpful, not aggressive.
     """
     task_lower = task.lower()
     actions_str = ", ".join(a.split(":")[0] for a in actions_taken[-5:]) if actions_taken else "none"
@@ -821,51 +814,37 @@ def get_continuation_prompt(task: str, actions_taken: list[str], response: str)
     # Determine what type of follow-up is needed
     follow_ups = []
 
-    # Project setup tasks should initialize
-    if any(kw in task_lower for kw in ["node", "npm", "javascript", "react", "vue", "next"]):
-        if not any("npm" in a for a in actions_taken):
-            follow_ups.append("Run `npm install` to install dependencies")
-            follow_ups.append("Start the development server to verify it works")
-
-    if any(kw in task_lower for kw in ["python", "pip", "django", "flask", "fastapi"]):
-        if not any("pip" in a or "uv" in a for a in actions_taken):
-            follow_ups.append("Install dependencies with pip/uv")
-            follow_ups.append("Run the application to verify it works")
-
-    # Test tasks should run tests
-    if "test" in task_lower:
+    # Only suggest package install if explicitly mentioned in task
+    if any(kw in task_lower for kw in ["install", "dependencies", "set up project"]):
+        if "node" in task_lower or "npm" in task_lower:
+            if not any("npm" in a for a in actions_taken):
+                follow_ups.append("Run `npm install` to install dependencies")
+        if "python" in task_lower or "pip" in task_lower:
+            if not any("pip" in a or "uv" in a for a in actions_taken):
+                follow_ups.append("Install dependencies")
+
+    # Only suggest running tests if "test" is explicitly in task
+    if "test" in task_lower and "run" in task_lower:
         if not any("test" in a or "pytest" in a or "jest" in a for a in actions_taken):
-            follow_ups.append("Run the tests to verify they pass")
-
-    # Build tasks should verify build
-    if "build" in task_lower or "compile" in task_lower:
-        if not any("build" in a or "compile" in a for a in actions_taken):
-            follow_ups.append("Run the build to verify it succeeds")
+            follow_ups.append("Run the tests")
 
-    # Generic follow-ups for creation tasks
-    if any(kw in task_lower for kw in ["create", "make", "build", "set up"]):
-        if len(actions_taken) < 3:
-            follow_ups.append("Verify the creation was successful")
-            follow_ups.append("Demonstrate that it works as expected")
+    # If task explicitly asks to run/verify, remind to do so
+    if any(kw in task_lower for kw in ["and run", "and test", "and verify", "make sure it works"]):
+        follow_ups.append("Execute what was created to verify it works")
 
     if follow_ups:
-        steps = "\n".join(f"- {step}" for step in follow_ups[:3])
+        steps = "\n".join(f"- {step}" for step in follow_ups[:2])
         return (
-            f"STOP - You are NOT done. The task was: \"{task}\"\n\n"
-            f"Actions so far: {actions_str}\n"
-            f"You MUST also:\n{steps}\n\n"
-            f"DO NOT respond with text. USE YOUR TOOLS NOW to complete these steps."
+            f"The task was: \"{task}\"\n\n"
+            f"You may need to also:\n{steps}\n\n"
+            f"If the task is actually complete, just confirm what was done."
        )
 
-    # Generic continuation - be forceful
+    # Generic - be gentle
     return (
-        f"INCOMPLETE. Task: \"{task}\"\n"
-        f"Actions taken: {actions_str} ({len(actions_taken)} total)\n\n"
-        f"You stopped too early. What about:\n"
-        f"- Testing/verifying the result?\n"
-        f"- Running what you created?\n"
-        f"- Installing dependencies?\n\n"
-        f"USE YOUR TOOLS to continue. Do not just describe - EXECUTE."
+        f"Task: \"{task}\"\n"
+        f"You took {len(actions_taken)} action(s). "
+        f"If there's more to do, continue. Otherwise, confirm completion."
    )
 
 
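A small sketch (not part of this commit) of how the reworked heuristic is expected to behave under the rules above; the import path is an assumption from the src/ layout.

    from loader.agent.reasoning import detect_premature_completion  # assumed import path

    # Simple task, one action, success wording in the reply -> not flagged under the new rules.
    detect_premature_completion(
        task="create a file hello.txt",
        response="Successfully created hello.txt",
        actions_taken=["write: {'file_path': 'hello.txt'}"],
    )  # -> False

    # Actionable task but zero actions taken -> still flagged as premature.
    detect_premature_completion(
        task="fix the bug in utils.py",
        response="You can open utils.py and change the function yourself.",
        actions_taken=[],
    )  # -> True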
src/loader/llm/base.py (modified)
@@ -59,6 +59,9 @@ class StreamChunk:
     full_content: str = ""  # Accumulated full content (only set when is_done=True)
     tool_calls: list[ToolCall] = field(default_factory=list)
     is_done: bool = False
+    # Pending tool call detected during streaming (ReAct mode)
+    # This allows showing tool widgets as they're detected, before streaming ends
+    pending_tool_call: ToolCall | None = None
 
 
 @dataclass
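A rough sketch (not in this commit) of how a consumer can use the new pending_tool_call field; only the StreamChunk fields come from this diff, the backend method and UI hooks below are placeholders.

    async for chunk in backend.stream(messages):              # placeholder method name
        if chunk.content:
            render_text(chunk.content)                        # placeholder UI hook
        if chunk.pending_tool_call is not None:
            # Show the tool widget as soon as the call is detected, before the stream ends.
            show_tool_widget(chunk.pending_tool_call.name, chunk.pending_tool_call.arguments)
        if chunk.is_done:
            final_text = chunk.full_content
            tool_calls = chunk.tool_calls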
src/loader/llm/ollama.py (modified)
@@ -286,10 +286,17 @@ class OllamaBackend(LLMBackend):
         if "tool_calls" in message:
             for i, tc in enumerate(message["tool_calls"]):
                 func = tc.get("function", {})
+                # Arguments may be a JSON string or dict
+                args = func.get("arguments", {})
+                if isinstance(args, str):
+                    try:
+                        args = json.loads(args)
+                    except json.JSONDecodeError:
+                        args = {}
                 tool_calls.append(ToolCall(
                     id=tc.get("id", f"call_{i}"),
                     name=func.get("name", ""),
-                    arguments=func.get("arguments", {}),
+                    arguments=args,
                 ))
         else:
             # Try to parse tool calls from text
@@ -362,6 +369,14 @@ class OllamaBackend(LLMBackend):
             async for chunk in self._stream_response(response):
                 yield chunk
 
+    def _debug_log(self, message: str) -> None:
+        """Write debug message to log file."""
+        try:
+            with open("/tmp/loader_debug.log", "a") as f:
+                f.write(f"[ollama] {message}\n")
+        except Exception:
+            pass
+
     async def _stream_response(self, response) -> AsyncIterator[StreamChunk]:
         """Internal helper to stream response chunks."""
         import re
@@ -369,9 +384,12 @@ class OllamaBackend(LLMBackend):
         full_content = ""
         display_content = ""  # Content to show (filtered)
         json_buffer = ""  # Buffer for potential tool call JSON
+        tool_call_buffer = ""  # Buffer for <tool_call> block content
         in_json_block = False
         in_think_block = False  # For reasoning models like deepseek-r1
         in_tool_call_block = False  # For ReAct <tool_call> tags
+        detected_tool_calls: list[ToolCall] = []  # Track tool calls found during streaming
+        tool_call_counter = 0
 
         async for line in response.aiter_lines():
             if not line:
@@ -392,18 +410,34 @@ class OllamaBackend(LLMBackend):
                 tool_calls = []
                 # Check for native tool calls first
                 if "tool_calls" in message:
+                    self._debug_log(f"is_done: found native tool_calls in message: {len(message['tool_calls'])}")
                     for i, tc in enumerate(message["tool_calls"]):
                         func = tc.get("function", {})
+                        # Arguments may be a JSON string or dict
+                        args = func.get("arguments", {})
+                        if isinstance(args, str):
+                            try:
+                                args = json.loads(args)
+                            except json.JSONDecodeError:
+                                args = {}
                         tool_calls.append(ToolCall(
                             id=tc.get("id", f"call_{i}"),
                             name=func.get("name", ""),
-                            arguments=func.get("arguments", {}),
+                            arguments=args,
                         ))
                 else:
-                    # Try to parse tool calls from text
-                    clean_content, tool_calls = self._parse_tool_calls(full_content)
-                    display_content = clean_content
-
+                    # Use detected tool calls from streaming, or parse from text
+                    if detected_tool_calls:
+                        self._debug_log(f"is_done: using {len(detected_tool_calls)} detected_tool_calls from streaming")
+                        tool_calls = detected_tool_calls
+                    else:
+                        self._debug_log(f"is_done: parsing tool calls from text (len={len(full_content)})")
+                        self._debug_log(f"is_done: full_content = {repr(full_content[:500])}")
+                        clean_content, tool_calls = self._parse_tool_calls(full_content)
+                        self._debug_log(f"is_done: parsed {len(tool_calls)} tool calls")
+                        display_content = clean_content
+
+                self._debug_log(f"is_done: yielding final chunk with {len(tool_calls)} tool_calls")
                 yield StreamChunk(
                     content="",  # Don't emit final chunk content (already streamed)
                     full_content=display_content or full_content,
@@ -431,27 +465,69 @@ class OllamaBackend(LLMBackend):
                     # Skip content inside think block
                     continue
 
-                # Filter out <tool_call> blocks from ReAct mode
+                # Filter out <tool_call> blocks from ReAct mode - but parse them!
                 if "<tool_call>" in chunk_content:
                     in_tool_call_block = True
+                    tool_call_buffer = ""  # Reset buffer
                     # Keep content before <tool_call>
                     before = chunk_content.split("<tool_call>")[0]
                     if before:
                         display_content += before
                         yield StreamChunk(content=before)
+                    # Start buffering the tool call content
+                    after_tag = chunk_content.split("<tool_call>", 1)[-1]
+                    if "</tool_call>" in after_tag:
+                        # Complete tool call in same chunk
+                        tool_json = after_tag.split("</tool_call>")[0]
+                        after_close = after_tag.split("</tool_call>", 1)[-1]
+                        in_tool_call_block = False
+                        # Parse and yield the tool call
+                        try:
+                            tc_data = json.loads(tool_json.strip())
+                            tc = ToolCall(
+                                id=f"call_{tool_call_counter}",
+                                name=tc_data.get("name", ""),
+                                arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
+                            )
+                            tool_call_counter += 1
+                            detected_tool_calls.append(tc)
+                            yield StreamChunk(content="", pending_tool_call=tc)
+                        except (json.JSONDecodeError, KeyError):
+                            pass
+                        if after_close.strip():
+                            display_content += after_close
+                            yield StreamChunk(content=after_close)
+                    else:
+                        tool_call_buffer = after_tag
                     continue
                 elif in_tool_call_block:
                     if "</tool_call>" in chunk_content:
                         in_tool_call_block = False
-                        # Keep content after </tool_call>
-                        after = chunk_content.split("</tool_call>")[-1]
-                        if after:
-                            display_content += after
-                            yield StreamChunk(content=after)
-                    # Skip content inside tool_call block
+                        # Complete the tool call buffer
+                        tool_json = tool_call_buffer + chunk_content.split("</tool_call>")[0]
+                        after_close = chunk_content.split("</tool_call>", 1)[-1]
+                        # Parse and yield the tool call
+                        try:
+                            tc_data = json.loads(tool_json.strip())
+                            tc = ToolCall(
+                                id=f"call_{tool_call_counter}",
+                                name=tc_data.get("name", ""),
+                                arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
+                            )
+                            tool_call_counter += 1
+                            detected_tool_calls.append(tc)
+                            yield StreamChunk(content="", pending_tool_call=tc)
+                        except (json.JSONDecodeError, KeyError):
+                            pass
+                        if after_close.strip():
+                            display_content += after_close
+                            yield StreamChunk(content=after_close)
+                    else:
+                        # Still accumulating tool call content
+                        tool_call_buffer += chunk_content
                     continue
 
-                # Filter out tool call JSON from display
+                # Filter out tool call JSON from display (bare JSON without tags)
                 # Detect start of JSON tool call
                 if not in_json_block and '{"name"' in chunk_content:
                     in_json_block = True
@@ -467,17 +543,29 @@ class OllamaBackend(LLMBackend):
                     open_braces = json_buffer.count('{')
                     close_braces = json_buffer.count('}')
                     if close_braces >= open_braces and open_braces > 0:
-                        # JSON block complete, don't display it
+                        # JSON block complete, try to parse it
                        in_json_block = False
-                        # Check for content after the JSON
                        try:
                             # Find where JSON ends
                             last_brace = json_buffer.rfind('}')
+                            json_str = json_buffer[:last_brace + 1]
                             after_json = json_buffer[last_brace + 1:]
+                            # Try to parse as tool call
+                            tc_data = json.loads(json_str)
+                            if "name" in tc_data:
+                                tc = ToolCall(
+                                    id=f"call_{tool_call_counter}",
+                                    name=tc_data.get("name", ""),
+                                    arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
+                                )
+                                tool_call_counter += 1
+                                detected_tool_calls.append(tc)
+                                yield StreamChunk(content="", pending_tool_call=tc)
                             if after_json.strip():
                                 display_content += after_json
                                 yield StreamChunk(content=after_json)
-                        except Exception:
+                        except (json.JSONDecodeError, KeyError):
+                            # Not valid JSON, just discard
                             pass
                         json_buffer = ""
                 else:
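For illustration, the kind of ReAct-style block the streaming filter above now parses mid-stream, and what it yields; the example model output is an assumption consistent with the parsing code in this diff.

    import json

    raw = '<tool_call>{"name": "bash", "arguments": {"command": "ls -la"}}</tool_call>'
    body = raw.split("<tool_call>", 1)[-1].split("</tool_call>")[0]
    tc_data = json.loads(body.strip())
    # tc_data == {"name": "bash", "arguments": {"command": "ls -la"}}
    # The tags and JSON are stripped from the displayed text, and the backend yields
    #   StreamChunk(content="", pending_tool_call=ToolCall(id="call_0", name="bash",
    #                                                      arguments={"command": "ls -la"}))
    # while also appending the same ToolCall to detected_tool_calls for the final chunk.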
src/loader/ui/adapter.py (modified)
@@ -178,12 +178,29 @@ class RollbackSummary(Message):
 class EventAdapter:
     """Adapts Agent callback events to Textual messages."""
 
+    DEBUG_LOG_FILE = "/tmp/loader_debug.log"
+
     def __init__(self, app: "LoaderApp") -> None:  # noqa: F821
         self.app = app
         self._tool_args_queue: list[tuple[str, dict]] = []  # Queue of (tool_name, args)
+        # Clear debug log on start
+        try:
+            with open(self.DEBUG_LOG_FILE, "w") as f:
+                f.write("=== Loader Debug Log ===\n")
+        except Exception:
+            pass
+
+    def _debug_log(self, message: str) -> None:
+        """Write debug message to log file."""
+        try:
+            with open(self.DEBUG_LOG_FILE, "a") as f:
+                f.write(f"{message}\n")
+        except Exception:
+            pass
 
     def handle_event(self, event: AgentEvent) -> None:
         """Convert AgentEvent to appropriate Textual message and post it."""
+        self._debug_log(f"handle_event: type={event.type}")
         match event.type:
             case "thinking":
                 self.app.post_message(ThinkingStarted())
@@ -201,11 +218,22 @@ class EventAdapter:
 
             case "tool_call":
                 # Queue args for matching with result (FIFO)
-                self._tool_args_queue.append((event.tool_name or "", event.tool_args or {}))
+                tool_name = event.tool_name or ""
+                tool_args = event.tool_args or {}
+                self._tool_args_queue.append((tool_name, tool_args))
+
+                # Debug: log tool args for edit/write (helps diagnose diff view issues)
+                self._debug_log(f"tool_call '{tool_name}': queued, keys={list(tool_args.keys())}")
+                if tool_name == "write":
+                    content = tool_args.get("content", "")
+                    self._debug_log(f"  write content: {len(content) if content else 0} chars")
+                elif tool_name == "edit":
+                    self._debug_log(f"  edit old_string: {bool(tool_args.get('old_string'))}, new_string: {bool(tool_args.get('new_string'))}")
+
                 self.app.post_message(
                     ToolCallStarted(
-                        tool_name=event.tool_name or "",
-                        tool_args=event.tool_args or {},
+                        tool_name=tool_name,
+                        tool_args=tool_args,
                     )
                 )
 
@@ -221,24 +249,63 @@ class EventAdapter:
                         if queued_name == tool_name:
                             tool_args = queued_args
                             self._tool_args_queue.pop(i)
+                            self._debug_log(f"tool_result '{tool_name}': matched in queue, keys={list(tool_args.keys())}")
                             break
                     else:
                         # No match found, use FIFO
-                        _, tool_args = self._tool_args_queue.pop(0)
+                        popped_name, tool_args = self._tool_args_queue.pop(0)
+                        self._debug_log(f"tool_result '{tool_name}': no match, used FIFO (got '{popped_name}'), keys={list(tool_args.keys())}")
+                else:
+                    self._debug_log(f"tool_result '{tool_name}': queue was EMPTY!")
 
                 # Extract diff info for edit/write tools
                 old_string = None
                 new_string = None
                 file_path = None
 
-                if tool_name == "edit" and tool_args:
-                    old_string = tool_args.get("old_string")
-                    new_string = tool_args.get("new_string")
-                    file_path = tool_args.get("file_path")
-                elif tool_name == "write" and tool_args:
+                if tool_name == "edit":
+                    if tool_args:
+                        # Try multiple key names that models might use
+                        old_string = (
+                            tool_args.get("old_string")
+                            or tool_args.get("old")
+                            or tool_args.get("original")
+                            or tool_args.get("search")
+                            or tool_args.get("find")
+                        )
+                        new_string = (
+                            tool_args.get("new_string")
+                            or tool_args.get("new")
+                            or tool_args.get("replacement")
+                            or tool_args.get("replace")
+                        )
+                        file_path = (
+                            tool_args.get("file_path")
+                            or tool_args.get("path")
+                            or tool_args.get("filename")
+                            or tool_args.get("file")
+                        )
+                        self._debug_log(f"  edit extracted: old={bool(old_string)} ({len(old_string) if old_string else 0} chars), new={bool(new_string)} ({len(new_string) if new_string else 0} chars), path={file_path}")
+                    else:
+                        self._debug_log(f"  edit: tool_args was empty!")
+                elif tool_name == "write":
                     # For writes, content is the new file content
-                    new_string = tool_args.get("content")
-                    file_path = tool_args.get("file_path")
+                    # Try multiple key names that models might use
+                    if tool_args:
+                        new_string = (
+                            tool_args.get("content")
+                            or tool_args.get("contents")
+                            or tool_args.get("text")
+                            or tool_args.get("data")
+                        )
+                        file_path = (
+                            tool_args.get("file_path")
+                            or tool_args.get("path")
+                            or tool_args.get("filename")
+                        )
+                        self._debug_log(f"  write extracted: new={bool(new_string)} ({len(new_string) if new_string else 0} chars), path={file_path}")
+                    else:
+                        self._debug_log(f"  write: tool_args was empty!")
 
                 self.app.post_message(
                     ToolCallCompleted(
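A small sketch (not in this commit) of what the alias fallback above buys: a model that emits non-canonical argument keys still produces a diff widget.

    tool_args = {"path": "notes.txt", "text": "hello"}   # alternate keys some models emit for a write
    new_string = (
        tool_args.get("content")
        or tool_args.get("contents")
        or tool_args.get("text")
        or tool_args.get("data")
    )
    file_path = (
        tool_args.get("file_path")
        or tool_args.get("path")
        or tool_args.get("filename")
    )
    # new_string == "hello", file_path == "notes.txt" -> the write diff widget can still render.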
src/loader/ui/app.py (modified)
@@ -68,6 +68,14 @@ class LoaderApp(App):
         self._tool_widget_queue: list[ToolCallWidget] = []  # Queue of pending tool widgets
         self._timer_handle = None
 
+    def _debug_log(self, message: str) -> None:
+        """Write debug message to log file."""
+        try:
+            with open("/tmp/loader_debug.log", "a") as f:
+                f.write(f"{message}\n")
+        except Exception:
+            pass
+
     def compose(self) -> ComposeResult:
         yield Container(
             ScrollableContainer(id="message-area"),
@@ -137,6 +145,10 @@ class LoaderApp(App):
 
         # If agent is running, this is a steering message
         if self.is_generating and self.agent.is_running:
+            # Finalize current streaming so new content appears below user's message
+            if self._current_streaming is not None:
+                self._current_streaming.stop_streaming()
+                self._current_streaming = None
             self._add_steering_message(user_input)
             self.agent.steer(user_input)
             return
@@ -258,6 +270,11 @@ class LoaderApp(App):
         """Handle tool call start."""
         msg_area = self.query_one("#message-area", ScrollableContainer)
 
+        # Finalize any ongoing streaming - tool calls interrupt thinking
+        if self._current_streaming is not None:
+            self._current_streaming.stop_streaming()
+            self._current_streaming = None
+
         # Create tool widget
         widget = ToolCallWidget(
             tool_name=message.tool_name,
@@ -272,12 +289,20 @@ class LoaderApp(App):
         """Handle tool call completion."""
         msg_area = self.query_one("#message-area", ScrollableContainer)
 
+        # Debug: log what we received
+        try:
+            with open("/tmp/loader_debug.log", "a") as f:
+                f.write(f"on_tool_call_completed: tool={message.tool_name}, new_string={bool(message.new_string)}, old_string={bool(message.old_string)}, file_path={message.file_path}\n")
+        except Exception:
+            pass
+
         # Get the corresponding tool widget from queue (FIFO)
         tool_widget = self._tool_widget_queue.pop(0) if self._tool_widget_queue else None
 
         # Check if this is an edit tool with diff info
         if message.tool_name == "edit" and message.old_string and message.new_string:
             # Replace tool widget with diff widget
+            self._debug_log("  -> showing EDIT diff widget")
             if tool_widget:
                 tool_widget.remove()
 
@@ -289,6 +314,7 @@ class LoaderApp(App):
             msg_area.mount(diff_widget)
         # Check if this is a write tool - show as diff (new file)
         elif message.tool_name == "write" and message.new_string:
+            self._debug_log("  -> showing WRITE diff widget")
             if tool_widget:
                 tool_widget.remove()
 
@@ -300,6 +326,7 @@ class LoaderApp(App):
             msg_area.mount(diff_widget)
         elif tool_widget:
             # Update existing tool widget with result
+            self._debug_log("  -> showing regular tool widget result")
             tool_widget.set_result(
                 message.content, is_error=message.is_error
             )
src/loader/ui/widgets/streaming.py (modified)
@@ -19,11 +19,25 @@ class StreamingText(Static):
     def render(self) -> Text:
         """Render the content with optional cursor."""
         # Use Text object to avoid markup interpretation of LLM output
-        text = Text(self._content_buffer)
+        # Clean any tool_call tags that slipped through filtering
+        content = self._clean_tool_tags(self._content_buffer)
+        text = Text(content)
         if self.is_streaming:
             text.append("|", style="dim")  # Cursor indicator
         return text
 
+    def _clean_tool_tags(self, content: str) -> str:
+        """Remove any tool_call/think tags that weren't filtered during streaming."""
+        import re
+        # Remove <tool_call>...</tool_call> blocks
+        content = re.sub(r'<tool_call>.*?</tool_call>', '', content, flags=re.DOTALL | re.IGNORECASE)
+        # Remove orphaned tags
+        content = re.sub(r'</?tool_call>', '', content, flags=re.IGNORECASE)
+        content = re.sub(r'</?think>', '', content, flags=re.IGNORECASE)
+        # Clean up excess newlines from removed blocks
+        content = re.sub(r'\n{3,}', '\n\n', content)
+        return content
+
     def append(self, chunk: str) -> None:
         """Append a chunk to the content."""
         self._content_buffer += chunk
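A quick sketch (not in this commit) of what _clean_tool_tags does to output that leaked past the streaming filter.

    raw = 'Sure.\n<tool_call>{"name": "bash"}</tool_call>\n\n\nDone.'
    # After _clean_tool_tags: the <tool_call> block is removed and the run of blank
    # lines it leaves behind is collapsed, giving:
    #   'Sure.\n\nDone.'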
tests/test_parsing.py (modified)
@@ -102,6 +102,37 @@ Action: <tool_call>
         assert "<tool_call>" not in result.content
         assert "</tool_call>" not in result.content
 
+    def test_parse_bracketed_calls_format(self):
+        """Test parsing [calls tool with: key=value] format."""
+        text = '''I'll create the file now.
+[calls write tool with: file_path=/tmp/test.txt, content="hello world"]
+Created the file.'''
+        result = parse_tool_calls(text)
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].name == "write"
+        assert result.tool_calls[0].arguments["file_path"] == "/tmp/test.txt"
+        assert result.tool_calls[0].arguments["content"] == "hello world"
+        # Bracketed call should be removed from content
+        assert "[calls" not in result.content
+
+    def test_parse_bracketed_use_format(self):
+        """Test parsing [USE tool: key=value] format."""
+        text = '[USE bash tool: command="ls -la"]'
+        result = parse_tool_calls(text)
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].name == "bash"
+        assert result.tool_calls[0].arguments["command"] == "ls -la"
+
+    def test_parse_bracketed_edit_format(self):
+        """Test parsing bracketed format with edit tool."""
+        text = '[calls edit tool with: file_path="test.py", old_string="foo", new_string="bar"]'
+        result = parse_tool_calls(text)
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].name == "edit"
+        assert result.tool_calls[0].arguments["file_path"] == "test.py"
+        assert result.tool_calls[0].arguments["old_string"] == "foo"
+        assert result.tool_calls[0].arguments["new_string"] == "bar"
+
 
 class TestFormatToolResult:
     """Tests for format_tool_result function."""