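Fixes to agent behavior (streamed tool-call detection, bracketed tool-call parsing, more conservative premature-completion checks), plus debug logging, UI changes, and other changes the author did not itemize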

Status	File	+	-
A	`package-lock.json`	56	0
A	`package.json`	30	0
M	`src/loader/agent/loop.py`	61	20
M	`src/loader/agent/parsing.py`	50	0
M	`src/loader/agent/reasoning.py`	67	88
M	`src/loader/llm/base.py`	3	0
M	`src/loader/llm/ollama.py`	105	17
M	`src/loader/ui/adapter.py`	78	11
M	`src/loader/ui/app.py`	27	0
M	`src/loader/ui/widgets/streaming.py`	15	1
M	`tests/test_parsing.py`	31	0

`package-lock.json` — added

++{
++  "name": "loader",
++  "version": "1.0.0",
++  "lockfileVersion": 3,
++  "requires": true,
++  "packages": {
++    "": {
++      "name": "loader",
++      "version": "1.0.0",
++      "license": "ISC",
++      "dependencies": {
++        "react": "^19.2.3",
++        "react-dom": "^19.2.3",
++        "react-parallax-tilt": "^1.7.315"
++      },
++      "devDependencies": {}
++    },
++    "node_modules/react": {
++      "version": "19.2.3",
++      "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz",
++      "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==",
++      "license": "MIT",
++      "engines": {
++        "node": ">=0.10.0"
++      }
++    },
++    "node_modules/react-dom": {
++      "version": "19.2.3",
++      "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz",
++      "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==",
++      "license": "MIT",
++      "dependencies": {
++        "scheduler": "^0.27.0"
++      },
++      "peerDependencies": {
++        "react": "^19.2.3"
++      }
++    },
++    "node_modules/react-parallax-tilt": {
++      "version": "1.7.315",
++      "resolved": "https://registry.npmjs.org/react-parallax-tilt/-/react-parallax-tilt-1.7.315.tgz",
++      "integrity": "sha512-m0I2yPEmzEC+qGelF+8P+L60lH/S50OJE+pz1bVmurnkKNMyd2Q4qhtAi8zRibNkwFd6oOGvA8qEqAySBbAOJg==",
++      "license": "MIT",
++      "peerDependencies": {
++        "react": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
++        "react-dom": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
++      }
++    },
++    "node_modules/scheduler": {
++      "version": "0.27.0",
++      "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz",
++      "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==",
++      "license": "MIT"
++    }
++  }
++}

`package.json` — added

++{
++  "dependencies": {
++    "react": "^19.2.3",
++    "react-dom": "^19.2.3",
++    "react-parallax-tilt": "^1.7.315"
++  },
++  "name": "loader",
++  "version": "1.0.0",
++  "description": "Local agentic coding assistant. Runs on your hardware with local LLMs.",
++  "main": "index.js",
++  "directories": {
++    "doc": "docs",
++    "test": "tests"
++  },
++  "scripts": {
++    "test": "echo \"Error: no test specified\" && exit 1"
++  },
++  "repository": {
++    "type": "git",
++    "url": "git+https://github.com/tenseleyFlow/loader.git"
++  },
++  "keywords": [],
++  "author": "",
++  "license": "ISC",
++  "type": "commonjs",
++  "bugs": {
++    "url": "https://github.com/tenseleyFlow/loader/issues"
++  },
++  "homepage": "https://github.com/tenseleyFlow/loader#readme"
++}

`src/loader/agent/loop.py` — modified

          # Check if backend supports native tools
          if hasattr(self.backend, "supports_native_tools"):
--            self._use_react = not self.backend.supports_native_tools()
++            supports_native = self.backend.supports_native_tools()
++            self._use_react = not supports_native
++            # Debug log
++            try:
++                with open("/tmp/loader_debug.log", "a") as f:
++                    f.write(f"[loop] use_react: supports_native={supports_native}, use_react={self._use_react}\n")
++            except Exception:
++                pass
          else:
              # Default to ReAct for unknown backends
              self._use_react = True
              tools = None if self.use_react else self.registry.get_schemas()
              # Use streaming or regular completion
++            pending_tool_calls_seen: set[str] = set()  # Track IDs of pending tool calls shown
              if self.config.stream:
                  full_content = ""
                  tool_calls: list[ToolCall] = []
                      temperature=self.config.temperature,
                      max_tokens=effective_max_tokens,
                  ):
--                    if chunk.content:
++                    # Emit stream events for content OR for final chunk (to signal end)
++                    if chunk.content or chunk.is_done:
                          await emit(AgentEvent(
                              type="stream",
                              content=chunk.content,
                              is_stream_end=chunk.is_done,
                          ))
++                    # Show pending tool calls as they're detected (ReAct mode interleaving)
++                    if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen:
++                        pending_tool_calls_seen.add(chunk.pending_tool_call.id)
++                        await emit(AgentEvent(
++                            type="tool_call",
++                            tool_name=chunk.pending_tool_call.name,
++                            tool_args=chunk.pending_tool_call.arguments,
++                        ))
                      if chunk.is_done:
                          full_content = chunk.full_content or full_content
                          tool_calls = chunk.tool_calls
++                        # Debug log
++                        try:
++                            with open("/tmp/loader_debug.log", "a") as f:
++                                f.write(f"[loop] chunk.is_done: got {len(tool_calls)} tool_calls\n")
++                        except Exception:
++                            pass
                  content = full_content
                  response_content = full_content
              # If there are tool calls, execute them
              if tool_calls:
++                # Debug log
++                try:
++                    with open("/tmp/loader_debug.log", "a") as f:
++                        f.write(f"[loop] executing {len(tool_calls)} tool_calls\n")
++                        for tc in tool_calls:
++                            f.write(f"[loop]   - {tc.name}: id={tc.id}, args_keys={list(tc.arguments.keys())}\n")
++                except Exception:
++                    pass
++
                  # Add assistant message with tool calls
                  self.messages.append(Message(
                      role=Role.ASSISTANT,
                              ))
                              continue  # Skip this tool call, let LLM reconsider
--                    await emit(AgentEvent(
++                    # Only emit tool_call if not already shown during streaming
--                        type="tool_call",
++                    if tool_call.id not in pending_tool_calls_seen:
--                        tool_name=tool_call.name,
++                        try:
--                        tool_args=tool_call.arguments,
++                            with open("/tmp/loader_debug.log", "a") as f:
--                    ))
++                                f.write(f"[loop] emitting tool_call event for {tool_call.name}\n")
++                        except Exception:
++                            pass
++                        await emit(AgentEvent(
++                            type="tool_call",
++                            tool_name=tool_call.name,
++                            tool_args=tool_call.arguments,
++                        ))
++                    else:
++                        try:
++                            with open("/tmp/loader_debug.log", "a") as f:
++                                f.write(f"[loop] SKIPPING tool_call event for {tool_call.name} (already in pending_seen)\n")
++                        except Exception:
++                            pass
                      # Track this action for completion checking
                      action_desc = f"{tool_call.name}: {str(tool_call.arguments)[:100]}"
                  ))
                  continue
--            # No tool calls and early in the task - likely giving up too soon
++            # No tool calls and early in the task - MAY be giving up too soon
--            # This catches native mode models that stop without using tools
++            # But only intervene if we haven't done ANY work yet
--            if not self.use_react and len(actions_taken) < 5 and iterations < self.config.max_iterations - 2:
++            if not self.use_react and len(actions_taken) == 0 and iterations < self.config.max_iterations - 2:
--                # Check if response looks like a stopping point but we haven't done much
++                # Check if response looks like deflection without having done anything
--                stopping_phrases = [
++                deflection_phrases = ["you can", "you should", "you could", "try running"]
--                    "let me know", "feel free", "hope this", "happy to help",
++                looks_like_deflection = any(p in content.lower() for p in deflection_phrases)
--                    "anything else", "is there", "that's", "all done", "complete",
++
--                ]
++                if looks_like_deflection:
--                looks_like_stopping = any(p in content.lower() for p in stopping_phrases)
--
--                if looks_like_stopping or len(content) < 150:
                      self.messages.append(Message(
                          role=Role.ASSISTANT,
                          content=response_content,
                      ))
                      self.messages.append(Message(
                          role=Role.USER,
--                        content="You stopped without completing the task. Continue executing - "
++                        content="Please use your tools to execute the task rather than telling me what to do.",
--                                "use your tools to finish the job. Don't describe what to do, DO IT.",
                      ))
                      continue

`src/loader/agent/parsing.py` — modified

      return {}
++def _parse_bracket_args(args_str: str) -> dict:
++    """Parse arguments from bracketed tool call format.
++
++    Handles formats like:
++        file_path=/tmp/test.txt, content="hello world"
++        command="ls -la"
++        file_path="test.py", old_string="foo", new_string="bar"
++
++    Args:
++        args_str: The arguments string (everything after "tool with:" or "tool:")
++
++    Returns:
++        Dictionary of parsed arguments
++    """
++    args = {}
++
++    # Pattern to match key=value pairs where value can be:
++    # - quoted string (single or double quotes)
++    # - unquoted value (until comma or end)
++    pattern = r'(\w+)\s*=\s*(?:"([^"]*?)"|\'([^\']*?)\'|([^,\]]+?))\s*(?:,|$)'
++
++    for match in re.finditer(pattern, args_str):
++        key = match.group(1)
++        # Value is in one of the capture groups (2=double quoted, 3=single quoted, 4=unquoted)
++        value = match.group(2) or match.group(3) or match.group(4)
++        if value is not None:
++            value = value.strip()
++            args[key] = value
++
++    return args
++
++
  def parse_tool_calls(text: str) -> ParsedResponse:
      """Parse tool calls from LLM text output.
          if tool_calls:
              content = re.sub(bare_json_pattern, "", content)
++    # Pattern 3: Bracketed format [calls/USE tool with/: key=value, ...]
++    # Examples:
++    #   [calls write tool with: file_path=/tmp/test.txt, content="hello"]
++    #   [USE bash tool: command="ls -la"]
++    if not tool_calls:
++        bracket_pattern = r'\[(?:calls|USE)\s+(\w+)\s+tool(?:\s+with)?[:\s]+([^\]]+)\]'
++        for i, (name, args_str) in enumerate(re.findall(bracket_pattern, text, re.IGNORECASE)):
++            args = _parse_bracket_args(args_str)
++            if args:
++                tool_calls.append(ToolCall(
++                    id=f"call_{i}",
++                    name=name.lower(),
++                    arguments=args,
++                ))
++        # Remove bracketed tool calls from content
++        if tool_calls:
++            content = re.sub(bracket_pattern, "", content, flags=re.IGNORECASE)
++
      # Clean up content
      content = content.strip()

`src/loader/agent/reasoning.py` — modified

      """Quick heuristic to detect if agent is stopping too early.
      Returns True if the agent might be giving up prematurely.
++    This should be CONSERVATIVE - only trigger when really needed,
++    not for simple tasks that are genuinely complete.
      """
      task_lower = task.lower()
      response_lower = response.lower()
--    # Keywords that suggest the task should involve multiple steps
++    # If no actions taken at all and task requires action, that's premature
--    multi_step_indicators = [
++    if not actions_taken:
--        "create a", "build a", "make a", "set up", "setup",
++        # But only if this looks like an actionable task
--        "initialize", "scaffold", "generate", "implement",
++        action_verbs = ["create", "write", "make", "edit", "fix", "add", "delete", "run"]
--        "project", "application", "app", "website", "api",
++        if any(verb in task_lower for verb in action_verbs):
--        "add", "write", "develop", "design", "help me",
++            return True
--    ]
++        return False  # Informational/conversational tasks don't need actions
--    # Keywords that suggest testing/verification should happen
++    # If we took actions and got successful results, trust that we're done
--    verification_indicators = [
++    # Check for success indicators in response
--        "test", "run", "start", "launch", "verify", "check",
++    success_indicators = [
--        "demo", "show", "demonstrate", "work", "function",
++        "successfully", "created", "written", "done", "completed",
++        "file now contains", "has been updated", "installed",
+     ]
++    if any(ind in response_lower for ind in success_indicators) and len(actions_taken) >= 1:
++        return False  # Likely actually done
--    # Keywords in response that suggest premature completion
++    # Keywords that suggest COMPLEX multi-step tasks (not simple ones)
--    premature_phrases = [
++    complex_indicators = [
--        "i've created", "i created", "file has been created",
++        "set up a project", "create a project", "build a complete",
--        "here's the", "i've set up the basic", "i've written",
++        "scaffold", "initialize a new", "create a full",
--        "you can now", "you should now", "you can run",
++        "implement a full", "develop a complete",
--        "that's it", "all done", "complete", "finished",
--        "let me know", "feel free to", "hope this helps",
--        "is there anything else",
+     ]
++    is_complex = any(ind in task_lower for ind in complex_indicators)
--    # Check if this looks like a multi-step task
++    # Simple creation tasks don't need follow-up
--    is_multi_step = any(ind in task_lower for ind in multi_step_indicators)
++    simple_creation = [
--
++        "create a file", "write a file", "make a file",
--    # Check if verification was expected but not done
++        "add a function", "edit the", "fix the", "update the",
--    expects_verification = any(ind in task_lower for ind in verification_indicators)
++        "read the", "show me", "list",
++    ]
++    is_simple = any(ind in task_lower for ind in simple_creation)
--    # Check for premature completion phrases
++    # If it's a simple task with at least one action, it's probably done
--    has_premature_phrase = any(phrase in response_lower for phrase in premature_phrases)
++    if is_simple and len(actions_taken) >= 1:
++        return False
--    # Action count thresholds
++    # Explicit verification requests need bash
--    few_actions = len(actions_taken) < 3
++    explicit_verification = ["and test", "and run", "and verify", "make sure it works"]
--    very_few_actions = len(actions_taken) < 2
++    needs_verification = any(ind in task_lower for ind in explicit_verification)
      # Categorize what actions were taken
      action_types = set()
          elif "glob" in action_lower or "grep" in action_lower:
              action_types.add("search")
--    # More aggressive detection:
++    # Detection rules (more conservative):
--    # 1. Multi-step task with premature phrases and few actions
++    # 1. Complex project tasks with very few actions
--    if is_multi_step and has_premature_phrase and few_actions:
++    if is_complex and len(actions_taken) < 3:
          return True
--    # 2. Multi-step task with very few actions (regardless of phrases)
++    # 2. Explicitly requested verification but no bash run
--    if is_multi_step and very_few_actions:
++    if needs_verification and "bash" not in action_types:
          return True
--    # 3. Only wrote/edited files but never ran/tested anything
++    # 3. Chatbot-style deflection with no real work done
--    if action_types and action_types <= {"write", "edit", "read"} and few_actions:
++    deflection_phrases = ["you can now", "you should", "you can run", "you can use"]
--        # Wrote files but never executed bash to test
++    if any(phrase in response_lower for phrase in deflection_phrases) and len(actions_taken) < 2:
--        if "write" in action_types or "edit" in action_types:
--            return True
--
--    # 4. Verification expected but no bash commands run
--    if expects_verification and "bash" not in action_types:
--        return True
--
--    # 5. Response has chatbot-style "let me know" phrases
--    chatbot_phrases = ["let me know", "feel free", "hope this", "happy to help"]
--    if any(phrase in response_lower for phrase in chatbot_phrases):
--        return True
--
--    # 6. Response is very short but task seems substantial
--    if len(response) < 200 and is_multi_step and len(actions_taken) > 0:
          return True
      return False
      """Generate a prompt to encourage the agent to continue.
      Returns a prompt that nudges the agent to follow through.
++    Should be helpful, not aggressive.
      """
      task_lower = task.lower()
      actions_str = ", ".join(a.split(":")[0] for a in actions_taken[-5:]) if actions_taken else "none"
      # Determine what type of follow-up is needed
      follow_ups = []
--    # Project setup tasks should initialize
++    # Only suggest package install if explicitly mentioned in task
--    if any(kw in task_lower for kw in ["node", "npm", "javascript", "react", "vue", "next"]):
++    if any(kw in task_lower for kw in ["install", "dependencies", "set up project"]):
--        if not any("npm" in a for a in actions_taken):
++        if "node" in task_lower or "npm" in task_lower:
--            follow_ups.append("Run `npm install` to install dependencies")
++            if not any("npm" in a for a in actions_taken):
--            follow_ups.append("Start the development server to verify it works")
++                follow_ups.append("Run `npm install` to install dependencies")
--
++        if "python" in task_lower or "pip" in task_lower:
--    if any(kw in task_lower for kw in ["python", "pip", "django", "flask", "fastapi"]):
++            if not any("pip" in a or "uv" in a for a in actions_taken):
--        if not any("pip" in a or "uv" in a for a in actions_taken):
++                follow_ups.append("Install dependencies")
--            follow_ups.append("Install dependencies with pip/uv")
++
--            follow_ups.append("Run the application to verify it works")
++    # Only suggest running tests if "test" is explicitly in task
--
++    if "test" in task_lower and "run" in task_lower:
--    # Test tasks should run tests
--    if "test" in task_lower:
          if not any("test" in a or "pytest" in a or "jest" in a for a in actions_taken):
--            follow_ups.append("Run the tests to verify they pass")
++            follow_ups.append("Run the tests")
--
--    # Build tasks should verify build
--    if "build" in task_lower or "compile" in task_lower:
--        if not any("build" in a or "compile" in a for a in actions_taken):
--            follow_ups.append("Run the build to verify it succeeds")
--    # Generic follow-ups for creation tasks
++    # If task explicitly asks to run/verify, remind to do so
--    if any(kw in task_lower for kw in ["create", "make", "build", "set up"]):
++    if any(kw in task_lower for kw in ["and run", "and test", "and verify", "make sure it works"]):
--        if len(actions_taken) < 3:
++        follow_ups.append("Execute what was created to verify it works")
--            follow_ups.append("Verify the creation was successful")
--            follow_ups.append("Demonstrate that it works as expected")
      if follow_ups:
--        steps = "\n".join(f"- {step}" for step in follow_ups[:3])
++        steps = "\n".join(f"- {step}" for step in follow_ups[:2])
          return (
--            f"STOP - You are NOT done. The task was: \"{task}\"\n\n"
++            f"The task was: \"{task}\"\n\n"
--            f"Actions so far: {actions_str}\n"
++            f"You may need to also:\n{steps}\n\n"
--            f"You MUST also:\n{steps}\n\n"
++            f"If the task is actually complete, just confirm what was done."
--            f"DO NOT respond with text. USE YOUR TOOLS NOW to complete these steps."
+         )
--    # Generic continuation - be forceful
++    # Generic - be gentle
      return (
--        f"INCOMPLETE. Task: \"{task}\"\n"
++        f"Task: \"{task}\"\n"
--        f"Actions taken: {actions_str} ({len(actions_taken)} total)\n\n"
++        f"You took {len(actions_taken)} action(s). "
--        f"You stopped too early. What about:\n"
++        f"If there's more to do, continue. Otherwise, confirm completion."
--        f"- Testing/verifying the result?\n"
--        f"- Running what you created?\n"
--        f"- Installing dependencies?\n\n"
--        f"USE YOUR TOOLS to continue. Do not just describe - EXECUTE."
+     )

`src/loader/llm/base.py` — modified


     full_content: str = ""  # Accumulated full content (only set when is_done=True)
     tool_calls: list[ToolCall] = field(default_factory=list)
     is_done: bool = False
+    # Pending tool call detected during streaming (ReAct mode)
+    # This allows showing tool widgets as they're detected, before streaming ends
+    pending_tool_call: ToolCall | None = None
 
 
 @dataclass

`src/loader/llm/ollama.py` — modified

          if "tool_calls" in message:
              for i, tc in enumerate(message["tool_calls"]):
                  func = tc.get("function", {})
++                # Arguments may be a JSON string or dict
++                args = func.get("arguments", {})
++                if isinstance(args, str):
++                    try:
++                        args = json.loads(args)
++                    except json.JSONDecodeError:
++                        args = {}
                  tool_calls.append(ToolCall(
                      id=tc.get("id", f"call_{i}"),
                      name=func.get("name", ""),
--                    arguments=func.get("arguments", {}),
++                    arguments=args,
                  ))
          else:
              # Try to parse tool calls from text
              async for chunk in self._stream_response(response):
                  yield chunk
++    def _debug_log(self, message: str) -> None:
++        """Write debug message to log file."""
++        try:
++            with open("/tmp/loader_debug.log", "a") as f:
++                f.write(f"[ollama] {message}\n")
++        except Exception:
++            pass
++
      async def _stream_response(self, response) -> AsyncIterator[StreamChunk]:
          """Internal helper to stream response chunks."""
          import re
          full_content = ""
          display_content = ""  # Content to show (filtered)
          json_buffer = ""  # Buffer for potential tool call JSON
++        tool_call_buffer = ""  # Buffer for <tool_call> block content
          in_json_block = False
          in_think_block = False  # For reasoning models like deepseek-r1
          in_tool_call_block = False  # For ReAct <tool_call> tags
++        detected_tool_calls: list[ToolCall] = []  # Track tool calls found during streaming
++        tool_call_counter = 0
          async for line in response.aiter_lines():
              if not line:
                  tool_calls = []
                  # Check for native tool calls first
                  if "tool_calls" in message:
++                    self._debug_log(f"is_done: found native tool_calls in message: {len(message['tool_calls'])}")
                      for i, tc in enumerate(message["tool_calls"]):
                          func = tc.get("function", {})
++                        # Arguments may be a JSON string or dict
++                        args = func.get("arguments", {})
++                        if isinstance(args, str):
++                            try:
++                                args = json.loads(args)
++                            except json.JSONDecodeError:
++                                args = {}
                          tool_calls.append(ToolCall(
                              id=tc.get("id", f"call_{i}"),
                              name=func.get("name", ""),
--                            arguments=func.get("arguments", {}),
++                            arguments=args,
                          ))
                  else:
--                    # Try to parse tool calls from text
++                    # Use detected tool calls from streaming, or parse from text
--                    clean_content, tool_calls = self._parse_tool_calls(full_content)
++                    if detected_tool_calls:
--                    display_content = clean_content
++                        self._debug_log(f"is_done: using {len(detected_tool_calls)} detected_tool_calls from streaming")
--
++                        tool_calls = detected_tool_calls
++                    else:
++                        self._debug_log(f"is_done: parsing tool calls from text (len={len(full_content)})")
++                        self._debug_log(f"is_done: full_content = {repr(full_content[:500])}")
++                        clean_content, tool_calls = self._parse_tool_calls(full_content)
++                        self._debug_log(f"is_done: parsed {len(tool_calls)} tool calls")
++                        display_content = clean_content
++
++                self._debug_log(f"is_done: yielding final chunk with {len(tool_calls)} tool_calls")
                  yield StreamChunk(
                      content="",  # Don't emit final chunk content (already streamed)
                      full_content=display_content or full_content,
                      # Skip content inside think block
                      continue
--                # Filter out <tool_call> blocks from ReAct mode
++                # Filter out <tool_call> blocks from ReAct mode - but parse them!
                  if "<tool_call>" in chunk_content:
                      in_tool_call_block = True
++                    tool_call_buffer = ""  # Reset buffer
                      # Keep content before <tool_call>
                      before = chunk_content.split("<tool_call>")[0]
                      if before:
                          display_content += before
                          yield StreamChunk(content=before)
++                    # Start buffering the tool call content
++                    after_tag = chunk_content.split("<tool_call>", 1)[-1]
++                    if "</tool_call>" in after_tag:
++                        # Complete tool call in same chunk
++                        tool_json = after_tag.split("</tool_call>")[0]
++                        after_close = after_tag.split("</tool_call>", 1)[-1]
++                        in_tool_call_block = False
++                        # Parse and yield the tool call
++                        try:
++                            tc_data = json.loads(tool_json.strip())
++                            tc = ToolCall(
++                                id=f"call_{tool_call_counter}",
++                                name=tc_data.get("name", ""),
++                                arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
++                            )
++                            tool_call_counter += 1
++                            detected_tool_calls.append(tc)
++                            yield StreamChunk(content="", pending_tool_call=tc)
++                        except (json.JSONDecodeError, KeyError):
++                            pass
++                        if after_close.strip():
++                            display_content += after_close
++                            yield StreamChunk(content=after_close)
++                    else:
++                        tool_call_buffer = after_tag
                      continue
                  elif in_tool_call_block:
                      if "</tool_call>" in chunk_content:
                          in_tool_call_block = False
--                        # Keep content after </tool_call>
++                        # Complete the tool call buffer
--                        after = chunk_content.split("</tool_call>")[-1]
++                        tool_json = tool_call_buffer + chunk_content.split("</tool_call>")[0]
--                        if after:
++                        after_close = chunk_content.split("</tool_call>", 1)[-1]
--                            display_content += after
++                        # Parse and yield the tool call
--                            yield StreamChunk(content=after)
++                        try:
--                    # Skip content inside tool_call block
++                            tc_data = json.loads(tool_json.strip())
++                            tc = ToolCall(
++                                id=f"call_{tool_call_counter}",
++                                name=tc_data.get("name", ""),
++                                arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
++                            )
++                            tool_call_counter += 1
++                            detected_tool_calls.append(tc)
++                            yield StreamChunk(content="", pending_tool_call=tc)
++                        except (json.JSONDecodeError, KeyError):
++                            pass
++                        if after_close.strip():
++                            display_content += after_close
++                            yield StreamChunk(content=after_close)
++                    else:
++                        # Still accumulating tool call content
++                        tool_call_buffer += chunk_content
                      continue
--                # Filter out tool call JSON from display
++                # Filter out tool call JSON from display (bare JSON without tags)
                  # Detect start of JSON tool call
                  if not in_json_block and '{"name"' in chunk_content:
                      in_json_block = True
                      open_braces = json_buffer.count('{')
                      close_braces = json_buffer.count('}')
                      if close_braces >= open_braces and open_braces > 0:
--                        # JSON block complete, don't display it
++                        # JSON block complete, try to parse it
                          in_json_block = False
--                        # Check for content after the JSON
                          try:
                              # Find where JSON ends
                              last_brace = json_buffer.rfind('}')
++                            json_str = json_buffer[:last_brace + 1]
                              after_json = json_buffer[last_brace + 1:]
++                            # Try to parse as tool call
++                            tc_data = json.loads(json_str)
++                            if "name" in tc_data:
++                                tc = ToolCall(
++                                    id=f"call_{tool_call_counter}",
++                                    name=tc_data.get("name", ""),
++                                    arguments=tc_data.get("arguments", tc_data.get("parameters", {})),
++                                )
++                                tool_call_counter += 1
++                                detected_tool_calls.append(tc)
++                                yield StreamChunk(content="", pending_tool_call=tc)
                              if after_json.strip():
                                  display_content += after_json
                                  yield StreamChunk(content=after_json)
--                        except Exception:
++                        except (json.JSONDecodeError, KeyError):
++                            # Not valid JSON, just discard
                              pass
                          json_buffer = ""
                  else:

src/loader/ui/adapter.py modified

  class EventAdapter:
      """Adapts Agent callback events to Textual messages."""
++    DEBUG_LOG_FILE = "/tmp/loader_debug.log"
++
      def __init__(self, app: "LoaderApp") -> None:  # noqa: F821
++        """Bind the adapter to *app* and start a fresh debug log.
++
++        Args:
++            app: Application object whose ``post_message`` receives the
++                messages produced by ``handle_event``.
++        """
          self.app = app
          self._tool_args_queue: list[tuple[str, dict]] = []  # Queue of (tool_name, args)
++        # Truncate the debug log on start. This is best-effort: a log file
++        # that cannot be opened or written must never stop the UI from
++        # starting, so I/O failures are deliberately ignored.
++        try:
++            with open(self.DEBUG_LOG_FILE, "w", encoding="utf-8") as f:
++                f.write("=== Loader Debug Log ===\n")
++        except OSError:
++            pass
++
++    def _debug_log(self, message: str) -> None:
++        """Append *message* and a trailing newline to ``DEBUG_LOG_FILE``.
++
++        Logging is best-effort: I/O failures are ignored so that diagnostics
++        can never break event handling.
++
++        Args:
++            message: Text to record; it is written verbatim as one line.
++        """
++        try:
++            # Explicit UTF-8 keeps the log independent of the locale, and
++            # backslashreplace keeps lines that contain unencodable
++            # characters (e.g. lone surrogates) instead of dropping them.
++            with open(
++                self.DEBUG_LOG_FILE, "a", encoding="utf-8", errors="backslashreplace"
++            ) as f:
++                f.write(f"{message}\n")
++        except OSError:
++            pass
      def handle_event(self, event: AgentEvent) -> None:
          """Convert AgentEvent to appropriate Textual message and post it."""
++        self._debug_log(f"handle_event: type={event.type}")
          match event.type:
              case "thinking":
                  self.app.post_message(ThinkingStarted())
              case "tool_call":
                  # Queue args for matching with result (FIFO)
--                self._tool_args_queue.append((event.tool_name or "", event.tool_args or {}))
++                tool_name = event.tool_name or ""
++                tool_args = event.tool_args or {}
++                self._tool_args_queue.append((tool_name, tool_args))
++
++                # Debug: log tool args for edit/write (helps diagnose diff view issues)
++                self._debug_log(f"tool_call '{tool_name}': queued, keys={list(tool_args.keys())}")
++                if tool_name == "write":
++                    content = tool_args.get("content", "")
++                    self._debug_log(f"  write content: {len(content) if content else 0} chars")
++                elif tool_name == "edit":
++                    self._debug_log(f"  edit old_string: {bool(tool_args.get('old_string'))}, new_string: {bool(tool_args.get('new_string'))}")
++
                  self.app.post_message(
                      ToolCallStarted(
--                        tool_name=event.tool_name or "",
++                        tool_name=tool_name,
--                        tool_args=event.tool_args or {},
++                        tool_args=tool_args,
+                     )
+                 )
                          if queued_name == tool_name:
                              tool_args = queued_args
                              self._tool_args_queue.pop(i)
++                            self._debug_log(f"tool_result '{tool_name}': matched in queue, keys={list(tool_args.keys())}")
                              break
                      else:
                          # No match found, use FIFO
--                        _, tool_args = self._tool_args_queue.pop(0)
++                        popped_name, tool_args = self._tool_args_queue.pop(0)
++                        self._debug_log(f"tool_result '{tool_name}': no match, used FIFO (got '{popped_name}'), keys={list(tool_args.keys())}")
++                else:
++                    self._debug_log(f"tool_result '{tool_name}': queue was EMPTY!")
                  # Extract diff info for edit/write tools
                  old_string = None
                  new_string = None
                  file_path = None
--                if tool_name == "edit" and tool_args:
++                if tool_name == "edit":
--                    old_string = tool_args.get("old_string")
++                    if tool_args:
--                    new_string = tool_args.get("new_string")
++                        # Try multiple key names that models might use
--                    file_path = tool_args.get("file_path")
++                        old_string = (
--                elif tool_name == "write" and tool_args:
++                            tool_args.get("old_string")
++                            or tool_args.get("old")
++                            or tool_args.get("original")
++                            or tool_args.get("search")
++                            or tool_args.get("find")
++                        )
++                        new_string = (
++                            tool_args.get("new_string")
++                            or tool_args.get("new")
++                            or tool_args.get("replacement")
++                            or tool_args.get("replace")
++                        )
++                        file_path = (
++                            tool_args.get("file_path")
++                            or tool_args.get("path")
++                            or tool_args.get("filename")
++                            or tool_args.get("file")
++                        )
++                        self._debug_log(f"  edit extracted: old={bool(old_string)} ({len(old_string) if old_string else 0} chars), new={bool(new_string)} ({len(new_string) if new_string else 0} chars), path={file_path}")
++                    else:
++                        self._debug_log(f"  edit: tool_args was empty!")
++                elif tool_name == "write":
                      # For writes, content is the new file content
--                    new_string = tool_args.get("content")
++                    # Try multiple key names that models might use
--                    file_path = tool_args.get("file_path")
++                    if tool_args:
++                        new_string = (
++                            tool_args.get("content")
++                            or tool_args.get("contents")
++                            or tool_args.get("text")
++                            or tool_args.get("data")
++                        )
++                        file_path = (
++                            tool_args.get("file_path")
++                            or tool_args.get("path")
++                            or tool_args.get("filename")
++                        )
++                        self._debug_log(f"  write extracted: new={bool(new_string)} ({len(new_string) if new_string else 0} chars), path={file_path}")
++                    else:
++                        self._debug_log(f"  write: tool_args was empty!")
                  self.app.post_message(
                      ToolCallCompleted(

src/loader/ui/app.py modified

          self._tool_widget_queue: list[ToolCallWidget] = []  # Queue of pending tool widgets
          self._timer_handle = None
++    def _debug_log(self, message: str) -> None:
++        """Append *message* and a trailing newline to the debug log file.
++
++        Logging is best-effort: I/O failures are ignored so that diagnostics
++        can never break the UI.
++
++        Args:
++            message: Text to record; it is written verbatim as one line.
++        """
++        try:
++            # Explicit UTF-8 keeps the log independent of the locale, and
++            # backslashreplace keeps lines that contain unencodable
++            # characters (e.g. lone surrogates) instead of dropping them.
++            with open(
++                "/tmp/loader_debug.log", "a", encoding="utf-8", errors="backslashreplace"
++            ) as f:
++                f.write(f"{message}\n")
++        except OSError:
++            pass
++
      def compose(self) -> ComposeResult:
          yield Container(
              ScrollableContainer(id="message-area"),
          # If agent is running, this is a steering message
          if self.is_generating and self.agent.is_running:
++            # Finalize current streaming so new content appears below user's message
++            if self._current_streaming is not None:
++                self._current_streaming.stop_streaming()
++                self._current_streaming = None
              self._add_steering_message(user_input)
              self.agent.steer(user_input)
              return
          """Handle tool call start."""
          msg_area = self.query_one("#message-area", ScrollableContainer)
++        # Finalize any ongoing streaming - tool calls interrupt thinking
++        if self._current_streaming is not None:
++            self._current_streaming.stop_streaming()
++            self._current_streaming = None
++
          # Create tool widget
          widget = ToolCallWidget(
              tool_name=message.tool_name,
          """Handle tool call completion."""
          msg_area = self.query_one("#message-area", ScrollableContainer)
++        # Debug: log what we received
++        try:
++            with open("/tmp/loader_debug.log", "a") as f:
++                f.write(f"on_tool_call_completed: tool={message.tool_name}, new_string={bool(message.new_string)}, old_string={bool(message.old_string)}, file_path={message.file_path}\n")
++        except Exception:
++            pass
++
          # Get the corresponding tool widget from queue (FIFO)
          tool_widget = self._tool_widget_queue.pop(0) if self._tool_widget_queue else None
          # Check if this is an edit tool with diff info
          if message.tool_name == "edit" and message.old_string and message.new_string:
              # Replace tool widget with diff widget
++            self._debug_log("  -> showing EDIT diff widget")
              if tool_widget:
                  tool_widget.remove()
              msg_area.mount(diff_widget)
          # Check if this is a write tool - show as diff (new file)
          elif message.tool_name == "write" and message.new_string:
++            self._debug_log("  -> showing WRITE diff widget")
              if tool_widget:
                  tool_widget.remove()
              msg_area.mount(diff_widget)
          elif tool_widget:
              # Update existing tool widget with result
++            self._debug_log("  -> showing regular tool widget result")
              tool_widget.set_result(
                  message.content, is_error=message.is_error
              )

src/loader/ui/widgets/streaming.py modified

      def render(self) -> Text:
          """Render the content with optional cursor."""
          # Use Text object to avoid markup interpretation of LLM output
--        text = Text(self._content_buffer)
++        # Clean any tool_call tags that slipped through filtering
++        # Only the rendered copy is cleaned; _content_buffer itself is not modified
++        content = self._clean_tool_tags(self._content_buffer)
++        text = Text(content)
++        # The trailing "|" is appended only while is_streaming is truthy
          if self.is_streaming:
              text.append("|", style="dim")  # Cursor indicator
          return text
++    def _clean_tool_tags(self, content: str) -> str:
++        """Strip tool-call markup that was not filtered during streaming.
++
++        Complete ``<tool_call>...</tool_call>`` blocks are removed together
++        with their payload. An opening ``<tool_call>`` that has no closing
++        tag yet is treated as a call still being streamed, so everything
++        from that tag to the end of the text is hidden rather than showing
++        the raw payload. Stray ``</tool_call>`` and ``<think>``/``</think>``
++        tags are dropped while the text around them is kept. Matching is
++        case-insensitive.
++
++        Args:
++            content: Text to clean.
++
++        Returns:
++            The cleaned text, with runs of three or more newlines collapsed
++            to a single blank line.
++        """
++        import re
++        flags = re.DOTALL | re.IGNORECASE
++        # Remove <tool_call>...</tool_call> blocks
++        content = re.sub(r'<tool_call>.*?</tool_call>', '', content, flags=flags)
++        # Any opening tag still present is unterminated: hide its partial payload
++        content = re.sub(r'<tool_call>.*\Z', '', content, flags=flags)
++        # Remove orphaned tags
++        content = re.sub(r'</?tool_call>', '', content, flags=re.IGNORECASE)
++        content = re.sub(r'</?think>', '', content, flags=re.IGNORECASE)
++        # Clean up excess newlines from removed blocks
++        content = re.sub(r'\n{3,}', '\n\n', content)
++        return content
++
      def append(self, chunk: str) -> None:
          """Append a chunk to the content."""
          self._content_buffer += chunk

tests/test_parsing.py modified

          assert "<tool_call>" not in result.content
          assert "</tool_call>" not in result.content
++    def test_parse_bracketed_calls_format(self):
++        """A ``[calls <tool> tool with: key=value, ...]`` line yields one tool call."""
++        text = '''I'll create the file now.
++[calls write tool with: file_path=/tmp/test.txt, content="hello world"]
++Created the file.'''
++        parsed = parse_tool_calls(text)
++        assert len(parsed.tool_calls) == 1
++        call = parsed.tool_calls[0]
++        assert call.name == "write"
++        assert call.arguments["file_path"] == "/tmp/test.txt"
++        assert call.arguments["content"] == "hello world"
++        # The bracketed directive itself must not remain in the prose
++        assert "[calls" not in parsed.content
++
++    def test_parse_bracketed_use_format(self):
++        """A ``[USE <tool> tool: key=value]`` directive is parsed as a tool call."""
++        parsed = parse_tool_calls('[USE bash tool: command="ls -la"]')
++        assert len(parsed.tool_calls) == 1
++        call = parsed.tool_calls[0]
++        assert call.name == "bash"
++        assert call.arguments["command"] == "ls -la"
++
++    def test_parse_bracketed_edit_format(self):
++        """The bracketed format carries all three ``edit`` arguments through."""
++        source = '[calls edit tool with: file_path="test.py", old_string="foo", new_string="bar"]'
++        parsed = parse_tool_calls(source)
++        assert len(parsed.tool_calls) == 1
++        call = parsed.tool_calls[0]
++        assert call.name == "edit"
++        # Checked in the same order as the arguments appear in the directive
++        expected = {"file_path": "test.py", "old_string": "foo", "new_string": "bar"}
++        for key, value in expected.items():
++            assert call.arguments[key] == value
++
  class TestFormatToolResult:
      """Tests for format_tool_result function."""

tenseleyflow/loader / `6e4f880`

11 changed files

`@@ -59,6 +59,9 @@` class StreamChunk:
59	full_content: str = "" # Accumulated full content (only set when is_done=True)	59	full_content: str = "" # Accumulated full content (only set when is_done=True)
60	tool_calls: list[ToolCall] = field(default_factory=list)	60	tool_calls: list[ToolCall] = field(default_factory=list)
61	is_done: bool = False	61	is_done: bool = False
		62	+ # Pending tool call detected during streaming (ReAct mode)
		63	+ # This allows showing tool widgets as they're detected, before streaming ends
		64	+ pending_tool_call: ToolCall \| None = None
62		65
63		66
64	@dataclass	67	@dataclass