`bb48c80`

Move task classification into runtime

Authored by

espadonne 1 month ago

SHA: bb48c80b9ad7f3e5bfdbbf60db557097ebd8e6cf
Parents: e36e64f
Tree: 6ad44c9

4 changed files

Status	File	+	-
M	`src/loader/agent/reasoning.py`	5	108
M	`src/loader/runtime/conversation.py`	1	4
A	`src/loader/runtime/task_classification.py`	193	0
A	`tests/test_task_classification.py`	29	0

src/loader/agent/reasoning.py (modified)

      TaskCompletionCheck,
      TaskDecomposition,
+ )
+-
+-
 -# === Query Classification ===
+-
 -def is_conversational(message: str) -> bool:
 -    """Detect if a message is conversational rather than a task.
+-
 -    Returns True for greetings, casual chat, simple questions about the agent.
 -    These don't need tool calling - just a quick response.
 -    """
 -    msg = message.lower().strip()
+-
 -    # Very short messages are usually conversational
 -    if len(msg) < 15:
 -        # Greetings
 -        greetings = [
 -            "hi", "hello", "hey", "yo", "sup", "hiya", "howdy",
 -            "ello", "hallo", "greetings", "good morning", "good afternoon",
 -            "good evening", "morning", "evening", "afternoon",
 -            "what's up", "whats up", "wassup", "how are you",
 -            "how's it going", "hows it going",
 -        ]
 -        if any(msg.startswith(g) or msg == g for g in greetings):
 -            return True
+-
 -    # Questions about the agent itself
 -    agent_questions = [
 -        "who are you", "what are you", "what can you do",
 -        "how do you work", "what is loader", "what's loader",
 -        "help", "what is this", "how does this work",
 -    ]
 -    if any(q in msg for q in agent_questions):
 -        return True
+-
 -    # Casual/social messages
 -    casual = [
 -        "thanks", "thank you", "thx", "ty",
 -        "cool", "nice", "great", "awesome", "ok", "okay",
 -        "bye", "goodbye", "see you", "later", "cya",
 -        "lol", "haha", "hehe", "lmao",
 -        "please", "sorry", "oops",
 -    ]
 -    if msg in casual or any(msg == c for c in casual):
 -        return True
+-
 -    # Messages that are clearly NOT conversational (tasks)
 -    task_indicators = [
 -        "create", "make", "build", "write", "edit", "delete", "remove",
 -        "run", "execute", "install", "fix", "debug", "test", "check",
 -        "find", "search", "show", "list", "read", "open", "close",
 -        "add", "update", "change", "modify", "refactor", "implement",
 -        "file", "folder", "directory", "code", "function", "class",
 -        "git", "npm", "pip", "python", "node", "bash", "command",
 -    ]
 -    if any(ind in msg for ind in task_indicators):
 -        return False
+-
 -    # Short messages without task indicators are likely conversational
 -    if len(msg) < 30 and not any(c in msg for c in [".", "/", "\\", "`"]):
 -        return True
+-
 -    return False
+-
+-
 -def estimate_complexity(message: str) -> str:
 -    """Estimate query complexity for token budgeting.
+-
 -    Returns: "trivial", "simple", "moderate", or "complex"
 -    """
 -    msg = message.lower()
 -    word_count = len(message.split())
+-
 -    # Trivial: greetings, thanks, very short
 -    if is_conversational(message) or word_count < 5:
 -        return "trivial"
+-
 -    # Complex indicators
 -    complex_indicators = [
 -        "project", "application", "website", "api", "database",
 -        "refactor", "migrate", "upgrade", "implement", "design",
 -        "multiple", "several", "all", "entire", "whole",
 -        "and then", "after that", "also", "as well",
 -    ]
 -    complex_count = sum(1 for ind in complex_indicators if ind in msg)
+-
 -    if complex_count >= 2 or word_count > 50:
 -        return "complex"
+-
 -    # Simple indicators
 -    simple_indicators = [
 -        "what is", "how do", "show me", "list", "read",
 -        "single", "one", "just", "only", "quick",
 -    ]
 -    if any(ind in msg for ind in simple_indicators) and word_count < 20:
 -        return "simple"
+-
 -    return "moderate"
+-
+-
 -def get_token_budget(complexity: str) -> tuple[int, int]:
 -    """Get (max_tokens, context_tokens) for a complexity level."""
 -    budgets = {
 -        "trivial": (256, 2048),    # Quick response, minimal context
 -        "simple": (512, 4096),     # Short response, some context
 -        "moderate": (1024, 8192),  # Normal response
 -        "complex": (2048, 16384),  # Full response, full context
 -    }
 -    return budgets.get(complexity, (1024, 8192))
 +from ..runtime.task_classification import (
 +    estimate_complexity,
 +    get_token_budget,
 +    is_conversational,
 +)
  # Prompts for reasoning stages

src/loader/runtime/conversation.py (modified)

  from pathlib import Path
  from typing import Any
 -from ..agent.reasoning import (
 -    estimate_complexity,
 -    get_token_budget,
 -)
  from ..llm.base import Message, Role, ToolCall
  from .assistant_turns import AssistantTurnRequester
  from .completion_policy import CompletionPolicy
  from .phases import TurnPhase, TurnPhaseTracker
  from .repair import ResponseRepairer
  from .rollback import RollbackPlan
 +from .task_classification import estimate_complexity, get_token_budget
  from .tool_batches import ToolBatchRunner
  from .tracing import RuntimeTracer
  from .workflow import (

src/loader/runtime/task_classification.py (added)

 +"""Runtime-owned task classification helpers."""
++
 +from __future__ import annotations
++
++
 +def is_conversational(message: str) -> bool:
 +    """Detect if a message is conversational rather than a task."""
++
 +    msg = message.lower().strip()
++
 +    if len(msg) < 15:
 +        greetings = [
 +            "hi",
 +            "hello",
 +            "hey",
 +            "yo",
 +            "sup",
 +            "hiya",
 +            "howdy",
 +            "ello",
 +            "hallo",
 +            "greetings",
 +            "good morning",
 +            "good afternoon",
 +            "good evening",
 +            "morning",
 +            "evening",
 +            "afternoon",
 +            "what's up",
 +            "whats up",
 +            "wassup",
 +            "how are you",
 +            "how's it going",
 +            "hows it going",
 +        ]
 +        if any(msg.startswith(greeting) or msg == greeting for greeting in greetings):
 +            return True
++
 +    agent_questions = [
 +        "who are you",
 +        "what are you",
 +        "what can you do",
 +        "how do you work",
 +        "what is loader",
 +        "what's loader",
 +        "help",
 +        "what is this",
 +        "how does this work",
 +    ]
 +    if any(question in msg for question in agent_questions):
 +        return True
++
 +    casual = [
 +        "thanks",
 +        "thank you",
 +        "thx",
 +        "ty",
 +        "cool",
 +        "nice",
 +        "great",
 +        "awesome",
 +        "ok",
 +        "okay",
 +        "bye",
 +        "goodbye",
 +        "see you",
 +        "later",
 +        "cya",
 +        "lol",
 +        "haha",
 +        "hehe",
 +        "lmao",
 +        "please",
 +        "sorry",
 +        "oops",
 +    ]
 +    if msg in casual or any(msg == item for item in casual):
 +        return True
++
 +    task_indicators = [
 +        "create",
 +        "make",
 +        "build",
 +        "write",
 +        "edit",
 +        "delete",
 +        "remove",
 +        "run",
 +        "execute",
 +        "install",
 +        "fix",
 +        "debug",
 +        "test",
 +        "check",
 +        "find",
 +        "search",
 +        "show",
 +        "list",
 +        "read",
 +        "open",
 +        "close",
 +        "add",
 +        "update",
 +        "change",
 +        "modify",
 +        "refactor",
 +        "implement",
 +        "file",
 +        "folder",
 +        "directory",
 +        "code",
 +        "function",
 +        "class",
 +        "git",
 +        "npm",
 +        "pip",
 +        "python",
 +        "node",
 +        "bash",
 +        "command",
 +    ]
 +    if any(indicator in msg for indicator in task_indicators):
 +        return False
++
 +    if len(msg) < 30 and not any(char in msg for char in [".", "/", "\\", "`"]):
 +        return True
++
 +    return False
++
++
 +def estimate_complexity(message: str) -> str:
 +    """Estimate query complexity for token budgeting."""
++
 +    msg = message.lower()
 +    word_count = len(message.split())
++
 +    if is_conversational(message) or word_count < 5:
 +        return "trivial"
++
 +    complex_indicators = [
 +        "project",
 +        "application",
 +        "website",
 +        "api",
 +        "database",
 +        "refactor",
 +        "migrate",
 +        "upgrade",
 +        "implement",
 +        "design",
 +        "multiple",
 +        "several",
 +        "all",
 +        "entire",
 +        "whole",
 +        "and then",
 +        "after that",
 +        "also",
 +        "as well",
 +    ]
 +    complex_count = sum(1 for indicator in complex_indicators if indicator in msg)
++
 +    if complex_count >= 2 or word_count > 50:
 +        return "complex"
++
 +    simple_indicators = [
 +        "what is",
 +        "how do",
 +        "show me",
 +        "list",
 +        "read",
 +        "single",
 +        "one",
 +        "just",
 +        "only",
 +        "quick",
 +    ]
 +    if any(indicator in msg for indicator in simple_indicators) and word_count < 20:
 +        return "simple"
++
 +    return "moderate"
++
++
 +def get_token_budget(complexity: str) -> tuple[int, int]:
 +    """Get `(max_tokens, context_tokens)` for a complexity level."""
++
 +    budgets = {
 +        "trivial": (256, 2048),
 +        "simple": (512, 4096),
 +        "moderate": (1024, 8192),
 +        "complex": (2048, 16384),
 +    }
 +    return budgets.get(complexity, (1024, 8192))

tests/test_task_classification.py (added)

 +"""Tests for runtime-owned task classification helpers."""
++
 +from __future__ import annotations
++
 +from loader.runtime.task_classification import (
 +    estimate_complexity,
 +    get_token_budget,
 +    is_conversational,
 +)
++
++
 +def test_is_conversational_detects_small_talk() -> None:
 +    assert is_conversational("hi there") is True
 +    assert is_conversational("thanks") is True
 +    assert is_conversational("what can you do?") is True
++
++
 +def test_is_conversational_rejects_actionable_tasks() -> None:
 +    assert is_conversational("create a new README for this repo") is False
 +    assert is_conversational("run the tests and fix failures") is False
++
++
 +def test_estimate_complexity_and_token_budget_cover_common_paths() -> None:
 +    assert estimate_complexity("hi") == "trivial"
 +    assert estimate_complexity("show me the config file") == "simple"
 +    assert estimate_complexity(
 +        "implement a website and then refactor the database layer"
 +    ) == "complex"
 +    assert get_token_budget("simple") == (512, 4096)