`bb48c80`

Move task classification into runtime

Authored by

espadonne 1 month ago

SHA: bb48c80b9ad7f3e5bfdbbf60db557097ebd8e6cf
Parents: e36e64f
Tree: 6ad44c9

4 changed files

Status	File	+	-
M	`src/loader/agent/reasoning.py`	5	108
M	`src/loader/runtime/conversation.py`	1	4
A	`src/loader/runtime/task_classification.py`	193	0
A	`tests/test_task_classification.py`	29	0

src/loader/agent/reasoning.py (modified)

      TaskCompletionCheck,
      TaskDecomposition,
+ )
--
++from ..runtime.task_classification import (
--
++    estimate_complexity,
--# === Query Classification ===
++    get_token_budget,
--
++    is_conversational,
--def is_conversational(message: str) -> bool:
++)
--    """Detect if a message is conversational rather than a task.
--
--    Returns True for greetings, casual chat, simple questions about the agent.
--    These don't need tool calling - just a quick response.
--    """
--    msg = message.lower().strip()
--
--    # Very short messages are usually conversational
--    if len(msg) < 15:
--        # Greetings
--        greetings = [
--            "hi", "hello", "hey", "yo", "sup", "hiya", "howdy",
--            "ello", "hallo", "greetings", "good morning", "good afternoon",
--            "good evening", "morning", "evening", "afternoon",
--            "what's up", "whats up", "wassup", "how are you",
--            "how's it going", "hows it going",
--        ]
--        if any(msg.startswith(g) or msg == g for g in greetings):
--            return True
--
--    # Questions about the agent itself
--    agent_questions = [
--        "who are you", "what are you", "what can you do",
--        "how do you work", "what is loader", "what's loader",
--        "help", "what is this", "how does this work",
--    ]
--    if any(q in msg for q in agent_questions):
--        return True
--
--    # Casual/social messages
--    casual = [
--        "thanks", "thank you", "thx", "ty",
--        "cool", "nice", "great", "awesome", "ok", "okay",
--        "bye", "goodbye", "see you", "later", "cya",
--        "lol", "haha", "hehe", "lmao",
--        "please", "sorry", "oops",
--    ]
--    if msg in casual or any(msg == c for c in casual):
--        return True
--
--    # Messages that are clearly NOT conversational (tasks)
--    task_indicators = [
--        "create", "make", "build", "write", "edit", "delete", "remove",
--        "run", "execute", "install", "fix", "debug", "test", "check",
--        "find", "search", "show", "list", "read", "open", "close",
--        "add", "update", "change", "modify", "refactor", "implement",
--        "file", "folder", "directory", "code", "function", "class",
--        "git", "npm", "pip", "python", "node", "bash", "command",
--    ]
--    if any(ind in msg for ind in task_indicators):
--        return False
--
--    # Short messages without task indicators are likely conversational
--    if len(msg) < 30 and not any(c in msg for c in [".", "/", "\\", "`"]):
--        return True
--
--    return False
--
--
--def estimate_complexity(message: str) -> str:
--    """Estimate query complexity for token budgeting.
--
--    Returns: "trivial", "simple", "moderate", or "complex"
--    """
--    msg = message.lower()
--    word_count = len(message.split())
--
--    # Trivial: greetings, thanks, very short
--    if is_conversational(message) or word_count < 5:
--        return "trivial"
--
--    # Complex indicators
--    complex_indicators = [
--        "project", "application", "website", "api", "database",
--        "refactor", "migrate", "upgrade", "implement", "design",
--        "multiple", "several", "all", "entire", "whole",
--        "and then", "after that", "also", "as well",
--    ]
--    complex_count = sum(1 for ind in complex_indicators if ind in msg)
--
--    if complex_count >= 2 or word_count > 50:
--        return "complex"
--
--    # Simple indicators
--    simple_indicators = [
--        "what is", "how do", "show me", "list", "read",
--        "single", "one", "just", "only", "quick",
--    ]
--    if any(ind in msg for ind in simple_indicators) and word_count < 20:
--        return "simple"
--
--    return "moderate"
--
--
--def get_token_budget(complexity: str) -> tuple[int, int]:
--    """Get (max_tokens, context_tokens) for a complexity level."""
--    budgets = {
--        "trivial": (256, 2048),    # Quick response, minimal context
--        "simple": (512, 4096),     # Short response, some context
--        "moderate": (1024, 8192),  # Normal response
--        "complex": (2048, 16384),  # Full response, full context
--    }
--    return budgets.get(complexity, (1024, 8192))
  # Prompts for reasoning stages

src/loader/runtime/conversation.py (modified)

  from pathlib import Path
  from typing import Any
--from ..agent.reasoning import (
--    estimate_complexity,
--    get_token_budget,
--)
  from ..llm.base import Message, Role, ToolCall
  from .assistant_turns import AssistantTurnRequester
  from .completion_policy import CompletionPolicy
  from .phases import TurnPhase, TurnPhaseTracker
  from .repair import ResponseRepairer
  from .rollback import RollbackPlan
++from .task_classification import estimate_complexity, get_token_budget
  from .tool_batches import ToolBatchRunner
  from .tracing import RuntimeTracer
  from .workflow import (

src/loader/runtime/task_classification.py (added)

++"""Runtime-owned task classification helpers."""
++
++from __future__ import annotations
++
++
++def is_conversational(message: str) -> bool:
++    """Detect if a message is conversational rather than a task."""
++
++    msg = message.lower().strip()
++
++    if len(msg) < 15:
++        greetings = [
++            "hi",
++            "hello",
++            "hey",
++            "yo",
++            "sup",
++            "hiya",
++            "howdy",
++            "ello",
++            "hallo",
++            "greetings",
++            "good morning",
++            "good afternoon",
++            "good evening",
++            "morning",
++            "evening",
++            "afternoon",
++            "what's up",
++            "whats up",
++            "wassup",
++            "how are you",
++            "how's it going",
++            "hows it going",
++        ]
++        if any(msg.startswith(greeting) or msg == greeting for greeting in greetings):
++            return True
++
++    agent_questions = [
++        "who are you",
++        "what are you",
++        "what can you do",
++        "how do you work",
++        "what is loader",
++        "what's loader",
++        "help",
++        "what is this",
++        "how does this work",
++    ]
++    if any(question in msg for question in agent_questions):
++        return True
++
++    casual = [
++        "thanks",
++        "thank you",
++        "thx",
++        "ty",
++        "cool",
++        "nice",
++        "great",
++        "awesome",
++        "ok",
++        "okay",
++        "bye",
++        "goodbye",
++        "see you",
++        "later",
++        "cya",
++        "lol",
++        "haha",
++        "hehe",
++        "lmao",
++        "please",
++        "sorry",
++        "oops",
++    ]
++    if msg in casual or any(msg == item for item in casual):
++        return True
++
++    task_indicators = [
++        "create",
++        "make",
++        "build",
++        "write",
++        "edit",
++        "delete",
++        "remove",
++        "run",
++        "execute",
++        "install",
++        "fix",
++        "debug",
++        "test",
++        "check",
++        "find",
++        "search",
++        "show",
++        "list",
++        "read",
++        "open",
++        "close",
++        "add",
++        "update",
++        "change",
++        "modify",
++        "refactor",
++        "implement",
++        "file",
++        "folder",
++        "directory",
++        "code",
++        "function",
++        "class",
++        "git",
++        "npm",
++        "pip",
++        "python",
++        "node",
++        "bash",
++        "command",
++    ]
++    if any(indicator in msg for indicator in task_indicators):
++        return False
++
++    if len(msg) < 30 and not any(char in msg for char in [".", "/", "\\", "`"]):
++        return True
++
++    return False
++
++
++def estimate_complexity(message: str) -> str:
++    """Estimate query complexity for token budgeting."""
++
++    msg = message.lower()
++    word_count = len(message.split())
++
++    if is_conversational(message) or word_count < 5:
++        return "trivial"
++
++    complex_indicators = [
++        "project",
++        "application",
++        "website",
++        "api",
++        "database",
++        "refactor",
++        "migrate",
++        "upgrade",
++        "implement",
++        "design",
++        "multiple",
++        "several",
++        "all",
++        "entire",
++        "whole",
++        "and then",
++        "after that",
++        "also",
++        "as well",
++    ]
++    complex_count = sum(1 for indicator in complex_indicators if indicator in msg)
++
++    if complex_count >= 2 or word_count > 50:
++        return "complex"
++
++    simple_indicators = [
++        "what is",
++        "how do",
++        "show me",
++        "list",
++        "read",
++        "single",
++        "one",
++        "just",
++        "only",
++        "quick",
++    ]
++    if any(indicator in msg for indicator in simple_indicators) and word_count < 20:
++        return "simple"
++
++    return "moderate"
++
++
++def get_token_budget(complexity: str) -> tuple[int, int]:
++    """Get `(max_tokens, context_tokens)` for a complexity level."""
++
++    budgets = {
++        "trivial": (256, 2048),
++        "simple": (512, 4096),
++        "moderate": (1024, 8192),
++        "complex": (2048, 16384),
++    }
++    return budgets.get(complexity, (1024, 8192))

tests/test_task_classification.py (added)

++"""Tests for runtime-owned task classification helpers."""
++
++from __future__ import annotations
++
++from loader.runtime.task_classification import (
++    estimate_complexity,
++    get_token_budget,
++    is_conversational,
++)
++
++
++def test_is_conversational_detects_small_talk() -> None:
++    assert is_conversational("hi there") is True
++    assert is_conversational("thanks") is True
++    assert is_conversational("what can you do?") is True
++
++
++def test_is_conversational_rejects_actionable_tasks() -> None:
++    assert is_conversational("create a new README for this repo") is False
++    assert is_conversational("run the tests and fix failures") is False
++
++
++def test_estimate_complexity_and_token_budget_cover_common_paths() -> None:
++    assert estimate_complexity("hi") == "trivial"
++    assert estimate_complexity("show me the config file") == "simple"
++    assert estimate_complexity(
++        "implement a website and then refactor the database layer"
++    ) == "complex"
++    assert get_token_budget("simple") == (512, 4096)