@@ -5,6 +5,7 @@ from typing import Any, AsyncIterator |
| 5 | 5 | |
| 6 | 6 | import httpx |
| 7 | 7 | |
| | 8 | +from ..runtime.capabilities import CapabilityProfile, resolve_capability_profile |
| 8 | 9 | from .base import ( |
| 9 | 10 | CompletionResponse, |
| 10 | 11 | LLMBackend, |
@@ -18,40 +19,6 @@ from .base import ( |
| 18 | 19 | class OllamaBackend(LLMBackend): |
| 19 | 20 | """Ollama API backend for local LLM inference.""" |
| 20 | 21 | |
| 21 | | - # Models known to support native function calling in Ollama |
| 22 | | - # Verified working with Ollama's tool calling API |
| 23 | | - NATIVE_TOOL_MODELS = { |
| 24 | | - "llama3.1", "llama3.2", "llama3.3", |
| 25 | | - "qwen2.5", "qwen2", |
| 26 | | - "mistral", "mixtral", |
| 27 | | - "command-r", |
| 28 | | - "granite", |
| 29 | | - # Note: deepseek-coder, codestral, starcoder do NOT support tools in Ollama |
| 30 | | - } |
| 31 | | - |
| 32 | | - # Models that definitely do NOT support native tools (use ReAct) |
| 33 | | - NO_TOOL_MODELS = { |
| 34 | | - "llama2", "llama:latest", # Base llama without version |
| 35 | | - "phi", "phi3", |
| 36 | | - "gemma", "gemma2", |
| 37 | | - "tinyllama", |
| 38 | | - "orca", |
| 39 | | - "vicuna", |
| 40 | | - "wizard", |
| 41 | | - "neural-chat", |
| 42 | | - "starling", |
| 43 | | - "openchat", |
| 44 | | - "yi", |
| 45 | | - "solar", |
| 46 | | - "dolphin", |
| 47 | | - # Coding models that don't support Ollama tools |
| 48 | | - "codestral", |
| 49 | | - "deepseek-coder", |
| 50 | | - "starcoder", |
| 51 | | - "codegemma", |
| 52 | | - "deepseek-r1", # Reasoning model, no tools |
| 53 | | - } |
| 54 | | - |
| 55 | 22 | def __init__( |
| 56 | 23 | self, |
| 57 | 24 | model: str = "llama3.1:8b", |
@@ -72,6 +39,8 @@ class OllamaBackend(LLMBackend): |
| 72 | 39 | self.num_gpu = num_gpu |
| 73 | 40 | self._client = httpx.AsyncClient(timeout=timeout) |
| 74 | 41 | self._supports_native_tools: bool | None = None |
| | 42 | + self._model_details_cache: dict[str, Any] | None = None |
| | 43 | + self._capability_profile: CapabilityProfile | None = None |
| 75 | 44 | |
| 76 | 45 | def _build_options(self, temperature: float, max_tokens: int) -> dict: |
| 77 | 46 | """Build Ollama options dict with performance settings.""" |
@@ -136,6 +105,40 @@ class OllamaBackend(LLMBackend): |
| 136 | 105 | except Exception: |
| 137 | 106 | return [] |
| 138 | 107 | |
| | 108 | + async def describe_model(self) -> dict[str, Any] | None: |
| | 109 | + """Fetch and cache Ollama model details for capability resolution.""" |
| | 110 | + |
| | 111 | + if self._model_details_cache is not None: |
| | 112 | + return self._model_details_cache |
| | 113 | + |
| | 114 | + if not self.model: |
| | 115 | + return None |
| | 116 | + |
| | 117 | + try: |
| | 118 | + response = await self._client.post( |
| | 119 | + f"{self.base_url}/api/show", |
| | 120 | + json={"name": self.model}, |
| | 121 | + ) |
| | 122 | + response.raise_for_status() |
| | 123 | + self._model_details_cache = response.json() |
| | 124 | + except Exception: |
| | 125 | + self._model_details_cache = None |
| | 126 | + |
| | 127 | + return self._model_details_cache |
| | 128 | + |
| | 129 | + def capability_profile(self) -> CapabilityProfile: |
| | 130 | + """Return the resolved capability profile for the current model.""" |
| | 131 | + |
| | 132 | + if ( |
| | 133 | + self._capability_profile is None |
| | 134 | + or self._capability_profile.model_name != self.model |
| | 135 | + ): |
| | 136 | + self._capability_profile = resolve_capability_profile( |
| | 137 | + self.model, |
| | 138 | + model_details=self._model_details_cache, |
| | 139 | + ) |
| | 140 | + return self._capability_profile |
| | 141 | + |
| 139 | 142 | def supports_native_tools(self) -> bool: |
| 140 | 143 | """Check if current model supports native function calling. |
| 141 | 144 | |
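Reviewer note: the `..runtime.capabilities` module itself is not part of this diff, so below is a rough, hypothetical sketch of the shape its call sites imply. Only the signature `resolve_capability_profile(model_name, model_details=...)` and the two attributes the backend reads (`model_name`, `supports_native_tools`) come from the diff; the heuristics and the `capabilities` key from `/api/show` are assumptions.

```python
# Hypothetical sketch only -- the real helper lives in the runtime
# capabilities module and is not shown in this diff.
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class CapabilityProfile:
    model_name: str
    supports_native_tools: bool


# Illustrative name-based fallback; the real resolver may differ.
_TOOL_CAPABLE_FAMILIES = ("llama3.1", "llama3.2", "llama3.3", "qwen2", "mistral")


def resolve_capability_profile(
    model_name: str,
    model_details: dict[str, Any] | None = None,
) -> CapabilityProfile:
    # Recent Ollama versions report a "capabilities" list from /api/show;
    # trust it when present (assumption: key name and shape).
    if model_details and "tools" in (model_details.get("capabilities") or []):
        return CapabilityProfile(model_name, supports_native_tools=True)
    # Fall back to substring matching on the model name, much like the
    # hardcoded NATIVE_TOOL_MODELS set this diff removes.
    lowered = model_name.lower()
    supported = any(family in lowered for family in _TOOL_CAPABLE_FAMILIES)
    return CapabilityProfile(model_name, supports_native_tools=supported)
```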
@@ -145,36 +148,21 @@ |
| 145 | 148 | if self.force_react: |
| 146 | 149 | return False |
| 147 | 150 | |
| | 151 | + if self._capability_profile is not None and self._capability_profile.model_name != self.model: |
| | 152 | + # Model changed since the last check; also drop the cached /api/show details. |
| | 153 | + self._model_details_cache = None |
| | 154 | + self._capability_profile = None |
| | 155 | + self._supports_native_tools = None |
| | 156 | + |
| 148 | 157 | if self._supports_native_tools is not None: |
| 149 | 158 | return self._supports_native_tools |
| 150 | 159 | |
| 151 | | - model_lower = self.model.lower() |
| 152 | | - |
| 153 | | - # First check if it's explicitly a NO_TOOL model |
| 154 | | - for no_tool_model in self.NO_TOOL_MODELS: |
| 155 | | - if no_tool_model in model_lower: |
| 156 | | - self._supports_native_tools = False |
| 157 | | - return False |
| 158 | | - |
| 159 | | - # Check if model name contains any known native tool model |
| 160 | | - for native_model in self.NATIVE_TOOL_MODELS: |
| 161 | | - if native_model in model_lower: |
| 162 | | - self._supports_native_tools = True |
| 163 | | - return True |
| 164 | | - |
| 165 | | - # Default to False for unknown models (safer - uses ReAct) |
| 166 | | - self._supports_native_tools = False |
| 167 | | - return False |
| | 160 | + self._supports_native_tools = self.capability_profile().supports_native_tools |
| | 161 | + return self._supports_native_tools |
| 168 | 162 | |
| 169 | 163 | def _format_messages(self, messages: list[Message]) -> list[dict[str, Any]]: |
| 170 | 164 | """Format messages for Ollama API.""" |
| 171 | | - formatted = [] |
| 172 | | - for msg in messages: |
| 173 | | - formatted.append({ |
| 174 | | - "role": msg.role.value, |
| 175 | | - "content": msg.content, |
| 176 | | - }) |
| 177 | | - return formatted |
| | 165 | + return [message.to_dict() for message in messages] |
| 178 | 166 | |
| 179 | 167 | def _format_tools(self, tools: list[dict[str, Any]] | None) -> list[dict[str, Any]] | None: |
| 180 | 168 | """Format tools for Ollama API.""" |
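Finally, a minimal usage sketch of the new flow, assuming a reachable Ollama server; the import path is hypothetical, but the method names and behavior come from this diff:

```python
import asyncio

# Hypothetical import path; adjust to wherever OllamaBackend actually lives.
from backends.ollama import OllamaBackend


async def main() -> None:
    backend = OllamaBackend(model="llama3.1:8b")

    # Optional warm-up: populate the /api/show details cache so that
    # capability_profile() can resolve from server metadata instead of
    # name-based heuristics alone. Failures leave the cache as None.
    await backend.describe_model()

    profile = backend.capability_profile()
    print(profile.model_name, profile.supports_native_tools)

    # Memoized per model; always False when force_react is enabled.
    print(backend.supports_native_tools())


asyncio.run(main())
```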