@@ -17,9 +17,18 @@ type OllamaClient struct {
| 17 | 17 | } |
| 18 | 18 | |
| 19 | 19 | type GenerateRequest struct { |
| 20 | | - Model string `json:"model"` |
| 21 | | - Prompt string `json:"prompt"` |
| 22 | | - Stream bool `json:"stream"` |
| 20 | + Model string `json:"model"` |
| 21 | + Prompt string `json:"prompt"` |
| 22 | + Stream bool `json:"stream"` |
| 23 | + KeepAlive string `json:"keep_alive,omitempty"` |
| 24 | + Options *GenerateOptions `json:"options,omitempty"` |
| 25 | +} |
| 26 | + |
| 27 | + // GenerateOptions holds the Ollama generation settings we tune for faster responses |
| 28 | +type GenerateOptions struct { |
| 29 | + NumPredict int `json:"num_predict,omitempty"` // Max tokens to generate (60 is plenty for insults) |
| 30 | + NumCtx int `json:"num_ctx,omitempty"` // Context window size (512 is enough for small prompts) |
| 31 | + Temperature float64 `json:"temperature,omitempty"` // Creativity (0.8 for variety) |
| 23 | 32 | } |
| 24 | 33 | |
| 25 | 34 | type GenerateResponse struct { |
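For context, here is a standalone sketch of what the updated request types serialize to. The structs mirror the hunk above; the model tag and prompt are placeholders:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Copies of the request types added in the hunk above.
type GenerateOptions struct {
	NumPredict  int     `json:"num_predict,omitempty"`
	NumCtx      int     `json:"num_ctx,omitempty"`
	Temperature float64 `json:"temperature,omitempty"`
}

type GenerateRequest struct {
	Model     string           `json:"model"`
	Prompt    string           `json:"prompt"`
	Stream    bool             `json:"stream"`
	KeepAlive string           `json:"keep_alive,omitempty"`
	Options   *GenerateOptions `json:"options,omitempty"`
}

func main() {
	req := GenerateRequest{
		Model:     "llama3.2:1b", // placeholder model tag
		Prompt:    "Write a one-line insult about slow Wi-Fi.",
		Stream:    false,
		KeepAlive: "10m",
		Options:   &GenerateOptions{NumPredict: 60, NumCtx: 512, Temperature: 0.8},
	}
	body, err := json.MarshalIndent(req, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```

Because `keep_alive` and `options` both carry `omitempty`, callers that leave them unset still produce exactly the JSON the old struct did, so existing call sites need no changes.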
@@ -51,9 +60,15 @@ func (c *OllamaClient) Generate(ctx context.Context, prompt string) (string, err
| 51 | 60 | } |
| 52 | 61 | |
| 53 | 62 | req := GenerateRequest{ |
| 54 | | - Model: c.Model, |
| 55 | | - Prompt: prompt, |
| 56 | | - Stream: false, |
| 63 | + Model: c.Model, |
| 64 | + Prompt: prompt, |
| 65 | + Stream: false, |
| 66 | + KeepAlive: "10m", // Keep model loaded for 10 minutes to avoid cold starts |
| 67 | + Options: &GenerateOptions{ |
| 68 | + NumPredict: 60, // Limit output tokens (insults are short) |
| 69 | + NumCtx: 512, // Small context window (prompts are ~500 chars) |
| 70 | + Temperature: 0.8, // Good creativity for variety |
| 71 | + }, |
| 57 | 72 | } |
| 58 | 73 | |
| 59 | 74 | reqBody, err := json.Marshal(req) |
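Taken together, the `Generate` path now issues the equivalent of the following standalone POST. This is a sketch assuming a default local Ollama at `http://localhost:11434`; the model tag and prompt are placeholders:

```go
package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)

func main() {
	payload := map[string]any{
		"model":      "llama3.2:1b", // placeholder model tag
		"prompt":     "Write a one-line insult about slow Wi-Fi.",
		"stream":     false,
		"keep_alive": "10m",
		"options": map[string]any{
			"num_predict": 60,
			"num_ctx":     512,
			"temperature": 0.8,
		},
	}
	body, err := json.Marshal(payload)
	if err != nil {
		panic(err)
	}

	// Bound the whole call so a slow model load can't hang the caller.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
		"http://localhost:11434/api/generate", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```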
@@ -120,9 +135,14 @@ func (c *OllamaClient) WarmupModel() error { |
| 120 | 135 | } |
| 121 | 136 | |
| 122 | 137 | req := GenerateRequest{ |
| 123 | | - Model: c.Model, |
| 124 | | - Prompt: "test", // Minimal prompt to load model |
| 125 | | - Stream: false, |
| 138 | + Model: c.Model, |
| 139 | + Prompt: "Say OK", // Minimal prompt to load model |
| 140 | + Stream: false, |
| 141 | + KeepAlive: "10m", |
| 142 | + Options: &GenerateOptions{ |
| 143 | + NumPredict: 5, // Minimal output for warmup |
| 144 | + NumCtx: 256, // Minimal context |
| 145 | + }, |
| 126 | 146 | } |
| 127 | 147 | |
| 128 | 148 | reqBody, err := json.Marshal(req) |
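A quick way to check that the warmup plus `keep_alive` actually removes the cold start is to time two back-to-back calls: the first absorbs the model load, the second should return in a fraction of the time. A rough sketch under the same assumptions (default local Ollama, placeholder model tag):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

func timedGenerate() (time.Duration, error) {
	body, err := json.Marshal(map[string]any{
		"model":      "llama3.2:1b", // placeholder model tag
		"prompt":     "Say OK",
		"stream":     false,
		"keep_alive": "10m",
		"options":    map[string]any{"num_predict": 5, "num_ctx": 256},
	})
	if err != nil {
		return 0, err
	}
	start := time.Now()
	resp, err := http.Post("http://localhost:11434/api/generate",
		"application/json", bytes.NewReader(body))
	if err != nil {
		return 0, err
	}
	resp.Body.Close()
	return time.Since(start), nil
}

func main() {
	for _, label := range []string{"cold", "warm"} {
		d, err := timedGenerate()
		if err != nil {
			fmt.Printf("%s call failed: %v\n", label, err)
			return
		}
		fmt.Printf("%s call: %v\n", label, d)
	}
}
```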