snippy/snappy
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
a0a7d5e5086a7ab4ceb4a7051b3fdf47704a6858- Parents
-
77cfd69 - Tree
22f3a14
a0a7d5e
a0a7d5e5086a7ab4ceb4a7051b3fdf47704a685877cfd69
22f3a14| Status | File | + | - |
|---|---|---|---|
| M |
.gitignore
|
2 | 0 |
| M |
cmd/benchmark/main.go
|
3 | 3 |
| M |
cmd/mock.go
|
40 | 10 |
| M |
internal/config/config.go
|
15 | 10 |
| A |
internal/llm/cache.go
|
153 | 0 |
| M |
internal/llm/fallback_database.go
|
5 | 0 |
| A |
internal/llm/insult_expansion_v3.go
|
247 | 0 |
| M |
internal/llm/manager.go
|
44 | 13 |
| M |
internal/llm/ollama.go
|
36 | 7 |
| M |
internal/llm/tier4_templates.go
|
1 | 1 |
.gitignoremodified@@ -1,3 +1,5 @@ | ||
| 1 | 1 | dist/ |
| 2 | 2 | rpmbuild/ |
| 3 | 3 | parrot |
| 4 | +CLAUDE.md | |
| 5 | +parrot-race | |
cmd/benchmark/main.gomodified@@ -7,7 +7,7 @@ import ( | ||
| 7 | 7 | |
| 8 | 8 | func main() { |
| 9 | 9 | fmt.Println("Parrot Insult System Benchmark") |
| 10 | - fmt.Println("================================\n") | |
| 10 | + fmt.Println("================================") | |
| 11 | 11 | |
| 12 | 12 | // Create benchmark |
| 13 | 13 | benchmark := llm.NewBenchmark() |
@@ -22,7 +22,7 @@ func main() { | ||
| 22 | 22 | |
| 23 | 23 | fmt.Println("Training ensemble system...") |
| 24 | 24 | ensemble.Train() |
| 25 | - fmt.Println("Training complete!\n") | |
| 25 | + fmt.Println("Training complete!") | |
| 26 | 26 | |
| 27 | 27 | // Run benchmark |
| 28 | 28 | fmt.Println("Running benchmark...") |
@@ -34,7 +34,7 @@ func main() { | ||
| 34 | 34 | |
| 35 | 35 | // Print detailed sample results |
| 36 | 36 | fmt.Println("\nDetailed Sample Results:") |
| 37 | - fmt.Println("========================\n") | |
| 37 | + fmt.Println("========================") | |
| 38 | 38 | |
| 39 | 39 | for i, score := range results.DetailedScores { |
| 40 | 40 | if i >= 10 { // Show first 10 |
cmd/mock.gomodified@@ -32,6 +32,9 @@ func Execute() { | ||
| 32 | 32 | } |
| 33 | 33 | } |
| 34 | 34 | |
| 35 | +// CLI flags | |
| 36 | +var spicyMode bool | |
| 37 | + | |
| 35 | 38 | var mockCmd = &cobra.Command{ |
| 36 | 39 | Use: "mock [command] [exit_code]", |
| 37 | 40 | Short: "Mock a failed command", |
@@ -42,6 +45,9 @@ var mockCmd = &cobra.Command{ | ||
| 42 | 45 | |
| 43 | 46 | func init() { |
| 44 | 47 | rootCmd.AddCommand(mockCmd) |
| 48 | + | |
| 49 | + // Add --spicy flag for quality mode (default is snappy/fast) | |
| 50 | + mockCmd.Flags().BoolVar(&spicyMode, "spicy", false, "Use spicy mode (richer responses, slightly slower)") | |
| 45 | 51 | } |
| 46 | 52 | |
| 47 | 53 | func mockCommand(cmd *cobra.Command, args []string) { |
@@ -95,8 +101,8 @@ func detectCommandType(command string) string { | ||
| 95 | 101 | return "kubernetes" |
| 96 | 102 | |
| 97 | 103 | // HTTP/Network |
| 98 | - case "curl", "wget", "http", "https": | |
| 99 | - return "http" | |
| 104 | + case "curl", "wget", "http", "https", "httpie": | |
| 105 | + return "http_errors" | |
| 100 | 106 | |
| 101 | 107 | // SSH/Remote |
| 102 | 108 | case "ssh", "scp", "sftp", "rsync": |
@@ -110,8 +116,16 @@ func detectCommandType(command string) string { | ||
| 110 | 116 | case "cd", "pushd", "popd": |
| 111 | 117 | return "navigation" |
| 112 | 118 | |
| 113 | - // Python | |
| 119 | + // Python - check for ML frameworks first | |
| 114 | 120 | case "python", "python3", "pip", "pip3", "poetry", "pipenv", "conda": |
| 121 | + // Check if this is an AI/ML command | |
| 122 | + if strings.Contains(command, "torch") || strings.Contains(command, "tensorflow") || | |
| 123 | + strings.Contains(command, "keras") || strings.Contains(command, "sklearn") || | |
| 124 | + strings.Contains(command, "pytorch") || strings.Contains(command, "transformers") || | |
| 125 | + strings.Contains(command, "cuda") || strings.Contains(command, "gpu") || | |
| 126 | + strings.Contains(command, "train") || strings.Contains(command, "model") { | |
| 127 | + return "ai_ml" | |
| 128 | + } | |
| 115 | 129 | return "python_expanded" |
| 116 | 130 | |
| 117 | 131 | // Rust |
@@ -161,8 +175,16 @@ func detectCommandType(command string) string { | ||
| 161 | 175 | case "perf", "valgrind", "gprof", "strace", "ltrace", "top", "htop", "iotop": |
| 162 | 176 | return "performance" |
| 163 | 177 | |
| 178 | + // AI/ML tools | |
| 179 | + case "nvidia-smi", "nvcc", "tensorboard", "mlflow", "wandb", "jupyter", "ipython": | |
| 180 | + return "ai_ml" | |
| 181 | + | |
| 182 | + // Terraform/IaC | |
| 183 | + case "terraform", "pulumi", "cdktf", "terragrunt": | |
| 184 | + return "terraform" | |
| 185 | + | |
| 164 | 186 | // Cloud providers |
| 165 | - case "aws", "gcloud", "az", "terraform", "pulumi", "cloudformation": | |
| 187 | + case "aws", "gcloud", "az", "cloudformation", "cdk": | |
| 166 | 188 | return "cloud" |
| 167 | 189 | |
| 168 | 190 | // DevOps tools |
@@ -190,16 +212,24 @@ func generateSmartResponse(cmdType, command, exitCode string) (string, *config.C | ||
| 190 | 212 | defaultCfg := config.DefaultConfig() |
| 191 | 213 | return getFallbackResponse(cmdType), defaultCfg |
| 192 | 214 | } |
| 193 | - | |
| 215 | + | |
| 216 | + // Override mode if --spicy flag is set | |
| 217 | + if spicyMode { | |
| 218 | + cfg.General.GenerationMode = "spicy" | |
| 219 | + } | |
| 220 | + | |
| 194 | 221 | // Initialize LLM manager |
| 195 | 222 | manager := llm.NewLLMManager(cfg) |
| 196 | - | |
| 223 | + | |
| 197 | 224 | // Build context-aware prompt with personality |
| 198 | 225 | prompt := prompts.BuildPrompt(cmdType, command, exitCode, cfg.General.Personality) |
| 199 | - | |
| 200 | - // Use a reasonable timeout for LLM responses (6 seconds max) | |
| 201 | - // With optimized Ollama options, responses should be under 2 seconds when warm | |
| 202 | - maxTimeout := 6 * time.Second | |
| 226 | + | |
| 227 | + // Set timeout based on generation mode | |
| 228 | + // Snappy: 4s max (3s LLM + 1s buffer), Spicy: 6s max (5s LLM + 1s buffer) | |
| 229 | + maxTimeout := 4 * time.Second | |
| 230 | + if cfg.General.GenerationMode == "spicy" { | |
| 231 | + maxTimeout = 6 * time.Second | |
| 232 | + } | |
| 203 | 233 | ctx, cancel := context.WithTimeout(context.Background(), maxTimeout) |
| 204 | 234 | defer cancel() |
| 205 | 235 | |
internal/config/config.gomodified@@ -37,11 +37,12 @@ type LocalConfig struct { | ||
| 37 | 37 | } |
| 38 | 38 | |
| 39 | 39 | type GeneralConfig struct { |
| 40 | - Personality string `toml:"personality"` // "savage", "sarcastic", "mild" | |
| 41 | - FallbackMode bool `toml:"fallback_mode"` // Use hardcoded responses only | |
| 42 | - Debug bool `toml:"debug"` // Debug logging | |
| 43 | - Colors bool `toml:"colors"` // Enable colored output | |
| 44 | - Enhanced bool `toml:"enhanced"` // Enhanced formatting with borders/emphasis | |
| 40 | + Personality string `toml:"personality"` // "savage", "sarcastic", "mild" | |
| 41 | + GenerationMode string `toml:"generation_mode"` // "snappy" (fast) or "spicy" (quality) | |
| 42 | + FallbackMode bool `toml:"fallback_mode"` // Use hardcoded responses only | |
| 43 | + Debug bool `toml:"debug"` // Debug logging | |
| 44 | + Colors bool `toml:"colors"` // Enable colored output | |
| 45 | + Enhanced bool `toml:"enhanced"` // Enhanced formatting with borders/emphasis | |
| 45 | 46 | } |
| 46 | 47 | |
| 47 | 48 | // Default configuration |
@@ -63,11 +64,12 @@ func DefaultConfig() *Config { | ||
| 63 | 64 | Timeout: 5, // 5 seconds with optimized generation options should be plenty |
| 64 | 65 | }, |
| 65 | 66 | General: GeneralConfig{ |
| 66 | - Personality: "savage", | |
| 67 | - FallbackMode: false, | |
| 68 | - Debug: false, | |
| 69 | - Colors: true, | |
| 70 | - Enhanced: false, | |
| 67 | + Personality: "savage", | |
| 68 | + GenerationMode: "snappy", // Default to fast mode | |
| 69 | + FallbackMode: false, | |
| 70 | + Debug: false, | |
| 71 | + Colors: true, | |
| 72 | + Enhanced: false, | |
| 71 | 73 | }, |
| 72 | 74 | } |
| 73 | 75 | } |
@@ -149,6 +151,9 @@ func loadFromEnv(config *Config) { | ||
| 149 | 151 | if personality := os.Getenv("PARROT_PERSONALITY"); personality != "" { |
| 150 | 152 | config.General.Personality = personality |
| 151 | 153 | } |
| 154 | + if mode := os.Getenv("PARROT_MODE"); mode != "" { | |
| 155 | + config.General.GenerationMode = mode | |
| 156 | + } | |
| 152 | 157 | if os.Getenv("PARROT_FALLBACK_ONLY") == "true" { |
| 153 | 158 | config.General.FallbackMode = true |
| 154 | 159 | } |
internal/llm/cache.goadded@@ -0,0 +1,153 @@ | ||
| 1 | +package llm | |
| 2 | + | |
| 3 | +import ( | |
| 4 | + "crypto/sha256" | |
| 5 | + "encoding/hex" | |
| 6 | + "sync" | |
| 7 | + "time" | |
| 8 | +) | |
| 9 | + | |
| 10 | +// ResponseCache provides an LRU cache for LLM responses to avoid redundant calls | |
| 11 | +type ResponseCache struct { | |
| 12 | + mu sync.RWMutex | |
| 13 | + entries map[string]*cacheEntry | |
| 14 | + maxSize int | |
| 15 | + ttl time.Duration | |
| 16 | +} | |
| 17 | + | |
| 18 | +type cacheEntry struct { | |
| 19 | + response string | |
| 20 | + timestamp time.Time | |
| 21 | +} | |
| 22 | + | |
| 23 | +// Global cache instance | |
| 24 | +var ( | |
| 25 | + responseCache *ResponseCache | |
| 26 | + responseCacheOnce sync.Once | |
| 27 | +) | |
| 28 | + | |
| 29 | +// GetResponseCache returns the singleton response cache | |
| 30 | +func GetResponseCache() *ResponseCache { | |
| 31 | + responseCacheOnce.Do(func() { | |
| 32 | + responseCache = NewResponseCache(100, 5*time.Minute) // 100 entries, 5 min TTL | |
| 33 | + }) | |
| 34 | + return responseCache | |
| 35 | +} | |
| 36 | + | |
| 37 | +// NewResponseCache creates a new response cache | |
| 38 | +func NewResponseCache(maxSize int, ttl time.Duration) *ResponseCache { | |
| 39 | + cache := &ResponseCache{ | |
| 40 | + entries: make(map[string]*cacheEntry), | |
| 41 | + maxSize: maxSize, | |
| 42 | + ttl: ttl, | |
| 43 | + } | |
| 44 | + | |
| 45 | + // Start background cleanup goroutine | |
| 46 | + go cache.cleanupLoop() | |
| 47 | + | |
| 48 | + return cache | |
| 49 | +} | |
| 50 | + | |
| 51 | +// generateKey creates a cache key from command signature | |
| 52 | +func (c *ResponseCache) generateKey(command, commandType, exitCode, mode string) string { | |
| 53 | + // Create a hash of the command signature | |
| 54 | + h := sha256.New() | |
| 55 | + h.Write([]byte(command)) | |
| 56 | + h.Write([]byte("|")) | |
| 57 | + h.Write([]byte(commandType)) | |
| 58 | + h.Write([]byte("|")) | |
| 59 | + h.Write([]byte(exitCode)) | |
| 60 | + h.Write([]byte("|")) | |
| 61 | + h.Write([]byte(mode)) | |
| 62 | + return hex.EncodeToString(h.Sum(nil))[:16] // Use first 16 chars of hash | |
| 63 | +} | |
| 64 | + | |
| 65 | +// Get retrieves a cached response if available and not expired | |
| 66 | +func (c *ResponseCache) Get(command, commandType, exitCode, mode string) (string, bool) { | |
| 67 | + key := c.generateKey(command, commandType, exitCode, mode) | |
| 68 | + | |
| 69 | + c.mu.RLock() | |
| 70 | + defer c.mu.RUnlock() | |
| 71 | + | |
| 72 | + entry, exists := c.entries[key] | |
| 73 | + if !exists { | |
| 74 | + return "", false | |
| 75 | + } | |
| 76 | + | |
| 77 | + // Check if expired | |
| 78 | + if time.Since(entry.timestamp) > c.ttl { | |
| 79 | + return "", false | |
| 80 | + } | |
| 81 | + | |
| 82 | + return entry.response, true | |
| 83 | +} | |
| 84 | + | |
| 85 | +// Set stores a response in the cache | |
| 86 | +func (c *ResponseCache) Set(command, commandType, exitCode, mode, response string) { | |
| 87 | + key := c.generateKey(command, commandType, exitCode, mode) | |
| 88 | + | |
| 89 | + c.mu.Lock() | |
| 90 | + defer c.mu.Unlock() | |
| 91 | + | |
| 92 | + // Evict oldest entries if at capacity | |
| 93 | + if len(c.entries) >= c.maxSize { | |
| 94 | + c.evictOldest() | |
| 95 | + } | |
| 96 | + | |
| 97 | + c.entries[key] = &cacheEntry{ | |
| 98 | + response: response, | |
| 99 | + timestamp: time.Now(), | |
| 100 | + } | |
| 101 | +} | |
| 102 | + | |
| 103 | +// evictOldest removes the oldest entry (must be called with lock held) | |
| 104 | +func (c *ResponseCache) evictOldest() { | |
| 105 | + var oldestKey string | |
| 106 | + var oldestTime time.Time | |
| 107 | + | |
| 108 | + for key, entry := range c.entries { | |
| 109 | + if oldestKey == "" || entry.timestamp.Before(oldestTime) { | |
| 110 | + oldestKey = key | |
| 111 | + oldestTime = entry.timestamp | |
| 112 | + } | |
| 113 | + } | |
| 114 | + | |
| 115 | + if oldestKey != "" { | |
| 116 | + delete(c.entries, oldestKey) | |
| 117 | + } | |
| 118 | +} | |
| 119 | + | |
| 120 | +// cleanupLoop periodically removes expired entries | |
| 121 | +func (c *ResponseCache) cleanupLoop() { | |
| 122 | + ticker := time.NewTicker(1 * time.Minute) | |
| 123 | + defer ticker.Stop() | |
| 124 | + | |
| 125 | + for range ticker.C { | |
| 126 | + c.cleanup() | |
| 127 | + } | |
| 128 | +} | |
| 129 | + | |
| 130 | +// cleanup removes all expired entries | |
| 131 | +func (c *ResponseCache) cleanup() { | |
| 132 | + c.mu.Lock() | |
| 133 | + defer c.mu.Unlock() | |
| 134 | + | |
| 135 | + now := time.Now() | |
| 136 | + for key, entry := range c.entries { | |
| 137 | + if now.Sub(entry.timestamp) > c.ttl { | |
| 138 | + delete(c.entries, key) | |
| 139 | + } | |
| 140 | + } | |
| 141 | +} | |
| 142 | + | |
| 143 | +// Stats returns cache statistics | |
| 144 | +func (c *ResponseCache) Stats() map[string]interface{} { | |
| 145 | + c.mu.RLock() | |
| 146 | + defer c.mu.RUnlock() | |
| 147 | + | |
| 148 | + return map[string]interface{}{ | |
| 149 | + "size": len(c.entries), | |
| 150 | + "max_size": c.maxSize, | |
| 151 | + "ttl_secs": c.ttl.Seconds(), | |
| 152 | + } | |
| 153 | +} | |
internal/llm/fallback_database.gomodified@@ -2913,6 +2913,11 @@ func GetExpandedFallback(commandType string, command string) string { | ||
| 2913 | 2913 | responses, exists = InsultExpansionV2[commandType] |
| 2914 | 2914 | } |
| 2915 | 2915 | |
| 2916 | + // Try v3 expansion database (kubernetes, terraform, ai_ml, http_errors, cloud) | |
| 2917 | + if !exists { | |
| 2918 | + responses, exists = InsultExpansionV3[commandType] | |
| 2919 | + } | |
| 2920 | + | |
| 2916 | 2921 | // Fall back to generic if still not found |
| 2917 | 2922 | if !exists { |
| 2918 | 2923 | responses = ExpandedFallbackDatabase["generic"] |
internal/llm/insult_expansion_v3.goadded@@ -0,0 +1,247 @@ | ||
| 1 | +package llm | |
| 2 | + | |
| 3 | +// InsultExpansionV3 - ROUND 3: Kubernetes, Terraform/Cloud, AI/ML, and HTTP/API insults | |
| 4 | +// Because DevOps and ML disasters deserve their own category | |
| 5 | +var InsultExpansionV3 = map[string][]string{ | |
| 6 | + | |
| 7 | + // ==================== KUBERNETES (45 insults) ==================== | |
| 8 | + "kubernetes": { | |
| 9 | + // Pod failures | |
| 10 | + "CrashLoopBackOff: when even the cluster gives up on you.", | |
| 11 | + "Pod evicted: your code wasn't paying rent in the cluster.", | |
| 12 | + "ImagePullBackOff: Docker Hub is ghosting you today.", | |
| 13 | + "OOMKilled: your memory management is the real disaster here.", | |
| 14 | + "Pod pending forever: even Kubernetes can't figure out your mess.", | |
| 15 | + "Container terminated: the only sensible exit strategy.", | |
| 16 | + "Init container failed: couldn't even start failing properly.", | |
| 17 | + "Liveness probe failed: your pod is dead, just like your skills.", | |
| 18 | + "Readiness probe failed: your app wasn't ready. Neither are you.", | |
| 19 | + "Your pod is restarting more than your failed relationships.", | |
| 20 | + | |
| 21 | + // Deployment disasters | |
| 22 | + "Deployment rollback triggered: even Kubernetes wants to undo you.", | |
| 23 | + "ReplicaSet can't scale: your code doesn't scale either.", | |
| 24 | + "Rolling update stuck: stuck like your career.", | |
| 25 | + "kubectl apply failed: YAML isn't that hard. For most people.", | |
| 26 | + "HPA gave up: your app can't handle success anyway.", | |
| 27 | + "PodDisruptionBudget violated: budget for disaster was exceeded.", | |
| 28 | + "Your deployment strategy is 'recreate everything and pray.'", | |
| 29 | + "Surge capacity exceeded: your mistakes surge faster than your pods.", | |
| 30 | + "Deployment deadline exceeded: deadline for competence also passed.", | |
| 31 | + "Your rollout is rolling back faster than your resume updates.", | |
| 32 | + | |
| 33 | + // Resource issues | |
| 34 | + "ResourceQuota exceeded: you exceeded the incompetence quota too.", | |
| 35 | + "CPU throttled: your code efficiency matches your brain efficiency.", | |
| 36 | + "Memory limit reached: should have limited your ambitions instead.", | |
| 37 | + "PersistentVolumeClaim pending: your claims to skill are also pending.", | |
| 38 | + "StorageClass not found: class 'Competent Developer' also not found.", | |
| 39 | + "No nodes available: nodes are hiding from your workload.", | |
| 40 | + "Insufficient resources: for your pods AND your excuses.", | |
| 41 | + "Node pressure: your code puts pressure on everything it touches.", | |
| 42 | + "Eviction threshold reached: you reached the threshold for employment too.", | |
| 43 | + "Your resource requests are fiction. So is your understanding of k8s.", | |
| 44 | + | |
| 45 | + // Configuration chaos | |
| 46 | + "ConfigMap missing: so is your configuration management skill.", | |
| 47 | + "Secret not found: your incompetence is no secret though.", | |
| 48 | + "RBAC denied: Role-Based Access says you can't access success.", | |
| 49 | + "ServiceAccount error: your account of events is also wrong.", | |
| 50 | + "Namespace not found: you're lost in more ways than one.", | |
| 51 | + "Context switching failed: between k8s clusters AND competence.", | |
| 52 | + "kubeconfig invalid: your config for life is also questionable.", | |
| 53 | + "API server unreachable: like your career goals.", | |
| 54 | + "etcd timeout: your learning also timed out years ago.", | |
| 55 | + "Admission webhook rejected: webhook has better judgment than your hiring manager.", | |
| 56 | + | |
| 57 | + // Networking nightmares | |
| 58 | + "Service unavailable: like your competence.", | |
| 59 | + "Ingress misconfigured: traffic can't find your app. Neither can users.", | |
| 60 | + "NetworkPolicy blocking: blocking your code is actually correct.", | |
| 61 | + "DNS resolution failed: your code can't even find localhost.", | |
| 62 | + "ClusterIP not working: nothing in your cluster works.", | |
| 63 | + }, | |
| 64 | + | |
| 65 | + // ==================== TERRAFORM/CLOUD (35 insults) ==================== | |
| 66 | + "terraform": { | |
| 67 | + // State disasters | |
| 68 | + "terraform destroy: finally doing something useful with your infra.", | |
| 69 | + "State file corrupted: a metaphor for your career trajectory.", | |
| 70 | + "State lock failed: someone else is already fixing your mistakes.", | |
| 71 | + "Drift detected: your code drifted from reality long ago.", | |
| 72 | + "Backend configuration error: your backend knowledge is also in error.", | |
| 73 | + "terraform import failed: can't import competence either.", | |
| 74 | + "State refresh error: refreshing won't fix fundamental problems.", | |
| 75 | + "Remote state not found: your remote chance of success also not found.", | |
| 76 | + "State file too large: like your ego vs your abilities.", | |
| 77 | + "Workspace confusion: you're confused in all workspaces.", | |
| 78 | + | |
| 79 | + // Provider problems | |
| 80 | + "Provider error: even AWS doesn't want to work with you.", | |
| 81 | + "API rate limited: your mistakes exceeded the API's patience.", | |
| 82 | + "Credentials expired: so did your relevance.", | |
| 83 | + "Region not available: neither is your future in DevOps.", | |
| 84 | + "Service quota exceeded: quota for bad decisions also exceeded.", | |
| 85 | + "Provider version mismatch: your version and 'competent' don't match.", | |
| 86 | + "Authentication failed: terraform can tell you're a fraud.", | |
| 87 | + "IAM denied: Identity and Access confirms you shouldn't access anything.", | |
| 88 | + "Resource not found: your resources for learning also not found.", | |
| 89 | + "Provider crashed: looking at your code will do that.", | |
| 90 | + | |
| 91 | + // Resource failures | |
| 92 | + "Resource creation failed: creation of your career also failed.", | |
| 93 | + "Dependency cycle detected: you depend on failure consistently.", | |
| 94 | + "Timeout waiting for resource: still waiting for your skill to deploy.", | |
| 95 | + "Validation failed: your code failed validation. So did your degree.", | |
| 96 | + "Variables undefined: your career path is also undefined.", | |
| 97 | + "Output error: the only output is embarrassment.", | |
| 98 | + "Module not found: 'successful_deployment' module missing.", | |
| 99 | + "Plan failed: your life plan also needs review.", | |
| 100 | + "Apply error: apply this to your resume: 'needs improvement.'", | |
| 101 | + "Destroy failed: can't even destroy properly. Impressive.", | |
| 102 | + | |
| 103 | + // Cloud catastrophes | |
| 104 | + "S3 bucket public: your mistakes are also very public.", | |
| 105 | + "Lambda timeout: your functions fail as slowly as possible.", | |
| 106 | + "EC2 terminated: instance of competence also terminated.", | |
| 107 | + "RDS connection refused: database refused your terrible queries.", | |
| 108 | + "CloudFormation drift: drifting further from employability.", | |
| 109 | + }, | |
| 110 | + | |
| 111 | + // ==================== AI/ML (40 insults) ==================== | |
| 112 | + "ai_ml": { | |
| 113 | + // GPU/CUDA disasters | |
| 114 | + "CUDA out of memory: your model is as bloated as your ego.", | |
| 115 | + "GPU not found: your neural network found nothing either.", | |
| 116 | + "CUDA version mismatch: mismatch between your skills and requirements too.", | |
| 117 | + "cuDNN error: your deep learning is very shallow.", | |
| 118 | + "NCCL error: distributed training can't distribute your incompetence.", | |
| 119 | + "torch.cuda.is_available() returns False, and so does your career.", | |
| 120 | + "GPU utilization 0%: matches your brain utilization.", | |
| 121 | + "OOM killer struck: should have killed your model idea first.", | |
| 122 | + "Driver version incompatible: you're incompatible with success.", | |
| 123 | + "Memory allocation failed: allocate some time for learning basics.", | |
| 124 | + | |
| 125 | + // Training failures | |
| 126 | + "NaN loss: your gradients vanished like your debugging skills.", | |
| 127 | + "Loss not decreasing: your competence isn't increasing either.", | |
| 128 | + "Validation loss exploding: your mistakes also explode exponentially.", | |
| 129 | + "Overfitting to training data: and overfitting to bad practices.", | |
| 130 | + "Underfitting everything: including job requirements.", | |
| 131 | + "Gradient explosion: the only thing exploding is your career.", | |
| 132 | + "Learning rate too high: ambition too high, skill too low.", | |
| 133 | + "Model diverged: diverged from anything resembling ML knowledge.", | |
| 134 | + "Early stopping triggered: should have stopped you earlier.", | |
| 135 | + "Accuracy stuck at 50%: your model learned to flip a coin.", | |
| 136 | + | |
| 137 | + // Model issues | |
| 138 | + "Model too large: compensating for something?", | |
| 139 | + "Model won't load: brain cells also won't load.", | |
| 140 | + "Checkpoint corrupted: your understanding is also corrupted.", | |
| 141 | + "Weights initialization failed: your project was doomed from the start.", | |
| 142 | + "Architecture makes no sense: designed by throwing layers at the wall.", | |
| 143 | + "Batch size too large: bigger isn't always better. Applies to egos too.", | |
| 144 | + "Embedding dimension mismatch: dimensions of your confusion also mismatch.", | |
| 145 | + "Tokenizer error: can't tokenize your excuses.", | |
| 146 | + "Inference failed: your ability to infer solutions also failed.", | |
| 147 | + "Model prediction: always wrong. Like your career choices.", | |
| 148 | + | |
| 149 | + // Data disasters | |
| 150 | + "Dataset not found: your dataset of achievements also empty.", | |
| 151 | + "Data loader crashed: crashed harder than your GPU.", | |
| 152 | + "Label mismatch: your labels and reality don't match.", | |
| 153 | + "Preprocessing failed: pre-thinking also failed.", | |
| 154 | + "Data augmentation broke: augmenting garbage gives more garbage.", | |
| 155 | + "Feature extraction error: can't extract features from nothing.", | |
| 156 | + "Normalization failed: nothing normal about your approach.", | |
| 157 | + "Train/test split leaked: your incompetence also leaked everywhere.", | |
| 158 | + "Class imbalance: your skills are imbalanced too.", | |
| 159 | + "Corrupted samples: sample of your work is also corrupted.", | |
| 160 | + }, | |
| 161 | + | |
| 162 | + // ==================== HTTP/API ERRORS (35 insults) ==================== | |
| 163 | + "http_errors": { | |
| 164 | + // Client errors (4xx) | |
| 165 | + "400 Bad Request: your request is as bad as your code.", | |
| 166 | + "401 Unauthorized: even the API knows you shouldn't be here.", | |
| 167 | + "403 Forbidden: the server has better judgment than your manager.", | |
| 168 | + "404 Not Found: your skills are also not found.", | |
| 169 | + "405 Method Not Allowed: your methods aren't allowed in production either.", | |
| 170 | + "408 Request Timeout: patience for your code also timed out.", | |
| 171 | + "409 Conflict: the only thing consistent about you.", | |
| 172 | + "410 Gone: like your chances of success.", | |
| 173 | + "413 Payload Too Large: your ego is also payload too large.", | |
| 174 | + "415 Unsupported Media Type: your code type is also unsupported.", | |
| 175 | + "418 I'm a Teapot: you're a disaster.", | |
| 176 | + "422 Unprocessable Entity: your code is unprocessable by any brain.", | |
| 177 | + "429 Too Many Requests: slow down, the API isn't your therapist.", | |
| 178 | + "451 Unavailable For Legal Reasons: your code should also be illegal.", | |
| 179 | + | |
| 180 | + // Server errors (5xx) | |
| 181 | + "500 Internal Server Error: you broke the server. Congratulations.", | |
| 182 | + "501 Not Implemented: like your understanding of REST.", | |
| 183 | + "502 Bad Gateway: the server between you and success has crashed.", | |
| 184 | + "503 Service Unavailable: like your competence.", | |
| 185 | + "504 Gateway Timeout: gateway gave up waiting for your code to work.", | |
| 186 | + "505 HTTP Version Not Supported: your version of 'working code' isn't supported.", | |
| 187 | + "507 Insufficient Storage: insufficient storage for all your mistakes.", | |
| 188 | + "508 Loop Detected: you're stuck in a loop of bad decisions.", | |
| 189 | + "511 Network Authentication Required: authenticate your claims to skill first.", | |
| 190 | + | |
| 191 | + // curl/wget specific | |
| 192 | + "curl: Connection refused: server is refusing your advances.", | |
| 193 | + "curl: Could not resolve host: your code can't resolve anything.", | |
| 194 | + "wget: Connection timed out: even wget is tired of waiting.", | |
| 195 | + "SSL certificate problem: your certificate of competence is also invalid.", | |
| 196 | + "Connection reset by peer: peer reviewed your code and reset everything.", | |
| 197 | + "Network unreachable: like your career aspirations.", | |
| 198 | + "curl: (7) Failed to connect: you fail to connect with success too.", | |
| 199 | + "Host not found: hosting your code should also not be found.", | |
| 200 | + "Certificate verification failed: your skills failed verification too.", | |
| 201 | + "Protocol error: you're speaking the wrong protocol. In life too.", | |
| 202 | + "Response too large: larger than your debugging capabilities.", | |
| 203 | + "Malformed response: your understanding is also malformed.", | |
| 204 | + }, | |
| 205 | + | |
| 206 | + // ==================== CLOUD PROVIDER SPECIFIC (25 insults) ==================== | |
| 207 | + "cloud": { | |
| 208 | + // AWS | |
| 209 | + "AWS bill arrived: your wallet just filed for bankruptcy.", | |
| 210 | + "Lambda cold start: your brain also has cold starts.", | |
| 211 | + "S3 access denied: denied like your promotion.", | |
| 212 | + "DynamoDB throttled: your throughput of good ideas is also limited.", | |
| 213 | + "EC2 instance terminated: unlike your employment. For now.", | |
| 214 | + "CloudWatch alarm: alarming how bad this is.", | |
| 215 | + "ECS task failed: task 'be competent' also failed.", | |
| 216 | + "SQS message lost: like your message to the team about testing.", | |
| 217 | + | |
| 218 | + // GCP | |
| 219 | + "GCP quota exceeded: quota for patience also exceeded.", | |
| 220 | + "BigQuery timeout: big questions about your competence too.", | |
| 221 | + "Cloud Functions crashed: function 'write_good_code' not defined.", | |
| 222 | + "GKE cluster error: cluster of mistakes growing.", | |
| 223 | + | |
| 224 | + // Azure | |
| 225 | + "Azure outage: your code causes outages too.", | |
| 226 | + "Blob storage error: blob of errors in your code.", | |
| 227 | + "Azure Functions timeout: functions of your brain also timeout.", | |
| 228 | + "App Service failed: your service to the team has also failed.", | |
| 229 | + | |
| 230 | + // General cloud | |
| 231 | + "Cloud costs: $10,000/month for 'Hello World.'", | |
| 232 | + "Auto-scaling scaled to zero: correct assessment of your value.", | |
| 233 | + "CDN cache miss: your code misses the point entirely.", | |
| 234 | + "Load balancer unhealthy: health check for your code: terminal.", | |
| 235 | + "Database connection pool exhausted: pool of excuses also exhausted.", | |
| 236 | + "Message queue backed up: backed up like your technical debt.", | |
| 237 | + "Container registry error: registering your failures since day one.", | |
| 238 | + "VPC misconfigured: Very Poorly Configured.", | |
| 239 | + "IAM role missing: role 'competent developer' is also missing.", | |
| 240 | + }, | |
| 241 | +} | |
| 242 | + | |
| 243 | +// init registers V3 categories into the lookup system | |
| 244 | +func init() { | |
| 245 | + // V3 categories are automatically available through GetExpandedFallback | |
| 246 | + // which checks InsultExpansionV3 after V2 | |
| 247 | +} | |
internal/llm/manager.gomodified@@ -10,9 +10,10 @@ import ( | ||
| 10 | 10 | ) |
| 11 | 11 | |
| 12 | 12 | type LLMManager struct { |
| 13 | - config *config.Config | |
| 14 | - apiClient *APIClient | |
| 13 | + config *config.Config | |
| 14 | + apiClient *APIClient | |
| 15 | 15 | ollamaClient *OllamaClient |
| 16 | + cache *ResponseCache | |
| 16 | 17 | } |
| 17 | 18 | |
| 18 | 19 | type Backend string |
@@ -23,9 +24,18 @@ const ( | ||
| 23 | 24 | BackendFallback Backend = "fallback" |
| 24 | 25 | ) |
| 25 | 26 | |
| 27 | +// getLocalTimeout returns the appropriate timeout based on generation mode | |
| 28 | +func getLocalTimeout(cfg *config.Config) time.Duration { | |
| 29 | + if cfg.General.GenerationMode == "spicy" { | |
| 30 | + return 5 * time.Second // Patient timeout for quality mode | |
| 31 | + } | |
| 32 | + return 3 * time.Second // Snappy timeout (raw Ollama ~1.4s, needs headroom) | |
| 33 | +} | |
| 34 | + | |
| 26 | 35 | func NewLLMManager(cfg *config.Config) *LLMManager { |
| 27 | 36 | manager := &LLMManager{ |
| 28 | 37 | config: cfg, |
| 38 | + cache: GetResponseCache(), | |
| 29 | 39 | } |
| 30 | 40 | |
| 31 | 41 | // Initialize API client if enabled |
@@ -44,7 +54,10 @@ func NewLLMManager(cfg *config.Config) *LLMManager { | ||
| 44 | 54 | cfg.Local.Endpoint, |
| 45 | 55 | cfg.Local.Model, |
| 46 | 56 | ) |
| 47 | - | |
| 57 | + | |
| 58 | + // Set generation mode (snappy = fast, spicy = quality) | |
| 59 | + manager.ollamaClient.SetMode(cfg.General.GenerationMode) | |
| 60 | + | |
| 48 | 61 | // Warm up the model in the background for better performance |
| 49 | 62 | if manager.ollamaClient.IsAvailable() { |
| 50 | 63 | go func() { |
@@ -96,14 +109,14 @@ func (m *LLMManager) Generate(ctx context.Context, prompt string, commandType st | ||
| 96 | 109 | // 2. Try local Ollama (if available) |
| 97 | 110 | if m.ollamaClient != nil && m.config.Local.Enabled { |
| 98 | 111 | if m.config.General.Debug { |
| 99 | - fmt.Printf("🔍 Trying local backend...\n") | |
| 112 | + fmt.Printf("🔍 Trying local backend (%s mode)...\n", m.config.General.GenerationMode) | |
| 100 | 113 | } |
| 101 | - | |
| 102 | - // Create timeout context for local calls | |
| 103 | - timeoutDuration := time.Duration(m.config.Local.Timeout) * time.Second | |
| 114 | + | |
| 115 | + // Create timeout context based on generation mode | |
| 116 | + timeoutDuration := getLocalTimeout(m.config) | |
| 104 | 117 | localCtx, cancel := context.WithTimeout(ctx, timeoutDuration) |
| 105 | 118 | defer cancel() |
| 106 | - | |
| 119 | + | |
| 107 | 120 | response, err := m.ollamaClient.Generate(localCtx, prompt) |
| 108 | 121 | if m.config.General.Debug { |
| 109 | 122 | fmt.Printf("🐛 Raw Ollama response: '%s', error: %v\n", response, err) |
@@ -115,12 +128,12 @@ func (m *LLMManager) Generate(ctx context.Context, prompt string, commandType st | ||
| 115 | 128 | } |
| 116 | 129 | return response, BackendLocal |
| 117 | 130 | } |
| 118 | - | |
| 131 | + | |
| 119 | 132 | if m.config.General.Debug { |
| 120 | 133 | fmt.Printf("❌ Local backend failed: %v\n", err) |
| 121 | 134 | } |
| 122 | 135 | } |
| 123 | - | |
| 136 | + | |
| 124 | 137 | // 3. Fallback to hardcoded responses |
| 125 | 138 | if m.config.General.Debug { |
| 126 | 139 | fmt.Printf("🔄 Using fallback backend\n") |
@@ -135,6 +148,16 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com | ||
| 135 | 148 | return m.generateFallback(commandType, fullCommand, exitCode), BackendFallback |
| 136 | 149 | } |
| 137 | 150 | |
| 152 | + // Check the response cache first, so repeated identical command failures | |
| 151 | + // reuse a previously generated insult instead of re-querying a backend | |
| 152 | + if m.cache != nil { | |
| 153 | + if cached, found := m.cache.Get(fullCommand, commandType, exitCode, m.config.General.GenerationMode); found { | |
| 154 | + if m.config.General.Debug { | |
| 155 | + fmt.Printf("⚡ Cache hit!\n") | |
| 156 | + } | |
| 157 | + return cached, BackendLocal // Cache hits are reported as the local backend, even if the response was originally generated by the API | |
| 158 | + } | |
| 159 | + } | |
| 160 | + | |
| 138 | 161 | // Try backends in priority order: API -> Local -> Fallback |
| 139 | 162 | |
| 140 | 163 | // 1. Try API first (if available) |
@@ -154,6 +177,10 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com | ||
| 154 | 177 | if m.config.General.Debug { |
| 155 | 178 | fmt.Printf("✅ API backend succeeded\n") |
| 156 | 179 | } |
| 180 | + // Cache successful response | |
| 181 | + if m.cache != nil { | |
| 182 | + m.cache.Set(fullCommand, commandType, exitCode, m.config.General.GenerationMode, response) | |
| 183 | + } | |
| 157 | 184 | return response, BackendAPI |
| 158 | 185 | } |
| 159 | 186 | |
@@ -165,11 +192,11 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com | ||
| 165 | 192 | // 2. Try local Ollama (if available) |
| 166 | 193 | if m.ollamaClient != nil && m.config.Local.Enabled { |
| 167 | 194 | if m.config.General.Debug { |
| 168 | - fmt.Printf("🔍 Trying local backend...\n") | |
| 195 | + fmt.Printf("🔍 Trying local backend (%s mode)...\n", m.config.General.GenerationMode) | |
| 169 | 196 | } |
| 170 | 197 | |
| 171 | - // Create timeout context for local calls | |
| 172 | - timeoutDuration := time.Duration(m.config.Local.Timeout) * time.Second | |
| 198 | + // Create timeout context based on generation mode | |
| 199 | + timeoutDuration := getLocalTimeout(m.config) | |
| 173 | 200 | localCtx, cancel := context.WithTimeout(ctx, timeoutDuration) |
| 174 | 201 | defer cancel() |
| 175 | 202 | |
@@ -182,6 +209,10 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com | ||
| 182 | 209 | if m.config.General.Debug { |
| 183 | 210 | fmt.Printf("✅ Local backend succeeded with: '%s'\n", response) |
| 184 | 211 | } |
| 212 | + // Cache successful response | |
| 213 | + if m.cache != nil { | |
| 214 | + m.cache.Set(fullCommand, commandType, exitCode, m.config.General.GenerationMode, response) | |
| 215 | + } | |
| 185 | 216 | return response, BackendLocal |
| 186 | 217 | } |
| 187 | 218 | |
internal/llm/ollama.gomodified@@ -13,6 +13,7 @@ import ( | ||
| 13 | 13 | type OllamaClient struct { |
| 14 | 14 | BaseURL string |
| 15 | 15 | Model string |
| 16 | + Mode string // "snappy" (fast) or "spicy" (quality) | |
| 16 | 17 | client *http.Client |
| 17 | 18 | } |
| 18 | 19 | |
@@ -43,32 +44,60 @@ func NewOllamaClient(baseURL, model string) *OllamaClient { | ||
| 43 | 44 | if model == "" { |
| 44 | 45 | model = "llama3.2:3b" |
| 45 | 46 | } |
| 46 | - | |
| 47 | + | |
| 47 | 48 | return &OllamaClient{ |
| 48 | 49 | BaseURL: baseURL, |
| 49 | 50 | Model: model, |
| 51 | + Mode: "snappy", // Default to fast mode | |
| 50 | 52 | client: &http.Client{ |
| 51 | 53 | Timeout: 60 * time.Second, // Maximum timeout; actual timeout controlled by context |
| 52 | 54 | }, |
| 53 | 55 | } |
| 54 | 56 | } |
| 55 | 57 | |
| 58 | +// SetMode sets the generation mode ("snappy" for speed, "spicy" for quality) | |
| 59 | +func (c *OllamaClient) SetMode(mode string) { | |
| 60 | + if mode == "spicy" || mode == "snappy" { | |
| 61 | + c.Mode = mode | |
| 62 | + } | |
| 63 | +} | |
| 64 | + | |
| 65 | +// getOptionsForMode returns optimized generation options based on mode | |
| 66 | +func (c *OllamaClient) getOptionsForMode() *GenerateOptions { | |
| 67 | + if c.Mode == "spicy" { | |
| 68 | + // Spicy mode: richer responses, more creative, willing to wait | |
| 69 | + return &GenerateOptions{ | |
| 70 | + NumPredict: 80, // Longer responses | |
| 71 | + NumCtx: 1024, // Rich context window | |
| 72 | + Temperature: 0.85, // More creative | |
| 73 | + } | |
| 74 | + } | |
| 75 | + // Snappy mode (default): fast and punchy | |
| 76 | + return &GenerateOptions{ | |
| 77 | + NumPredict: 40, // Short and punchy | |
| 78 | + NumCtx: 256, // Minimal context | |
| 79 | + Temperature: 0.6, // Faster convergence | |
| 80 | + } | |
| 81 | +} | |
| 82 | + | |
| 56 | 83 | func (c *OllamaClient) Generate(ctx context.Context, prompt string) (string, error) { |
| 57 | 84 | u, err := url.JoinPath(c.BaseURL, "/api/generate") |
| 58 | 85 | if err != nil { |
| 59 | 86 | return "", fmt.Errorf("invalid base URL: %w", err) |
| 60 | 87 | } |
| 61 | 88 | |
| 89 | + // Choose the keep-alive duration by generation mode (the generation options themselves come from getOptionsForMode below) | |
| 90 | + keepAlive := "5m" | |
| 91 | + if c.Mode == "spicy" { | |
| 92 | + keepAlive = "15m" // Keep model warm longer for quality mode | |
| 93 | + } | |
| 94 | + | |
| 62 | 95 | req := GenerateRequest{ |
| 63 | 96 | Model: c.Model, |
| 64 | 97 | Prompt: prompt, |
| 65 | 98 | Stream: false, |
| 66 | - KeepAlive: "10m", // Keep model loaded for 10 minutes to avoid cold starts | |
| 67 | - Options: &GenerateOptions{ | |
| 68 | - NumPredict: 60, // Limit output tokens (insults are short) | |
| 69 | - NumCtx: 512, // Small context window (prompts are ~500 chars) | |
| 70 | - Temperature: 0.8, // Good creativity for variety | |
| 71 | - }, | |
| 99 | + KeepAlive: keepAlive, | |
| 100 | + Options: c.getOptionsForMode(), | |
| 72 | 101 | } |
| 73 | 102 | |
| 74 | 103 | reqBody, err := json.Marshal(req) |
internal/llm/tier4_templates.gomodified@@ -283,7 +283,7 @@ func GenerateStreakEscalation(streak int, ctx SmartFallbackContext) string { | ||
| 283 | 283 | } |
| 284 | 284 | } else if streak >= 3 { |
| 285 | 285 | templates = []string{ |
| 286 | - fmt.Sprintf("Third failure in a row. Three strikes: You're out.", streak), | |
| 286 | + fmt.Sprintf("Failure #%d in a row. Three strikes: You're out.", streak), | |
| 287 | 287 | fmt.Sprintf("%d consecutive fails. Trying the same thing expecting different results?", streak), |
| 288 | 288 | fmt.Sprintf("Failure #%d. Pattern detected: You.", streak), |
| 289 | 289 | fmt.Sprintf("%d in a row. Maybe read the docs this time?", streak), |