tenseleyflow/parrot / a0a7d5e

snippy/snappy

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: a0a7d5e5086a7ab4ceb4a7051b3fdf47704a6858
Parents: 77cfd69
Tree: 22f3a14

10 changed files

Status  File                                   +    -
M       .gitignore                             2    0
M       cmd/benchmark/main.go                  3    3
M       cmd/mock.go                           40   10
M       internal/config/config.go             15   10
A       internal/llm/cache.go                153    0
M       internal/llm/fallback_database.go      5    0
A       internal/llm/insult_expansion_v3.go  247    0
M       internal/llm/manager.go               44   13
M       internal/llm/ollama.go                36    7
M       internal/llm/tier4_templates.go        1    1

.gitignore (modified)
@@ -1,3 +1,5 @@
 dist/
 rpmbuild/
 parrot
+CLAUDE.md
+parrot-race

cmd/benchmark/main.go (modified)
@@ -7,7 +7,7 @@ import (
 
 func main() {
 	fmt.Println("Parrot Insult System Benchmark")
-	fmt.Println("================================\n")
+	fmt.Println("================================")
 
 	// Create benchmark
 	benchmark := llm.NewBenchmark()
@@ -22,7 +22,7 @@ func main() {
 
 	fmt.Println("Training ensemble system...")
 	ensemble.Train()
-	fmt.Println("Training complete!\n")
+	fmt.Println("Training complete!")
 
 	// Run benchmark
 	fmt.Println("Running benchmark...")
@@ -34,7 +34,7 @@ func main() {
 
 	// Print detailed sample results
 	fmt.Println("\nDetailed Sample Results:")
-	fmt.Println("========================\n")
+	fmt.Println("========================")
 
 	for i, score := range results.DetailedScores {
 		if i >= 10 { // Show first 10

cmd/mock.go (modified)
@@ -32,6 +32,9 @@ func Execute() {
 	}
 }
 
+// CLI flags
+var spicyMode bool
+
 var mockCmd = &cobra.Command{
 	Use:   "mock [command] [exit_code]",
 	Short: "Mock a failed command",
@@ -42,6 +45,9 @@ var mockCmd = &cobra.Command{
 
 func init() {
 	rootCmd.AddCommand(mockCmd)
+
+	// Add --spicy flag for quality mode (default is snappy/fast)
+	mockCmd.Flags().BoolVar(&spicyMode, "spicy", false, "Use spicy mode (richer responses, slightly slower)")
 }
 
 func mockCommand(cmd *cobra.Command, args []string) {
@@ -95,8 +101,8 @@ func detectCommandType(command string) string {
 		return "kubernetes"
 
 	// HTTP/Network
-	case "curl", "wget", "http", "https":
-		return "http"
+	case "curl", "wget", "http", "https", "httpie":
+		return "http_errors"
 
 	// SSH/Remote
 	case "ssh", "scp", "sftp", "rsync":
@@ -110,8 +116,16 @@ func detectCommandType(command string) string {
 	case "cd", "pushd", "popd":
 		return "navigation"
 
-	// Python
+	// Python - check for ML frameworks first
 	case "python", "python3", "pip", "pip3", "poetry", "pipenv", "conda":
+		// Check if this is an AI/ML command
+		if strings.Contains(command, "torch") || strings.Contains(command, "tensorflow") ||
+			strings.Contains(command, "keras") || strings.Contains(command, "sklearn") ||
+			strings.Contains(command, "pytorch") || strings.Contains(command, "transformers") ||
+			strings.Contains(command, "cuda") || strings.Contains(command, "gpu") ||
+			strings.Contains(command, "train") || strings.Contains(command, "model") {
+			return "ai_ml"
+		}
 		return "python_expanded"
 
 	// Rust
@@ -161,8 +175,16 @@ func detectCommandType(command string) string {
 	case "perf", "valgrind", "gprof", "strace", "ltrace", "top", "htop", "iotop":
 		return "performance"
 
+	// AI/ML tools
+	case "nvidia-smi", "nvcc", "tensorboard", "mlflow", "wandb", "jupyter", "ipython":
+		return "ai_ml"
+
+	// Terraform/IaC
+	case "terraform", "pulumi", "cdktf", "terragrunt":
+		return "terraform"
+
 	// Cloud providers
-	case "aws", "gcloud", "az", "terraform", "pulumi", "cloudformation":
+	case "aws", "gcloud", "az", "cloudformation", "cdk":
		return "cloud"
 
 	// DevOps tools
@@ -190,16 +212,24 @@ func generateSmartResponse(cmdType, command, exitCode string) (string, *config.C
 		defaultCfg := config.DefaultConfig()
 		return getFallbackResponse(cmdType), defaultCfg
 	}
-	
+
+	// Override mode if --spicy flag is set
+	if spicyMode {
+		cfg.General.GenerationMode = "spicy"
+	}
+
 	// Initialize LLM manager
 	manager := llm.NewLLMManager(cfg)
-	
+
 	// Build context-aware prompt with personality
 	prompt := prompts.BuildPrompt(cmdType, command, exitCode, cfg.General.Personality)
-	
-	// Use a reasonable timeout for LLM responses (6 seconds max)
-	// With optimized Ollama options, responses should be under 2 seconds when warm
-	maxTimeout := 6 * time.Second
+
+	// Set timeout based on generation mode
+	// Snappy: 4s max (3s LLM + 1s buffer), Spicy: 6s max (5s LLM + 1s buffer)
+	maxTimeout := 4 * time.Second
+	if cfg.General.GenerationMode == "spicy" {
+		maxTimeout = 6 * time.Second
+	}
 	ctx, cancel := context.WithTimeout(context.Background(), maxTimeout)
 	defer cancel()
 	
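
A quick sanity check for the new routing is a table test against detectCommandType. This is a sketch, not part of the commit: it assumes detectCommandType dispatches on the command's leading token (the strings.Contains checks in the python case only make sense if full command strings flow through), and the file name is hypothetical.

// cmd/mock_routing_test.go (hypothetical), in package cmd next to mock.go
package cmd

import "testing"

func TestDetectCommandTypeRouting(t *testing.T) {
	cases := map[string]string{
		"curl":              "http_errors", // was "http" before this commit
		"httpie":            "http_errors", // new alias
		"terraform":         "terraform",   // moved out of "cloud"
		"aws":               "cloud",
		"nvidia-smi":        "ai_ml",
		"pip install torch": "ai_ml", // ML keyword inside a python-family command
		"pip install flask": "python_expanded",
	}
	for command, want := range cases {
		if got := detectCommandType(command); got != want {
			t.Errorf("detectCommandType(%q) = %q, want %q", command, got, want)
		}
	}
}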

internal/config/config.go (modified)
@@ -37,11 +37,12 @@ type LocalConfig struct {
 }
 
 type GeneralConfig struct {
-	Personality  string `toml:"personality"`   // "savage", "sarcastic", "mild"
-	FallbackMode bool   `toml:"fallback_mode"` // Use hardcoded responses only
-	Debug        bool   `toml:"debug"`         // Debug logging
-	Colors       bool   `toml:"colors"`        // Enable colored output
-	Enhanced     bool   `toml:"enhanced"`      // Enhanced formatting with borders/emphasis
+	Personality    string `toml:"personality"`     // "savage", "sarcastic", "mild"
+	GenerationMode string `toml:"generation_mode"` // "snappy" (fast) or "spicy" (quality)
+	FallbackMode   bool   `toml:"fallback_mode"`   // Use hardcoded responses only
+	Debug          bool   `toml:"debug"`           // Debug logging
+	Colors         bool   `toml:"colors"`          // Enable colored output
+	Enhanced       bool   `toml:"enhanced"`        // Enhanced formatting with borders/emphasis
 }
 
 // Default configuration
@@ -63,11 +64,12 @@ func DefaultConfig() *Config {
 			Timeout:  5,  // 5 seconds with optimized generation options should be plenty
 		},
 		General: GeneralConfig{
-			Personality:  "savage",
-			FallbackMode: false,
-			Debug:        false,
-			Colors:       true,
-			Enhanced:     false,
+			Personality:    "savage",
+			GenerationMode: "snappy", // Default to fast mode
+			FallbackMode:   false,
+			Debug:          false,
+			Colors:         true,
+			Enhanced:       false,
 		},
 	}
 }
@@ -149,6 +151,9 @@ func loadFromEnv(config *Config) {
 	if personality := os.Getenv("PARROT_PERSONALITY"); personality != "" {
 		config.General.Personality = personality
 	}
+	if mode := os.Getenv("PARROT_MODE"); mode != "" {
+		config.General.GenerationMode = mode
+	}
 	if os.Getenv("PARROT_FALLBACK_ONLY") == "true" {
 		config.General.FallbackMode = true
 	}
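
With this change the mode has three sources, resolved in a fixed order: DefaultConfig's "snappy", then the PARROT_MODE environment variable, then the --spicy flag applied in cmd/mock.go. A self-contained sketch of that precedence (illustrative only; resolveMode is not a real parrot function):

package main

import (
	"fmt"
	"os"
)

// resolveMode mirrors the precedence implied by the diffs:
// built-in default < PARROT_MODE < --spicy flag.
func resolveMode(spicyFlag bool) string {
	mode := "snappy" // DefaultConfig
	if env := os.Getenv("PARROT_MODE"); env != "" {
		mode = env // loadFromEnv
	}
	if spicyFlag {
		mode = "spicy" // cmd/mock.go applies the flag last
	}
	return mode
}

func main() {
	fmt.Println(resolveMode(false)) // "snappy"
	os.Setenv("PARROT_MODE", "spicy")
	fmt.Println(resolveMode(false)) // "spicy" via env
	fmt.Println(resolveMode(true))  // "spicy" via flag
}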

internal/llm/cache.go (added)
@@ -0,0 +1,153 @@
+package llm
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"sync"
+	"time"
+)
+
+// ResponseCache is a size-capped, TTL-based cache for LLM responses to avoid redundant calls
+type ResponseCache struct {
+	mu      sync.RWMutex
+	entries map[string]*cacheEntry
+	maxSize int
+	ttl     time.Duration
+}
+
+type cacheEntry struct {
+	response  string
+	timestamp time.Time
+}
+
+// Global cache instance
+var (
+	responseCache     *ResponseCache
+	responseCacheOnce sync.Once
+)
+
+// GetResponseCache returns the singleton response cache
+func GetResponseCache() *ResponseCache {
+	responseCacheOnce.Do(func() {
+		responseCache = NewResponseCache(100, 5*time.Minute) // 100 entries, 5 min TTL
+	})
+	return responseCache
+}
+
+// NewResponseCache creates a new response cache
+func NewResponseCache(maxSize int, ttl time.Duration) *ResponseCache {
+	cache := &ResponseCache{
+		entries: make(map[string]*cacheEntry),
+		maxSize: maxSize,
+		ttl:     ttl,
+	}
+
+	// Start background cleanup goroutine
+	go cache.cleanupLoop()
+
+	return cache
+}
+
+// generateKey creates a cache key from command signature
+func (c *ResponseCache) generateKey(command, commandType, exitCode, mode string) string {
+	// Create a hash of the command signature
+	h := sha256.New()
+	h.Write([]byte(command))
+	h.Write([]byte("|"))
+	h.Write([]byte(commandType))
+	h.Write([]byte("|"))
+	h.Write([]byte(exitCode))
+	h.Write([]byte("|"))
+	h.Write([]byte(mode))
+	return hex.EncodeToString(h.Sum(nil))[:16] // Use first 16 chars of hash
+}
+
+// Get retrieves a cached response if available and not expired
+func (c *ResponseCache) Get(command, commandType, exitCode, mode string) (string, bool) {
+	key := c.generateKey(command, commandType, exitCode, mode)
+
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	entry, exists := c.entries[key]
+	if !exists {
+		return "", false
+	}
+
+	// Check if expired
+	if time.Since(entry.timestamp) > c.ttl {
+		return "", false
+	}
+
+	return entry.response, true
+}
+
+// Set stores a response in the cache
+func (c *ResponseCache) Set(command, commandType, exitCode, mode, response string) {
+	key := c.generateKey(command, commandType, exitCode, mode)
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	// Evict oldest entries if at capacity
+	if len(c.entries) >= c.maxSize {
+		c.evictOldest()
+	}
+
+	c.entries[key] = &cacheEntry{
+		response:  response,
+		timestamp: time.Now(),
+	}
+}
+
+// evictOldest removes the oldest entry (must be called with lock held)
+func (c *ResponseCache) evictOldest() {
+	var oldestKey string
+	var oldestTime time.Time
+
+	for key, entry := range c.entries {
+		if oldestKey == "" || entry.timestamp.Before(oldestTime) {
+			oldestKey = key
+			oldestTime = entry.timestamp
+		}
+	}
+
+	if oldestKey != "" {
+		delete(c.entries, oldestKey)
+	}
+}
+
+// cleanupLoop periodically removes expired entries
+func (c *ResponseCache) cleanupLoop() {
+	ticker := time.NewTicker(1 * time.Minute)
+	defer ticker.Stop()
+
+	for range ticker.C {
+		c.cleanup()
+	}
+}
+
+// cleanup removes all expired entries
+func (c *ResponseCache) cleanup() {
+	c.mu.Lock()
+	defer c.mu.Unlock()

+	now := time.Now()
+	for key, entry := range c.entries {
+		if now.Sub(entry.timestamp) > c.ttl {
+			delete(c.entries, key)
+		}
+	}
+}
+
+// Stats returns cache statistics
+func (c *ResponseCache) Stats() map[string]interface{} {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	return map[string]interface{}{
+		"size":     len(c.entries),
+		"max_size": c.maxSize,
+		"ttl_secs": c.ttl.Seconds(),
+	}
+}
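
Because the generation mode is hashed into the key, snappy and spicy responses for the same command never collide. A minimal round-trip sketch, written as if it were a test in package llm (hypothetical file internal/llm/cache_test.go; the values are illustrative):

package llm

import (
	"testing"
	"time"
)

func TestResponseCacheRoundTrip(t *testing.T) {
	c := NewResponseCache(10, time.Minute)

	// Miss before Set.
	if _, ok := c.Get("git push", "git", "1", "snappy"); ok {
		t.Fatal("expected a miss on an empty cache")
	}

	c.Set("git push", "git", "1", "snappy", "Pushed? You barely nudged.")

	// Hit after Set.
	if got, ok := c.Get("git push", "git", "1", "snappy"); !ok || got != "Pushed? You barely nudged." {
		t.Fatalf("expected a hit, got %q, %v", got, ok)
	}

	// Mode is part of the hashed key, so a different mode misses.
	if _, ok := c.Get("git push", "git", "1", "spicy"); ok {
		t.Fatal("expected a miss for a different mode")
	}
}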

internal/llm/fallback_database.go (modified)
@@ -2913,6 +2913,11 @@ func GetExpandedFallback(commandType string, command string) string {
 		responses, exists = InsultExpansionV2[commandType]
 	}
 
+	// Try v3 expansion database (kubernetes, terraform, ai_ml, http_errors, cloud)
+	if !exists {
+		responses, exists = InsultExpansionV3[commandType]
+	}
+
 	// Fall back to generic if still not found
 	if !exists {
 		responses = ExpandedFallbackDatabase["generic"]
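
GetExpandedFallback is now a three-stage lookup chain ahead of the generic bucket. Restated as a standalone sketch; the stage-1 map is outside this hunk, so ExpandedFallbackDatabase is an assumption based on the generic fallback, and lookupResponses itself is hypothetical:

// Illustrative restatement of the lookup order, not code from the commit.
func lookupResponses(commandType string) []string {
	if r, ok := ExpandedFallbackDatabase[commandType]; ok { // stage 1 (assumed)
		return r
	}
	if r, ok := InsultExpansionV2[commandType]; ok { // stage 2
		return r
	}
	if r, ok := InsultExpansionV3[commandType]; ok { // stage 3, new in this commit
		return r
	}
	return ExpandedFallbackDatabase["generic"] // last resort
}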

internal/llm/insult_expansion_v3.go (added)
@@ -0,0 +1,247 @@
+package llm
+
+// InsultExpansionV3 - ROUND 3: Kubernetes, Terraform/Cloud, AI/ML, and HTTP/API insults
+// Because DevOps and ML disasters deserve their own category
+var InsultExpansionV3 = map[string][]string{
+
+	// ==================== KUBERNETES (45 insults) ====================
+	"kubernetes": {
+		// Pod failures
+		"CrashLoopBackOff: when even the cluster gives up on you.",
+		"Pod evicted: your code wasn't paying rent in the cluster.",
+		"ImagePullBackOff: Docker Hub is ghosting you today.",
+		"OOMKilled: your memory management is the real disaster here.",
+		"Pod pending forever: even Kubernetes can't figure out your mess.",
+		"Container terminated: the only sensible exit strategy.",
+		"Init container failed: couldn't even start failing properly.",
+		"Liveness probe failed: your pod is dead, just like your skills.",
+		"Readiness probe failed: your app wasn't ready. Neither are you.",
+		"Your pod is restarting more than your failed relationships.",
+
+		// Deployment disasters
+		"Deployment rollback triggered: even Kubernetes wants to undo you.",
+		"ReplicaSet can't scale: your code doesn't scale either.",
+		"Rolling update stuck: stuck like your career.",
+		"kubectl apply failed: YAML isn't that hard. For most people.",
+		"HPA gave up: your app can't handle success anyway.",
+		"PodDisruptionBudget violated: budget for disaster was exceeded.",
+		"Your deployment strategy is 'recreate everything and pray.'",
+		"Surge capacity exceeded: your mistakes surge faster than your pods.",
+		"Deployment deadline exceeded: deadline for competence also passed.",
+		"Your rollout is rolling back faster than your resume updates.",
+
+		// Resource issues
+		"ResourceQuota exceeded: you exceeded the incompetence quota too.",
+		"CPU throttled: your code efficiency matches your brain efficiency.",
+		"Memory limit reached: should have limited your ambitions instead.",
+		"PersistentVolumeClaim pending: your claims to skill are also pending.",
+		"StorageClass not found: class 'Competent Developer' also not found.",
+		"No nodes available: nodes are hiding from your workload.",
+		"Insufficient resources: for your pods AND your excuses.",
+		"Node pressure: your code puts pressure on everything it touches.",
+		"Eviction threshold reached: you reached the threshold for employment too.",
+		"Your resource requests are fiction. So is your understanding of k8s.",
+
+		// Configuration chaos
+		"ConfigMap missing: so is your configuration management skill.",
+		"Secret not found: your incompetence is no secret though.",
+		"RBAC denied: Role-Based Access says you can't access success.",
+		"ServiceAccount error: your account of events is also wrong.",
+		"Namespace not found: you're lost in more ways than one.",
+		"Context switching failed: between k8s clusters AND competence.",
+		"kubeconfig invalid: your config for life is also questionable.",
+		"API server unreachable: like your career goals.",
+		"etcd timeout: your learning also timed out years ago.",
+		"Admission webhook rejected: webhook has better judgment than your hiring manager.",
+
+		// Networking nightmares
+		"Service unavailable: like your competence.",
+		"Ingress misconfigured: traffic can't find your app. Neither can users.",
+		"NetworkPolicy blocking: blocking your code is actually correct.",
+		"DNS resolution failed: your code can't even find localhost.",
+		"ClusterIP not working: nothing in your cluster works.",
+	},
+
+	// ==================== TERRAFORM/CLOUD (35 insults) ====================
+	"terraform": {
+		// State disasters
+		"terraform destroy: finally doing something useful with your infra.",
+		"State file corrupted: a metaphor for your career trajectory.",
+		"State lock failed: someone else is already fixing your mistakes.",
+		"Drift detected: your code drifted from reality long ago.",
+		"Backend configuration error: your backend knowledge is also in error.",
+		"terraform import failed: can't import competence either.",
+		"State refresh error: refreshing won't fix fundamental problems.",
+		"Remote state not found: your remote chance of success also not found.",
+		"State file too large: like your ego vs your abilities.",
+		"Workspace confusion: you're confused in all workspaces.",
+
+		// Provider problems
+		"Provider error: even AWS doesn't want to work with you.",
+		"API rate limited: your mistakes exceeded the API's patience.",
+		"Credentials expired: so did your relevance.",
+		"Region not available: neither is your future in DevOps.",
+		"Service quota exceeded: quota for bad decisions also exceeded.",
+		"Provider version mismatch: your version and 'competent' don't match.",
+		"Authentication failed: terraform can tell you're a fraud.",
+		"IAM denied: Identity and Access confirms you shouldn't access anything.",
+		"Resource not found: your resources for learning also not found.",
+		"Provider crashed: looking at your code will do that.",
+
+		// Resource failures
+		"Resource creation failed: creation of your career also failed.",
+		"Dependency cycle detected: you depend on failure consistently.",
+		"Timeout waiting for resource: still waiting for your skill to deploy.",
+		"Validation failed: your code failed validation. So did your degree.",
+		"Variables undefined: your career path is also undefined.",
+		"Output error: the only output is embarrassment.",
+		"Module not found: 'successful_deployment' module missing.",
+		"Plan failed: your life plan also needs review.",
+		"Apply error: apply this to your resume: 'needs improvement.'",
+		"Destroy failed: can't even destroy properly. Impressive.",
+
+		// Cloud catastrophes
+		"S3 bucket public: your mistakes are also very public.",
+		"Lambda timeout: your functions fail as slowly as possible.",
+		"EC2 terminated: instance of competence also terminated.",
+		"RDS connection refused: database refused your terrible queries.",
+		"CloudFormation drift: drifting further from employability.",
+	},
+
+	// ==================== AI/ML (40 insults) ====================
+	"ai_ml": {
+		// GPU/CUDA disasters
+		"CUDA out of memory: your model is as bloated as your ego.",
+		"GPU not found: your neural network found nothing either.",
+		"CUDA version mismatch: mismatch between your skills and requirements too.",
+		"cuDNN error: your deep learning is very shallow.",
+		"NCCL error: distributed training can't distribute your incompetence.",
+		"torch.cuda.is_available() returns False, and so does your career.",
+		"GPU utilization 0%: matches your brain utilization.",
+		"OOM killer struck: should have killed your model idea first.",
+		"Driver version incompatible: you're incompatible with success.",
+		"Memory allocation failed: allocate some time for learning basics.",
+
+		// Training failures
+		"NaN loss: your gradients vanished like your debugging skills.",
+		"Loss not decreasing: your competence isn't increasing either.",
+		"Validation loss exploding: your mistakes also explode exponentially.",
+		"Overfitting to training data: and overfitting to bad practices.",
+		"Underfitting everything: including job requirements.",
+		"Gradient explosion: the only thing exploding is your career.",
+		"Learning rate too high: ambition too high, skill too low.",
+		"Model diverged: diverged from anything resembling ML knowledge.",
+		"Early stopping triggered: should have stopped you earlier.",
+		"Accuracy stuck at 50%: your model learned to flip a coin.",
+
+		// Model issues
+		"Model too large: compensating for something?",
+		"Model won't load: brain cells also won't load.",
+		"Checkpoint corrupted: your understanding is also corrupted.",
+		"Weights initialization failed: your project was doomed from the start.",
+		"Architecture makes no sense: designed by throwing layers at the wall.",
+		"Batch size too large: bigger isn't always better. Applies to egos too.",
+		"Embedding dimension mismatch: dimensions of your confusion also mismatch.",
+		"Tokenizer error: can't tokenize your excuses.",
+		"Inference failed: your ability to infer solutions also failed.",
+		"Model prediction: always wrong. Like your career choices.",
+
+		// Data disasters
+		"Dataset not found: your dataset of achievements also empty.",
+		"Data loader crashed: crashed harder than your GPU.",
+		"Label mismatch: your labels and reality don't match.",
+		"Preprocessing failed: pre-thinking also failed.",
+		"Data augmentation broke: augmenting garbage gives more garbage.",
+		"Feature extraction error: can't extract features from nothing.",
+		"Normalization failed: nothing normal about your approach.",
+		"Train/test split leaked: your incompetence also leaked everywhere.",
+		"Class imbalance: your skills are imbalanced too.",
+		"Corrupted samples: sample of your work is also corrupted.",
+	},
+
+	// ==================== HTTP/API ERRORS (35 insults) ====================
+	"http_errors": {
+		// Client errors (4xx)
+		"400 Bad Request: your request is as bad as your code.",
+		"401 Unauthorized: even the API knows you shouldn't be here.",
+		"403 Forbidden: the server has better judgment than your manager.",
+		"404 Not Found: your skills are also not found.",
+		"405 Method Not Allowed: your methods aren't allowed in production either.",
+		"408 Request Timeout: patience for your code also timed out.",
+		"409 Conflict: the only thing consistent about you.",
+		"410 Gone: like your chances of success.",
+		"413 Payload Too Large: your ego is also payload too large.",
+		"415 Unsupported Media Type: your code type is also unsupported.",
+		"418 I'm a Teapot: you're a disaster.",
+		"422 Unprocessable Entity: your code is unprocessable by any brain.",
+		"429 Too Many Requests: slow down, the API isn't your therapist.",
+		"451 Unavailable For Legal Reasons: your code should also be illegal.",
+
+		// Server errors (5xx)
+		"500 Internal Server Error: you broke the server. Congratulations.",
+		"501 Not Implemented: like your understanding of REST.",
+		"502 Bad Gateway: the server between you and success has crashed.",
+		"503 Service Unavailable: like your competence.",
+		"504 Gateway Timeout: gateway gave up waiting for your code to work.",
+		"505 HTTP Version Not Supported: your version of 'working code' isn't supported.",
+		"507 Insufficient Storage: insufficient storage for all your mistakes.",
+		"508 Loop Detected: you're stuck in a loop of bad decisions.",
+		"511 Network Authentication Required: authenticate your claims to skill first.",
+
+		// curl/wget specific
+		"curl: Connection refused: server is refusing your advances.",
+		"curl: Could not resolve host: your code can't resolve anything.",
+		"wget: Connection timed out: even wget is tired of waiting.",
+		"SSL certificate problem: your certificate of competence is also invalid.",
+		"Connection reset by peer: peer reviewed your code and reset everything.",
+		"Network unreachable: like your career aspirations.",
+		"curl: (7) Failed to connect: you fail to connect with success too.",
+		"Host not found: hosting your code should also not be found.",
+		"Certificate verification failed: your skills failed verification too.",
+		"Protocol error: you're speaking the wrong protocol. In life too.",
+		"Response too large: larger than your debugging capabilities.",
+		"Malformed response: your understanding is also malformed.",
+	},
+
+	// ==================== CLOUD PROVIDER SPECIFIC (25 insults) ====================
+	"cloud": {
+		// AWS
+		"AWS bill arrived: your wallet just filed for bankruptcy.",
+		"Lambda cold start: your brain also has cold starts.",
+		"S3 access denied: denied like your promotion.",
+		"DynamoDB throttled: your throughput of good ideas is also limited.",
+		"EC2 instance terminated: unlike your employment. For now.",
+		"CloudWatch alarm: alarming how bad this is.",
+		"ECS task failed: task 'be competent' also failed.",
+		"SQS message lost: like your message to the team about testing.",
+
+		// GCP
+		"GCP quota exceeded: quota for patience also exceeded.",
+		"BigQuery timeout: big questions about your competence too.",
+		"Cloud Functions crashed: function 'write_good_code' not defined.",
+		"GKE cluster error: cluster of mistakes growing.",
+
+		// Azure
+		"Azure outage: your code causes outages too.",
+		"Blob storage error: blob of errors in your code.",
+		"Azure Functions timeout: functions of your brain also timeout.",
+		"App Service failed: your service to the team has also failed.",
+
+		// General cloud
+		"Cloud costs: $10,000/month for 'Hello World.'",
+		"Auto-scaling scaled to zero: correct assessment of your value.",
+		"CDN cache miss: your code misses the point entirely.",
+		"Load balancer unhealthy: health check for your code: terminal.",
+		"Database connection pool exhausted: pool of excuses also exhausted.",
+		"Message queue backed up: backed up like your technical debt.",
+		"Container registry error: registering your failures since day one.",
+		"VPC misconfigured: Very Poorly Configured.",
+		"IAM role missing: role 'competent developer' is also missing.",
+	},
+}
+
+// init is intentionally empty; no explicit registration is needed
+func init() {
+	// V3 categories are automatically available through GetExpandedFallback
+	// which checks InsultExpansionV3 after V2
+}

internal/llm/manager.go (modified)
@@ -10,9 +10,10 @@ import (
 )
 
 type LLMManager struct {
-	config     *config.Config
-	apiClient  *APIClient
+	config       *config.Config
+	apiClient    *APIClient
 	ollamaClient *OllamaClient
+	cache        *ResponseCache
 }
 
 type Backend string
@@ -23,9 +24,18 @@ const (
 	BackendFallback Backend = "fallback"
 )
 
+// getLocalTimeout returns the appropriate timeout based on generation mode
+func getLocalTimeout(cfg *config.Config) time.Duration {
+	if cfg.General.GenerationMode == "spicy" {
+		return 5 * time.Second // Patient timeout for quality mode
+	}
+	return 3 * time.Second // Snappy timeout (raw Ollama ~1.4s, needs headroom)
+}
+
 func NewLLMManager(cfg *config.Config) *LLMManager {
 	manager := &LLMManager{
 		config: cfg,
+		cache:  GetResponseCache(),
 	}
 	
 	// Initialize API client if enabled
@@ -44,7 +54,10 @@ func NewLLMManager(cfg *config.Config) *LLMManager {
 			cfg.Local.Endpoint,
 			cfg.Local.Model,
 		)
-		
+
+		// Set generation mode (snappy = fast, spicy = quality)
+		manager.ollamaClient.SetMode(cfg.General.GenerationMode)
+
 		// Warm up the model in the background for better performance
 		if manager.ollamaClient.IsAvailable() {
 			go func() {
@@ -96,14 +109,14 @@ func (m *LLMManager) Generate(ctx context.Context, prompt string, commandType st
 	// 2. Try local Ollama (if available)
 	if m.ollamaClient != nil && m.config.Local.Enabled {
 		if m.config.General.Debug {
-			fmt.Printf("🔍 Trying local backend...\n")
+			fmt.Printf("🔍 Trying local backend (%s mode)...\n", m.config.General.GenerationMode)
 		}
-		
-		// Create timeout context for local calls
-		timeoutDuration := time.Duration(m.config.Local.Timeout) * time.Second
+
+		// Create timeout context based on generation mode
+		timeoutDuration := getLocalTimeout(m.config)
 		localCtx, cancel := context.WithTimeout(ctx, timeoutDuration)
 		defer cancel()
-		
+
 		response, err := m.ollamaClient.Generate(localCtx, prompt)
 		if m.config.General.Debug {
 			fmt.Printf("🐛 Raw Ollama response: '%s', error: %v\n", response, err)
@@ -115,12 +128,12 @@ func (m *LLMManager) Generate(ctx context.Context, prompt string, commandType st
 			}
 			return response, BackendLocal
 		}
-		
+
 		if m.config.General.Debug {
 			fmt.Printf("❌ Local backend failed: %v\n", err)
 		}
 	}
-	
+
 	// 3. Fallback to hardcoded responses
 	if m.config.General.Debug {
 		fmt.Printf("🔄 Using fallback backend\n")
@@ -135,6 +148,16 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 		return m.generateFallback(commandType, fullCommand, exitCode), BackendFallback
 	}
 
+	// Check cache first for repeated failures
+	if m.cache != nil {
+		if cached, found := m.cache.Get(fullCommand, commandType, exitCode, m.config.General.GenerationMode); found {
+			if m.config.General.Debug {
+				fmt.Printf("⚡ Cache hit!\n")
+			}
+			return cached, BackendLocal // Treat cache as local backend
+		}
+	}
+
 	// Try backends in priority order: API -> Local -> Fallback
 
 	// 1. Try API first (if available)
@@ -154,6 +177,10 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 			if m.config.General.Debug {
 				fmt.Printf("✅ API backend succeeded\n")
 			}
+			// Cache successful response
+			if m.cache != nil {
+				m.cache.Set(fullCommand, commandType, exitCode, m.config.General.GenerationMode, response)
+			}
 			return response, BackendAPI
 		}
 
@@ -165,11 +192,11 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 	// 2. Try local Ollama (if available)
 	if m.ollamaClient != nil && m.config.Local.Enabled {
 		if m.config.General.Debug {
-			fmt.Printf("🔍 Trying local backend...\n")
+			fmt.Printf("🔍 Trying local backend (%s mode)...\n", m.config.General.GenerationMode)
 		}
 
-		// Create timeout context for local calls
-		timeoutDuration := time.Duration(m.config.Local.Timeout) * time.Second
+		// Create timeout context based on generation mode
+		timeoutDuration := getLocalTimeout(m.config)
 		localCtx, cancel := context.WithTimeout(ctx, timeoutDuration)
 		defer cancel()
 
@@ -182,6 +209,10 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 			if m.config.General.Debug {
 				fmt.Printf("✅ Local backend succeeded with: '%s'\n", response)
 			}
+			// Cache successful response
+			if m.cache != nil {
+				m.cache.Set(fullCommand, commandType, exitCode, m.config.General.GenerationMode, response)
+			}
 			return response, BackendLocal
 		}
 
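
These per-call deadlines sit inside the outer 4s/6s contexts created in cmd/mock.go, leaving roughly a second for fallback work if Ollama times out. A sketch test pinning down getLocalTimeout (hypothetical internal/llm/manager_test.go; the module path in the import is a guess from the repo name):

package llm

import (
	"testing"
	"time"

	"github.com/tenseleyflow/parrot/internal/config" // assumed import path
)

func TestGetLocalTimeout(t *testing.T) {
	cfg := config.DefaultConfig() // GenerationMode defaults to "snappy"
	if got := getLocalTimeout(cfg); got != 3*time.Second {
		t.Fatalf("snappy timeout = %v, want 3s", got)
	}

	cfg.General.GenerationMode = "spicy"
	if got := getLocalTimeout(cfg); got != 5*time.Second {
		t.Fatalf("spicy timeout = %v, want 5s", got)
	}
}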

internal/llm/ollama.go (modified)
@@ -13,6 +13,7 @@ import (
 type OllamaClient struct {
 	BaseURL string
 	Model   string
+	Mode    string // "snappy" (fast) or "spicy" (quality)
 	client  *http.Client
 }
 
@@ -43,32 +44,60 @@ func NewOllamaClient(baseURL, model string) *OllamaClient {
 	if model == "" {
 		model = "llama3.2:3b"
 	}
-	
+
 	return &OllamaClient{
 		BaseURL: baseURL,
 		Model:   model,
+		Mode:    "snappy", // Default to fast mode
 		client: &http.Client{
 			Timeout: 60 * time.Second, // Maximum timeout; actual timeout controlled by context
 		},
 	}
 }
 
+// SetMode sets the generation mode ("snappy" for speed, "spicy" for quality)
+func (c *OllamaClient) SetMode(mode string) {
+	if mode == "spicy" || mode == "snappy" {
+		c.Mode = mode
+	}
+}
+
+// getOptionsForMode returns optimized generation options based on mode
+func (c *OllamaClient) getOptionsForMode() *GenerateOptions {
+	if c.Mode == "spicy" {
+		// Spicy mode: richer responses, more creative, willing to wait
+		return &GenerateOptions{
+			NumPredict:  80,   // Longer responses
+			NumCtx:      1024, // Rich context window
+			Temperature: 0.85, // More creative
+		}
+	}
+	// Snappy mode (default): fast and punchy
+	return &GenerateOptions{
+		NumPredict:  40,  // Short and punchy
+		NumCtx:      256, // Minimal context
+		Temperature: 0.6, // Faster convergence
+	}
+}
+
 func (c *OllamaClient) Generate(ctx context.Context, prompt string) (string, error) {
 	u, err := url.JoinPath(c.BaseURL, "/api/generate")
 	if err != nil {
 		return "", fmt.Errorf("invalid base URL: %w", err)
 	}
 
+	// Mode-specific keep-alive (generation options are set below)
+	keepAlive := "5m"
+	if c.Mode == "spicy" {
+		keepAlive = "15m" // Keep model warm longer for quality mode
+	}
+
 	req := GenerateRequest{
 		Model:     c.Model,
 		Prompt:    prompt,
 		Stream:    false,
-		KeepAlive: "10m", // Keep model loaded for 10 minutes to avoid cold starts
-		Options: &GenerateOptions{
-			NumPredict:  60,  // Limit output tokens (insults are short)
-			NumCtx:      512, // Small context window (prompts are ~500 chars)
-			Temperature: 0.8, // Good creativity for variety
-		},
+		KeepAlive: keepAlive,
+		Options:   c.getOptionsForMode(),
 	}
 
 	reqBody, err := json.Marshal(req)
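
The mode plumbing is small enough to pin down with an in-package sketch test (hypothetical internal/llm/ollama_test.go; the GenerateOptions field names come from the diff above):

package llm

import "testing"

func TestOllamaClientModes(t *testing.T) {
	c := NewOllamaClient("http://localhost:11434", "llama3.2:3b")
	if c.Mode != "snappy" {
		t.Fatalf("default mode = %q, want snappy", c.Mode)
	}

	c.SetMode("spicy")
	if opts := c.getOptionsForMode(); opts.NumPredict != 80 || opts.NumCtx != 1024 {
		t.Fatalf("unexpected spicy options: %+v", opts)
	}

	// SetMode ignores unknown values, so Mode stays "spicy".
	c.SetMode("extra-crispy")
	if c.Mode != "spicy" {
		t.Fatalf("mode = %q, want spicy after invalid SetMode", c.Mode)
	}
}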

internal/llm/tier4_templates.go (modified)
@@ -283,7 +283,7 @@ func GenerateStreakEscalation(streak int, ctx SmartFallbackContext) string {
 		}
 	} else if streak >= 3 {
 		templates = []string{
-			fmt.Sprintf("Third failure in a row. Three strikes: You're out.", streak),
+			fmt.Sprintf("Failure #%d in a row. Three strikes: You're out.", streak),
 			fmt.Sprintf("%d consecutive fails. Trying the same thing expecting different results?", streak),
 			fmt.Sprintf("Failure #%d. Pattern detected: You.", streak),
 			fmt.Sprintf("%d in a row. Maybe read the docs this time?", streak),