tenseleyflow/parrot / a0a7d5e

snippy/snappy

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: a0a7d5e5086a7ab4ceb4a7051b3fdf47704a6858
Parents: 77cfd69
Tree: 22f3a14

10 changed files

Status  File                                   +    -
M       .gitignore                             2    0
M       cmd/benchmark/main.go                  3    3
M       cmd/mock.go                           40   10
M       internal/config/config.go             15   10
A       internal/llm/cache.go                153    0
M       internal/llm/fallback_database.go      5    0
A       internal/llm/insult_expansion_v3.go  247    0
M       internal/llm/manager.go               44   13
M       internal/llm/ollama.go                36    7
M       internal/llm/tier4_templates.go        1    1

.gitignore (modified)
@@ -1,3 +1,5 @@
 dist/
 rpmbuild/
 parrot
+CLAUDE.md
+parrot-race

cmd/benchmark/main.go (modified)
@@ -7,7 +7,7 @@ import (
 
 func main() {
 	fmt.Println("Parrot Insult System Benchmark")
-	fmt.Println("================================\n")
+	fmt.Println("================================")
 
 	// Create benchmark
 	benchmark := llm.NewBenchmark()
@@ -22,7 +22,7 @@ func main() {
 
 	fmt.Println("Training ensemble system...")
 	ensemble.Train()
-	fmt.Println("Training complete!\n")
+	fmt.Println("Training complete!")
 
 	// Run benchmark
 	fmt.Println("Running benchmark...")
@@ -34,7 +34,7 @@ func main() {
 
 	// Print detailed sample results
 	fmt.Println("\nDetailed Sample Results:")
-	fmt.Println("========================\n")
+	fmt.Println("========================")
 
 	for i, score := range results.DetailedScores {
 		if i >= 10 { // Show first 10

cmd/mock.go (modified)
@@ -32,6 +32,9 @@ func Execute() {
 	}
 }
 
+// CLI flags
+var spicyMode bool
+
 var mockCmd = &cobra.Command{
 	Use:   "mock [command] [exit_code]",
 	Short: "Mock a failed command",
@@ -42,6 +45,9 @@ var mockCmd = &cobra.Command{
 
 func init() {
 	rootCmd.AddCommand(mockCmd)
+
+	// Add --spicy flag for quality mode (default is snappy/fast)
+	mockCmd.Flags().BoolVar(&spicyMode, "spicy", false, "Use spicy mode (richer responses, slightly slower)")
 }
 
 func mockCommand(cmd *cobra.Command, args []string) {
@@ -95,8 +101,8 @@ func detectCommandType(command string) string {
 		return "kubernetes"
 
 	// HTTP/Network
-	case "curl", "wget", "http", "https":
-		return "http"
+	case "curl", "wget", "http", "https", "httpie":
+		return "http_errors"
 
 	// SSH/Remote
 	case "ssh", "scp", "sftp", "rsync":
@@ -110,8 +116,16 @@ func detectCommandType(command string) string {
 	case "cd", "pushd", "popd":
 		return "navigation"
 
-	// Python
+	// Python - check for ML frameworks first
 	case "python", "python3", "pip", "pip3", "poetry", "pipenv", "conda":
+		// Check if this is an AI/ML command
+		if strings.Contains(command, "torch") || strings.Contains(command, "tensorflow") ||
+			strings.Contains(command, "keras") || strings.Contains(command, "sklearn") ||
+			strings.Contains(command, "pytorch") || strings.Contains(command, "transformers") ||
+			strings.Contains(command, "cuda") || strings.Contains(command, "gpu") ||
+			strings.Contains(command, "train") || strings.Contains(command, "model") {
+			return "ai_ml"
+		}
 		return "python_expanded"
 
 	// Rust
@@ -161,8 +175,16 @@ func detectCommandType(command string) string {
 	case "perf", "valgrind", "gprof", "strace", "ltrace", "top", "htop", "iotop":
 		return "performance"
 
+	// AI/ML tools
+	case "nvidia-smi", "nvcc", "tensorboard", "mlflow", "wandb", "jupyter", "ipython":
+		return "ai_ml"
+
+	// Terraform/IaC
+	case "terraform", "pulumi", "cdktf", "terragrunt":
+		return "terraform"
+
 	// Cloud providers
-	case "aws", "gcloud", "az", "terraform", "pulumi", "cloudformation":
+	case "aws", "gcloud", "az", "cloudformation", "cdk":
		return "cloud"
 
 	// DevOps tools
@@ -190,16 +212,24 @@ func generateSmartResponse(cmdType, command, exitCode string) (string, *config.C
 		defaultCfg := config.DefaultConfig()
 		return getFallbackResponse(cmdType), defaultCfg
 	}
-	
+
+	// Override mode if --spicy flag is set
+	if spicyMode {
+		cfg.General.GenerationMode = "spicy"
+	}
+
 	// Initialize LLM manager
 	manager := llm.NewLLMManager(cfg)
-	
+
 	// Build context-aware prompt with personality
 	prompt := prompts.BuildPrompt(cmdType, command, exitCode, cfg.General.Personality)
-	
-	// Use a reasonable timeout for LLM responses (6 seconds max)
-	// With optimized Ollama options, responses should be under 2 seconds when warm
-	maxTimeout := 6 * time.Second
+
+	// Set timeout based on generation mode
+	// Snappy: 4s max (3s LLM + 1s buffer), Spicy: 6s max (5s LLM + 1s buffer)
+	maxTimeout := 4 * time.Second
+	if cfg.General.GenerationMode == "spicy" {
+		maxTimeout = 6 * time.Second
+	}
 	ctx, cancel := context.WithTimeout(context.Background(), maxTimeout)
 	defer cancel()
 	
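
A quick sanity check for the new routing is a table test against detectCommandType. This is a sketch, not part of the commit: it assumes detectCommandType dispatches on the command's leading token (the strings.Contains checks in the python case only make sense if full command strings flow through), and the file name is hypothetical.

// cmd/mock_routing_test.go (hypothetical), in package cmd next to mock.go
package cmd

import "testing"

func TestDetectCommandTypeRouting(t *testing.T) {
	cases := map[string]string{
		"curl":              "http_errors", // was "http" before this commit
		"httpie":            "http_errors", // new alias
		"terraform":         "terraform",   // moved out of "cloud"
		"aws":               "cloud",
		"nvidia-smi":        "ai_ml",
		"pip install torch": "ai_ml", // ML keyword inside a python-family command
		"pip install flask": "python_expanded",
	}
	for command, want := range cases {
		if got := detectCommandType(command); got != want {
			t.Errorf("detectCommandType(%q) = %q, want %q", command, got, want)
		}
	}
}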

internal/config/config.go (modified)
@@ -37,11 +37,12 @@ type LocalConfig struct {
 }
 
 type GeneralConfig struct {
-	Personality  string `toml:"personality"`   // "savage", "sarcastic", "mild"
-	FallbackMode bool   `toml:"fallback_mode"` // Use hardcoded responses only
-	Debug        bool   `toml:"debug"`         // Debug logging
-	Colors       bool   `toml:"colors"`        // Enable colored output
-	Enhanced     bool   `toml:"enhanced"`      // Enhanced formatting with borders/emphasis
+	Personality    string `toml:"personality"`     // "savage", "sarcastic", "mild"
+	GenerationMode string `toml:"generation_mode"` // "snappy" (fast) or "spicy" (quality)
+	FallbackMode   bool   `toml:"fallback_mode"`   // Use hardcoded responses only
+	Debug          bool   `toml:"debug"`           // Debug logging
+	Colors         bool   `toml:"colors"`          // Enable colored output
+	Enhanced       bool   `toml:"enhanced"`        // Enhanced formatting with borders/emphasis
 }
 
 // Default configuration
@@ -63,11 +64,12 @@ func DefaultConfig() *Config {
 			Timeout:  5,  // 5 seconds with optimized generation options should be plenty
 		},
 		General: GeneralConfig{
-			Personality:  "savage",
-			FallbackMode: false,
-			Debug:        false,
-			Colors:       true,
-			Enhanced:     false,
+			Personality:    "savage",
+			GenerationMode: "snappy", // Default to fast mode
+			FallbackMode:   false,
+			Debug:          false,
+			Colors:         true,
+			Enhanced:       false,
 		},
 	}
 }
@@ -149,6 +151,9 @@ func loadFromEnv(config *Config) {
 	if personality := os.Getenv("PARROT_PERSONALITY"); personality != "" {
 		config.General.Personality = personality
 	}
+	if mode := os.Getenv("PARROT_MODE"); mode != "" {
+		config.General.GenerationMode = mode
+	}
 	if os.Getenv("PARROT_FALLBACK_ONLY") == "true" {
 		config.General.FallbackMode = true
 	}
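
With this change the mode has three sources, resolved in a fixed order: DefaultConfig's "snappy", then the PARROT_MODE environment variable, then the --spicy flag applied in cmd/mock.go. A self-contained sketch of that precedence (illustrative only; resolveMode is not a real parrot function):

package main

import (
	"fmt"
	"os"
)

// resolveMode mirrors the precedence implied by the diffs:
// built-in default < PARROT_MODE < --spicy flag.
func resolveMode(spicyFlag bool) string {
	mode := "snappy" // DefaultConfig
	if env := os.Getenv("PARROT_MODE"); env != "" {
		mode = env // loadFromEnv
	}
	if spicyFlag {
		mode = "spicy" // cmd/mock.go applies the flag last
	}
	return mode
}

func main() {
	fmt.Println(resolveMode(false)) // "snappy"
	os.Setenv("PARROT_MODE", "spicy")
	fmt.Println(resolveMode(false)) // "spicy" via env
	fmt.Println(resolveMode(true))  // "spicy" via flag
}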

internal/llm/cache.go (added)
@@ -0,0 +1,153 @@
+package llm
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"sync"
+	"time"
+)
+
+// ResponseCache is a size-capped, TTL-based cache for LLM responses to avoid redundant calls
+type ResponseCache struct {
+	mu      sync.RWMutex
+	entries map[string]*cacheEntry
+	maxSize int
+	ttl     time.Duration
+}
+
+type cacheEntry struct {
+	response  string
+	timestamp time.Time
+}
+
+// Global cache instance
+var (
+	responseCache     *ResponseCache
+	responseCacheOnce sync.Once
+)
+
+// GetResponseCache returns the singleton response cache
+func GetResponseCache() *ResponseCache {
+	responseCacheOnce.Do(func() {
+		responseCache = NewResponseCache(100, 5*time.Minute) // 100 entries, 5 min TTL
+	})
+	return responseCache
+}
+
+// NewResponseCache creates a new response cache
+func NewResponseCache(maxSize int, ttl time.Duration) *ResponseCache {
+	cache := &ResponseCache{
+		entries: make(map[string]*cacheEntry),
+		maxSize: maxSize,
+		ttl:     ttl,
+	}
+
+	// Start background cleanup goroutine
+	go cache.cleanupLoop()
+
+	return cache
+}
+
+// generateKey creates a cache key from command signature
+func (c *ResponseCache) generateKey(command, commandType, exitCode, mode string) string {
+	// Create a hash of the command signature
+	h := sha256.New()
+	h.Write([]byte(command))
+	h.Write([]byte("|"))
+	h.Write([]byte(commandType))
+	h.Write([]byte("|"))
+	h.Write([]byte(exitCode))
+	h.Write([]byte("|"))
+	h.Write([]byte(mode))
+	return hex.EncodeToString(h.Sum(nil))[:16] // Use first 16 chars of hash
+}
+
+// Get retrieves a cached response if available and not expired
+func (c *ResponseCache) Get(command, commandType, exitCode, mode string) (string, bool) {
+	key := c.generateKey(command, commandType, exitCode, mode)
+
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	entry, exists := c.entries[key]
+	if !exists {
+		return "", false
+	}
+
+	// Check if expired
+	if time.Since(entry.timestamp) > c.ttl {
+		return "", false
+	}
+
+	return entry.response, true
+}
+
+// Set stores a response in the cache
+func (c *ResponseCache) Set(command, commandType, exitCode, mode, response string) {
+	key := c.generateKey(command, commandType, exitCode, mode)
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	// Evict oldest entries if at capacity
+	if len(c.entries) >= c.maxSize {
+		c.evictOldest()
+	}
+
+	c.entries[key] = &cacheEntry{
+		response:  response,
+		timestamp: time.Now(),
+	}
+}
+
+// evictOldest removes the oldest entry (must be called with lock held)
+func (c *ResponseCache) evictOldest() {
+	var oldestKey string
+	var oldestTime time.Time
+
+	for key, entry := range c.entries {
+		if oldestKey == "" || entry.timestamp.Before(oldestTime) {
+			oldestKey = key
+			oldestTime = entry.timestamp
+		}
+	}
+
+	if oldestKey != "" {
+		delete(c.entries, oldestKey)
+	}
+}
+
+// cleanupLoop periodically removes expired entries
+func (c *ResponseCache) cleanupLoop() {
+	ticker := time.NewTicker(1 * time.Minute)
+	defer ticker.Stop()
+
+	for range ticker.C {
+		c.cleanup()
+	}
+}
+
+// cleanup removes all expired entries
+func (c *ResponseCache) cleanup() {
+	c.mu.Lock()
+	defer c.mu.Unlock()

+	now := time.Now()
+	for key, entry := range c.entries {
+		if now.Sub(entry.timestamp) > c.ttl {
+			delete(c.entries, key)
+		}
+	}
+}
+
+// Stats returns cache statistics
+func (c *ResponseCache) Stats() map[string]interface{} {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	return map[string]interface{}{
+		"size":     len(c.entries),
+		"max_size": c.maxSize,
+		"ttl_secs": c.ttl.Seconds(),
+	}
+}
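
Because the generation mode is hashed into the key, snappy and spicy responses for the same command never collide. A minimal round-trip sketch, written as if it were a test in package llm (hypothetical file internal/llm/cache_test.go; the values are illustrative):

package llm

import (
	"testing"
	"time"
)

func TestResponseCacheRoundTrip(t *testing.T) {
	c := NewResponseCache(10, time.Minute)

	// Miss before Set.
	if _, ok := c.Get("git push", "git", "1", "snappy"); ok {
		t.Fatal("expected a miss on an empty cache")
	}

	c.Set("git push", "git", "1", "snappy", "Pushed? You barely nudged.")

	// Hit after Set.
	if got, ok := c.Get("git push", "git", "1", "snappy"); !ok || got != "Pushed? You barely nudged." {
		t.Fatalf("expected a hit, got %q, %v", got, ok)
	}

	// Mode is part of the hashed key, so a different mode misses.
	if _, ok := c.Get("git push", "git", "1", "spicy"); ok {
		t.Fatal("expected a miss for a different mode")
	}
}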

internal/llm/fallback_database.go (modified)
@@ -2913,6 +2913,11 @@ func GetExpandedFallback(commandType string, command string) string {
 		responses, exists = InsultExpansionV2[commandType]
 	}
 
+	// Try v3 expansion database (kubernetes, terraform, ai_ml, http_errors, cloud)
+	if !exists {
+		responses, exists = InsultExpansionV3[commandType]
+	}
+
 	// Fall back to generic if still not found
 	if !exists {
 		responses = ExpandedFallbackDatabase["generic"]
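
GetExpandedFallback is now a three-stage lookup chain ahead of the generic bucket. Restated as a standalone sketch; the stage-1 map is outside this hunk, so ExpandedFallbackDatabase is an assumption based on the generic fallback, and lookupResponses itself is hypothetical:

// Illustrative restatement of the lookup order, not code from the commit.
func lookupResponses(commandType string) []string {
	if r, ok := ExpandedFallbackDatabase[commandType]; ok { // stage 1 (assumed)
		return r
	}
	if r, ok := InsultExpansionV2[commandType]; ok { // stage 2
		return r
	}
	if r, ok := InsultExpansionV3[commandType]; ok { // stage 3, new in this commit
		return r
	}
	return ExpandedFallbackDatabase["generic"] // last resort
}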

internal/llm/insult_expansion_v3.go (added)
@@ -0,0 +1,247 @@
+package llm
+
+// InsultExpansionV3 - ROUND 3: Kubernetes, Terraform/Cloud, AI/ML, and HTTP/API insults
+// Because DevOps and ML disasters deserve their own category
+var InsultExpansionV3 = map[string][]string{
+
+	// ==================== KUBERNETES (45 insults) ====================
+	"kubernetes": {
+		// Pod failures
+		"CrashLoopBackOff: when even the cluster gives up on you.",
+		"Pod evicted: your code wasn't paying rent in the cluster.",
+		"ImagePullBackOff: Docker Hub is ghosting you today.",
+		"OOMKilled: your memory management is the real disaster here.",
+		"Pod pending forever: even Kubernetes can't figure out your mess.",
+		"Container terminated: the only sensible exit strategy.",
+		"Init container failed: couldn't even start failing properly.",
+		"Liveness probe failed: your pod is dead, just like your skills.",
+		"Readiness probe failed: your app wasn't ready. Neither are you.",
+		"Your pod is restarting more than your failed relationships.",
+
+		// Deployment disasters
+		"Deployment rollback triggered: even Kubernetes wants to undo you.",
+		"ReplicaSet can't scale: your code doesn't scale either.",
+		"Rolling update stuck: stuck like your career.",
+		"kubectl apply failed: YAML isn't that hard. For most people.",
+		"HPA gave up: your app can't handle success anyway.",
+		"PodDisruptionBudget violated: budget for disaster was exceeded.",
+		"Your deployment strategy is 'recreate everything and pray.'",
+		"Surge capacity exceeded: your mistakes surge faster than your pods.",
+		"Deployment deadline exceeded: deadline for competence also passed.",
+		"Your rollout is rolling back faster than your resume updates.",
+
+		// Resource issues
+		"ResourceQuota exceeded: you exceeded the incompetence quota too.",
+		"CPU throttled: your code efficiency matches your brain efficiency.",
+		"Memory limit reached: should have limited your ambitions instead.",
+		"PersistentVolumeClaim pending: your claims to skill are also pending.",
+		"StorageClass not found: class 'Competent Developer' also not found.",
+		"No nodes available: nodes are hiding from your workload.",
+		"Insufficient resources: for your pods AND your excuses.",
+		"Node pressure: your code puts pressure on everything it touches.",
+		"Eviction threshold reached: you reached the threshold for employment too.",
+		"Your resource requests are fiction. So is your understanding of k8s.",
+
+		// Configuration chaos
+		"ConfigMap missing: so is your configuration management skill.",
+		"Secret not found: your incompetence is no secret though.",
+		"RBAC denied: Role-Based Access says you can't access success.",
+		"ServiceAccount error: your account of events is also wrong.",
+		"Namespace not found: you're lost in more ways than one.",
+		"Context switching failed: between k8s clusters AND competence.",
+		"kubeconfig invalid: your config for life is also questionable.",
+		"API server unreachable: like your career goals.",
+		"etcd timeout: your learning also timed out years ago.",
+		"Admission webhook rejected: webhook has better judgment than your hiring manager.",
+
+		// Networking nightmares
+		"Service unavailable: like your competence.",
+		"Ingress misconfigured: traffic can't find your app. Neither can users.",
+		"NetworkPolicy blocking: blocking your code is actually correct.",
+		"DNS resolution failed: your code can't even find localhost.",
+		"ClusterIP not working: nothing in your cluster works.",
+	},
+
+	// ==================== TERRAFORM/CLOUD (35 insults) ====================
+	"terraform": {
+		// State disasters
+		"terraform destroy: finally doing something useful with your infra.",
+		"State file corrupted: a metaphor for your career trajectory.",
+		"State lock failed: someone else is already fixing your mistakes.",
+		"Drift detected: your code drifted from reality long ago.",
+		"Backend configuration error: your backend knowledge is also in error.",
+		"terraform import failed: can't import competence either.",
+		"State refresh error: refreshing won't fix fundamental problems.",
+		"Remote state not found: your remote chance of success also not found.",
+		"State file too large: like your ego vs your abilities.",
+		"Workspace confusion: you're confused in all workspaces.",
+
+		// Provider problems
+		"Provider error: even AWS doesn't want to work with you.",
+		"API rate limited: your mistakes exceeded the API's patience.",
+		"Credentials expired: so did your relevance.",
+		"Region not available: neither is your future in DevOps.",
+		"Service quota exceeded: quota for bad decisions also exceeded.",
+		"Provider version mismatch: your version and 'competent' don't match.",
+		"Authentication failed: terraform can tell you're a fraud.",
+		"IAM denied: Identity and Access confirms you shouldn't access anything.",
+		"Resource not found: your resources for learning also not found.",
+		"Provider crashed: looking at your code will do that.",
+
+		// Resource failures
+		"Resource creation failed: creation of your career also failed.",
+		"Dependency cycle detected: you depend on failure consistently.",
+		"Timeout waiting for resource: still waiting for your skill to deploy.",
+		"Validation failed: your code failed validation. So did your degree.",
+		"Variables undefined: your career path is also undefined.",
+		"Output error: the only output is embarrassment.",
+		"Module not found: 'successful_deployment' module missing.",
+		"Plan failed: your life plan also needs review.",
+		"Apply error: apply this to your resume: 'needs improvement.'",
+		"Destroy failed: can't even destroy properly. Impressive.",
+
+		// Cloud catastrophes
+		"S3 bucket public: your mistakes are also very public.",
+		"Lambda timeout: your functions fail as slowly as possible.",
+		"EC2 terminated: instance of competence also terminated.",
+		"RDS connection refused: database refused your terrible queries.",
+		"CloudFormation drift: drifting further from employability.",
+	},
+
+	// ==================== AI/ML (40 insults) ====================
+	"ai_ml": {
+		// GPU/CUDA disasters
+		"CUDA out of memory: your model is as bloated as your ego.",
+		"GPU not found: your neural network found nothing either.",
+		"CUDA version mismatch: mismatch between your skills and requirements too.",
+		"cuDNN error: your deep learning is very shallow.",
+		"NCCL error: distributed training can't distribute your incompetence.",
+		"torch.cuda.is_available() returns False, and so does your career.",
+		"GPU utilization 0%: matches your brain utilization.",
+		"OOM killer struck: should have killed your model idea first.",
+		"Driver version incompatible: you're incompatible with success.",
+		"Memory allocation failed: allocate some time for learning basics.",
+
+		// Training failures
+		"NaN loss: your gradients vanished like your debugging skills.",
+		"Loss not decreasing: your competence isn't increasing either.",
+		"Validation loss exploding: your mistakes also explode exponentially.",
+		"Overfitting to training data: and overfitting to bad practices.",
+		"Underfitting everything: including job requirements.",
+		"Gradient explosion: the only thing exploding is your career.",
+		"Learning rate too high: ambition too high, skill too low.",
+		"Model diverged: diverged from anything resembling ML knowledge.",
+		"Early stopping triggered: should have stopped you earlier.",
+		"Accuracy stuck at 50%: your model learned to flip a coin.",
+
+		// Model issues
+		"Model too large: compensating for something?",
+		"Model won't load: brain cells also won't load.",
+		"Checkpoint corrupted: your understanding is also corrupted.",
+		"Weights initialization failed: your project was doomed from the start.",
+		"Architecture makes no sense: designed by throwing layers at the wall.",
+		"Batch size too large: bigger isn't always better. Applies to egos too.",
+		"Embedding dimension mismatch: dimensions of your confusion also mismatch.",
+		"Tokenizer error: can't tokenize your excuses.",
+		"Inference failed: your ability to infer solutions also failed.",
+		"Model prediction: always wrong. Like your career choices.",
+
+		// Data disasters
+		"Dataset not found: your dataset of achievements also empty.",
+		"Data loader crashed: crashed harder than your GPU.",
+		"Label mismatch: your labels and reality don't match.",
+		"Preprocessing failed: pre-thinking also failed.",
+		"Data augmentation broke: augmenting garbage gives more garbage.",
+		"Feature extraction error: can't extract features from nothing.",
+		"Normalization failed: nothing normal about your approach.",
+		"Train/test split leaked: your incompetence also leaked everywhere.",
+		"Class imbalance: your skills are imbalanced too.",
+		"Corrupted samples: sample of your work is also corrupted.",
+	},
+
+	// ==================== HTTP/API ERRORS (35 insults) ====================
+	"http_errors": {
+		// Client errors (4xx)
+		"400 Bad Request: your request is as bad as your code.",
+		"401 Unauthorized: even the API knows you shouldn't be here.",
+		"403 Forbidden: the server has better judgment than your manager.",
+		"404 Not Found: your skills are also not found.",
+		"405 Method Not Allowed: your methods aren't allowed in production either.",
+		"408 Request Timeout: patience for your code also timed out.",
+		"409 Conflict: the only thing consistent about you.",
+		"410 Gone: like your chances of success.",
+		"413 Payload Too Large: your ego is also payload too large.",
+		"415 Unsupported Media Type: your code type is also unsupported.",
+		"418 I'm a Teapot: you're a disaster.",
+		"422 Unprocessable Entity: your code is unprocessable by any brain.",
+		"429 Too Many Requests: slow down, the API isn't your therapist.",
+		"451 Unavailable For Legal Reasons: your code should also be illegal.",
+
+		// Server errors (5xx)
+		"500 Internal Server Error: you broke the server. Congratulations.",
+		"501 Not Implemented: like your understanding of REST.",
+		"502 Bad Gateway: the server between you and success has crashed.",
+		"503 Service Unavailable: like your competence.",
+		"504 Gateway Timeout: gateway gave up waiting for your code to work.",
+		"505 HTTP Version Not Supported: your version of 'working code' isn't supported.",
+		"507 Insufficient Storage: insufficient storage for all your mistakes.",
+		"508 Loop Detected: you're stuck in a loop of bad decisions.",
+		"511 Network Authentication Required: authenticate your claims to skill first.",
+
+		// curl/wget specific
+		"curl: Connection refused: server is refusing your advances.",
+		"curl: Could not resolve host: your code can't resolve anything.",
+		"wget: Connection timed out: even wget is tired of waiting.",
+		"SSL certificate problem: your certificate of competence is also invalid.",
+		"Connection reset by peer: peer reviewed your code and reset everything.",
+		"Network unreachable: like your career aspirations.",
+		"curl: (7) Failed to connect: you fail to connect with success too.",
+		"Host not found: hosting your code should also not be found.",
+		"Certificate verification failed: your skills failed verification too.",
+		"Protocol error: you're speaking the wrong protocol. In life too.",
+		"Response too large: larger than your debugging capabilities.",
+		"Malformed response: your understanding is also malformed.",
+	},
+
+	// ==================== CLOUD PROVIDER SPECIFIC (25 insults) ====================
+	"cloud": {
+		// AWS
+		"AWS bill arrived: your wallet just filed for bankruptcy.",
+		"Lambda cold start: your brain also has cold starts.",
+		"S3 access denied: denied like your promotion.",
+		"DynamoDB throttled: your throughput of good ideas is also limited.",
+		"EC2 instance terminated: unlike your employment. For now.",
+		"CloudWatch alarm: alarming how bad this is.",
+		"ECS task failed: task 'be competent' also failed.",
+		"SQS message lost: like your message to the team about testing.",
+
+		// GCP
+		"GCP quota exceeded: quota for patience also exceeded.",
+		"BigQuery timeout: big questions about your competence too.",
+		"Cloud Functions crashed: function 'write_good_code' not defined.",
+		"GKE cluster error: cluster of mistakes growing.",
+
+		// Azure
+		"Azure outage: your code causes outages too.",
+		"Blob storage error: blob of errors in your code.",
+		"Azure Functions timeout: functions of your brain also timeout.",
+		"App Service failed: your service to the team has also failed.",
+
+		// General cloud
+		"Cloud costs: $10,000/month for 'Hello World.'",
+		"Auto-scaling scaled to zero: correct assessment of your value.",
+		"CDN cache miss: your code misses the point entirely.",
+		"Load balancer unhealthy: health check for your code: terminal.",
+		"Database connection pool exhausted: pool of excuses also exhausted.",
+		"Message queue backed up: backed up like your technical debt.",
+		"Container registry error: registering your failures since day one.",
+		"VPC misconfigured: Very Poorly Configured.",
+		"IAM role missing: role 'competent developer' is also missing.",
+	},
+}
+
+// init is intentionally empty; no explicit registration is needed
+func init() {
+	// V3 categories are automatically available through GetExpandedFallback
+	// which checks InsultExpansionV3 after V2
+}

internal/llm/manager.go (modified)
@@ -10,9 +10,10 @@ import (
 )
 
 type LLMManager struct {
-	config     *config.Config
-	apiClient  *APIClient
+	config       *config.Config
+	apiClient    *APIClient
 	ollamaClient *OllamaClient
+	cache        *ResponseCache
 }
 
 type Backend string
@@ -23,9 +24,18 @@ const (
 	BackendFallback Backend = "fallback"
 )
 
+// getLocalTimeout returns the appropriate timeout based on generation mode
+func getLocalTimeout(cfg *config.Config) time.Duration {
+	if cfg.General.GenerationMode == "spicy" {
+		return 5 * time.Second // Patient timeout for quality mode
+	}
+	return 3 * time.Second // Snappy timeout (raw Ollama ~1.4s, needs headroom)
+}
+
 func NewLLMManager(cfg *config.Config) *LLMManager {
 	manager := &LLMManager{
 		config: cfg,
+		cache:  GetResponseCache(),
 	}
 	
 	// Initialize API client if enabled
@@ -44,7 +54,10 @@ func NewLLMManager(cfg *config.Config) *LLMManager {
 			cfg.Local.Endpoint,
 			cfg.Local.Model,
 		)
-		
+
+		// Set generation mode (snappy = fast, spicy = quality)
+		manager.ollamaClient.SetMode(cfg.General.GenerationMode)
+
 		// Warm up the model in the background for better performance
 		if manager.ollamaClient.IsAvailable() {
 			go func() {
@@ -96,14 +109,14 @@ func (m *LLMManager) Generate(ctx context.Context, prompt string, commandType st
 	// 2. Try local Ollama (if available)
 	if m.ollamaClient != nil && m.config.Local.Enabled {
 		if m.config.General.Debug {
-			fmt.Printf("🔍 Trying local backend...\n")
+			fmt.Printf("🔍 Trying local backend (%s mode)...\n", m.config.General.GenerationMode)
 		}
-		
-		// Create timeout context for local calls
-		timeoutDuration := time.Duration(m.config.Local.Timeout) * time.Second
+
+		// Create timeout context based on generation mode
+		timeoutDuration := getLocalTimeout(m.config)
 		localCtx, cancel := context.WithTimeout(ctx, timeoutDuration)
 		defer cancel()
-		
+
 		response, err := m.ollamaClient.Generate(localCtx, prompt)
 		if m.config.General.Debug {
 			fmt.Printf("🐛 Raw Ollama response: '%s', error: %v\n", response, err)
@@ -115,12 +128,12 @@ func (m *LLMManager) Generate(ctx context.Context, prompt string, commandType st
 			}
 			return response, BackendLocal
 		}
-		
+
 		if m.config.General.Debug {
 			fmt.Printf("❌ Local backend failed: %v\n", err)
 		}
 	}
-	
+
 	// 3. Fallback to hardcoded responses
 	if m.config.General.Debug {
 		fmt.Printf("🔄 Using fallback backend\n")
@@ -135,6 +148,16 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 		return m.generateFallback(commandType, fullCommand, exitCode), BackendFallback
 	}
 
+	// Check cache first for repeated failures
+	if m.cache != nil {
+		if cached, found := m.cache.Get(fullCommand, commandType, exitCode, m.config.General.GenerationMode); found {
+			if m.config.General.Debug {
+				fmt.Printf("⚡ Cache hit!\n")
+			}
+			return cached, BackendLocal // Treat cache as local backend
+		}
+	}
+
 	// Try backends in priority order: API -> Local -> Fallback
 
 	// 1. Try API first (if available)
@@ -154,6 +177,10 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 			if m.config.General.Debug {
 				fmt.Printf("✅ API backend succeeded\n")
 			}
+			// Cache successful response
+			if m.cache != nil {
+				m.cache.Set(fullCommand, commandType, exitCode, m.config.General.GenerationMode, response)
+			}
 			return response, BackendAPI
 		}
 
@@ -165,11 +192,11 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 	// 2. Try local Ollama (if available)
 	if m.ollamaClient != nil && m.config.Local.Enabled {
 		if m.config.General.Debug {
-			fmt.Printf("🔍 Trying local backend...\n")
+			fmt.Printf("🔍 Trying local backend (%s mode)...\n", m.config.General.GenerationMode)
 		}
 
-		// Create timeout context for local calls
-		timeoutDuration := time.Duration(m.config.Local.Timeout) * time.Second
+		// Create timeout context based on generation mode
+		timeoutDuration := getLocalTimeout(m.config)
 		localCtx, cancel := context.WithTimeout(ctx, timeoutDuration)
 		defer cancel()
 
@@ -182,6 +209,10 @@ func (m *LLMManager) GenerateWithContext(ctx context.Context, prompt string, com
 			if m.config.General.Debug {
 				fmt.Printf("✅ Local backend succeeded with: '%s'\n", response)
 			}
+			// Cache successful response
+			if m.cache != nil {
+				m.cache.Set(fullCommand, commandType, exitCode, m.config.General.GenerationMode, response)
+			}
 			return response, BackendLocal
 		}
 
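
These per-call deadlines sit inside the outer 4s/6s contexts created in cmd/mock.go, leaving roughly a second for fallback work if Ollama times out. A sketch test pinning down getLocalTimeout (hypothetical internal/llm/manager_test.go; the module path in the import is a guess from the repo name):

package llm

import (
	"testing"
	"time"

	"github.com/tenseleyflow/parrot/internal/config" // assumed import path
)

func TestGetLocalTimeout(t *testing.T) {
	cfg := config.DefaultConfig() // GenerationMode defaults to "snappy"
	if got := getLocalTimeout(cfg); got != 3*time.Second {
		t.Fatalf("snappy timeout = %v, want 3s", got)
	}

	cfg.General.GenerationMode = "spicy"
	if got := getLocalTimeout(cfg); got != 5*time.Second {
		t.Fatalf("spicy timeout = %v, want 5s", got)
	}
}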

internal/llm/ollama.go (modified)
@@ -13,6 +13,7 @@ import (
 type OllamaClient struct {
 	BaseURL string
 	Model   string
+	Mode    string // "snappy" (fast) or "spicy" (quality)
 	client  *http.Client
 }
 
@@ -43,32 +44,60 @@ func NewOllamaClient(baseURL, model string) *OllamaClient {
 	if model == "" {
 		model = "llama3.2:3b"
 	}
-	
+
 	return &OllamaClient{
 		BaseURL: baseURL,
 		Model:   model,
+		Mode:    "snappy", // Default to fast mode
 		client: &http.Client{
 			Timeout: 60 * time.Second, // Maximum timeout; actual timeout controlled by context
 		},
 	}
 }
 
+// SetMode sets the generation mode ("snappy" for speed, "spicy" for quality)
+func (c *OllamaClient) SetMode(mode string) {
+	if mode == "spicy" || mode == "snappy" {
+		c.Mode = mode
+	}
+}
+
+// getOptionsForMode returns optimized generation options based on mode
+func (c *OllamaClient) getOptionsForMode() *GenerateOptions {
+	if c.Mode == "spicy" {
+		// Spicy mode: richer responses, more creative, willing to wait
+		return &GenerateOptions{
+			NumPredict:  80,   // Longer responses
+			NumCtx:      1024, // Rich context window
+			Temperature: 0.85, // More creative
+		}
+	}
+	// Snappy mode (default): fast and punchy
+	return &GenerateOptions{
+		NumPredict:  40,  // Short and punchy
+		NumCtx:      256, // Minimal context
+		Temperature: 0.6, // Faster convergence
+	}
+}
+
 func (c *OllamaClient) Generate(ctx context.Context, prompt string) (string, error) {
 	u, err := url.JoinPath(c.BaseURL, "/api/generate")
 	if err != nil {
 		return "", fmt.Errorf("invalid base URL: %w", err)
 	}
 
+	// Mode-specific keep-alive (generation options are set below)
+	keepAlive := "5m"
+	if c.Mode == "spicy" {
+		keepAlive = "15m" // Keep model warm longer for quality mode
+	}
+
 	req := GenerateRequest{
 		Model:     c.Model,
 		Prompt:    prompt,
 		Stream:    false,
-		KeepAlive: "10m", // Keep model loaded for 10 minutes to avoid cold starts
-		Options: &GenerateOptions{
-			NumPredict:  60,  // Limit output tokens (insults are short)
-			NumCtx:      512, // Small context window (prompts are ~500 chars)
-			Temperature: 0.8, // Good creativity for variety
-		},
+		KeepAlive: keepAlive,
+		Options:   c.getOptionsForMode(),
 	}
 
 	reqBody, err := json.Marshal(req)
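
The mode plumbing is small enough to pin down with an in-package sketch test (hypothetical internal/llm/ollama_test.go; the GenerateOptions field names come from the diff above):

package llm

import "testing"

func TestOllamaClientModes(t *testing.T) {
	c := NewOllamaClient("http://localhost:11434", "llama3.2:3b")
	if c.Mode != "snappy" {
		t.Fatalf("default mode = %q, want snappy", c.Mode)
	}

	c.SetMode("spicy")
	if opts := c.getOptionsForMode(); opts.NumPredict != 80 || opts.NumCtx != 1024 {
		t.Fatalf("unexpected spicy options: %+v", opts)
	}

	// SetMode ignores unknown values, so Mode stays "spicy".
	c.SetMode("extra-crispy")
	if c.Mode != "spicy" {
		t.Fatalf("mode = %q, want spicy after invalid SetMode", c.Mode)
	}
}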

internal/llm/tier4_templates.go (modified)
@@ -283,7 +283,7 @@ func GenerateStreakEscalation(streak int, ctx SmartFallbackContext) string {
 		}
 	} else if streak >= 3 {
 		templates = []string{
-			fmt.Sprintf("Third failure in a row. Three strikes: You're out.", streak),
+			fmt.Sprintf("Failure #%d in a row. Three strikes: You're out.", streak),
 			fmt.Sprintf("%d consecutive fails. Trying the same thing expecting different results?", streak),
 			fmt.Sprintf("Failure #%d. Pattern detected: You.", streak),
 			fmt.Sprintf("%d in a row. Maybe read the docs this time?", streak),