| 1 | package llm |
| 2 | |
import (
	"math/rand"
	"strconv"
	"strings"
	"sync"
	"time"
)
| 9 | |
// MarkovGenerator generates novel insults using Markov chains.
// mu guards chains, starters, and rng, which are shared across calls.
// NOTE(review): rng (*rand.Rand) is not safe for concurrent use, yet some
// methods touch it while holding only a read lock — verify locking discipline.
type MarkovGenerator struct {
	mu        sync.RWMutex
	chains    map[string]map[string]int // state -> next_word -> count
	starters  []string                  // possible starting words
	order     int                       // n-gram order (2 = bigram)
	minLength int                       // minimum generated text length
	maxLength int                       // maximum generated text length
	rng       *rand.Rand                // source of randomness for generation
}
| 20 | |
| 21 | // NewMarkovGenerator creates a new Markov chain generator |
| 22 | func NewMarkovGenerator(order int) *MarkovGenerator { |
| 23 | return &MarkovGenerator{ |
| 24 | chains: make(map[string]map[string]int), |
| 25 | starters: make([]string, 0), |
| 26 | order: order, |
| 27 | minLength: 30, // Minimum 30 characters |
| 28 | maxLength: 150, // Maximum 150 characters |
| 29 | rng: rand.New(rand.NewSource(time.Now().UnixNano())), |
| 30 | } |
| 31 | } |
| 32 | |
| 33 | // Train trains the Markov chain on a corpus of insults |
| 34 | func (mg *MarkovGenerator) Train(insults []string) { |
| 35 | mg.mu.Lock() |
| 36 | defer mg.mu.Unlock() |
| 37 | |
| 38 | for _, insult := range insults { |
| 39 | mg.trainOnTextUnlocked(insult) |
| 40 | } |
| 41 | } |
| 42 | |
| 43 | // trainOnTextUnlocked trains on a single text (caller must hold lock) |
| 44 | func (mg *MarkovGenerator) trainOnTextUnlocked(text string) { |
| 45 | words := mg.tokenize(text) |
| 46 | if len(words) < mg.order+1 { |
| 47 | return |
| 48 | } |
| 49 | |
| 50 | // Add first state as starter |
| 51 | state := strings.Join(words[:mg.order], " ") |
| 52 | mg.starters = append(mg.starters, state) |
| 53 | |
| 54 | // Build chain |
| 55 | for i := 0; i < len(words)-mg.order; i++ { |
| 56 | state := strings.Join(words[i:i+mg.order], " ") |
| 57 | nextWord := words[i+mg.order] |
| 58 | |
| 59 | if _, exists := mg.chains[state]; !exists { |
| 60 | mg.chains[state] = make(map[string]int) |
| 61 | } |
| 62 | |
| 63 | mg.chains[state][nextWord]++ |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | // tokenize splits text into words |
| 68 | func (mg *MarkovGenerator) tokenize(text string) []string { |
| 69 | // Split on spaces and punctuation, but keep punctuation |
| 70 | var words []string |
| 71 | var currentWord strings.Builder |
| 72 | |
| 73 | for _, r := range text { |
| 74 | if r == ' ' || r == '\n' || r == '\t' { |
| 75 | if currentWord.Len() > 0 { |
| 76 | words = append(words, currentWord.String()) |
| 77 | currentWord.Reset() |
| 78 | } |
| 79 | } else if r == '.' || r == '!' || r == '?' || r == ',' || r == ':' || r == ';' { |
| 80 | if currentWord.Len() > 0 { |
| 81 | words = append(words, currentWord.String()) |
| 82 | currentWord.Reset() |
| 83 | } |
| 84 | words = append(words, string(r)) |
| 85 | } else { |
| 86 | currentWord.WriteRune(r) |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | if currentWord.Len() > 0 { |
| 91 | words = append(words, currentWord.String()) |
| 92 | } |
| 93 | |
| 94 | return words |
| 95 | } |
| 96 | |
| 97 | // Generate generates a novel insult |
| 98 | func (mg *MarkovGenerator) Generate() string { |
| 99 | mg.mu.RLock() |
| 100 | defer mg.mu.RUnlock() |
| 101 | |
| 102 | if len(mg.starters) == 0 || len(mg.chains) == 0 { |
| 103 | return "" // Not trained yet |
| 104 | } |
| 105 | |
| 106 | // Pick a random starting state |
| 107 | state := mg.starters[mg.rng.Intn(len(mg.starters))] |
| 108 | words := strings.Split(state, " ") |
| 109 | |
| 110 | // Generate until we hit max length or a terminal state |
| 111 | attempts := 0 |
| 112 | maxAttempts := 100 |
| 113 | |
| 114 | for len(strings.Join(words, " ")) < mg.maxLength && attempts < maxAttempts { |
| 115 | attempts++ |
| 116 | |
| 117 | // Get next word choices |
| 118 | nextWords := mg.chains[state] |
| 119 | if len(nextWords) == 0 { |
| 120 | break // Terminal state |
| 121 | } |
| 122 | |
| 123 | // Choose next word based on frequency |
| 124 | nextWord := mg.weightedChoice(nextWords) |
| 125 | words = append(words, nextWord) |
| 126 | |
| 127 | // Update state |
| 128 | if len(words) >= mg.order { |
| 129 | state = strings.Join(words[len(words)-mg.order:], " ") |
| 130 | } |
| 131 | |
| 132 | // Stop at sentence endings if we've generated enough |
| 133 | if (nextWord == "." || nextWord == "!" || nextWord == "?") && |
| 134 | len(strings.Join(words, " ")) >= mg.minLength { |
| 135 | break |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | // Reconstruct text with proper spacing |
| 140 | return mg.reconstructText(words) |
| 141 | } |
| 142 | |
| 143 | // weightedChoice selects a word based on frequency weights |
| 144 | func (mg *MarkovGenerator) weightedChoice(choices map[string]int) string { |
| 145 | // Calculate total weight |
| 146 | totalWeight := 0 |
| 147 | for _, count := range choices { |
| 148 | totalWeight += count |
| 149 | } |
| 150 | |
| 151 | // Random selection |
| 152 | r := mg.rng.Intn(totalWeight) |
| 153 | cumulative := 0 |
| 154 | |
| 155 | for word, count := range choices { |
| 156 | cumulative += count |
| 157 | if r < cumulative { |
| 158 | return word |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | // Fallback (shouldn't reach here) |
| 163 | for word := range choices { |
| 164 | return word |
| 165 | } |
| 166 | |
| 167 | return "" |
| 168 | } |
| 169 | |
| 170 | // reconstructText reconstructs text with proper spacing around punctuation |
| 171 | func (mg *MarkovGenerator) reconstructText(words []string) string { |
| 172 | var result strings.Builder |
| 173 | |
| 174 | for i, word := range words { |
| 175 | // Don't add space before punctuation |
| 176 | if i > 0 && !mg.isPunctuation(word) { |
| 177 | result.WriteString(" ") |
| 178 | } |
| 179 | |
| 180 | result.WriteString(word) |
| 181 | } |
| 182 | |
| 183 | return result.String() |
| 184 | } |
| 185 | |
| 186 | // isPunctuation checks if a word is punctuation |
| 187 | func (mg *MarkovGenerator) isPunctuation(word string) bool { |
| 188 | return word == "." || word == "!" || word == "?" || |
| 189 | word == "," || word == ":" || word == ";" || |
| 190 | word == "(" || word == ")" |
| 191 | } |
| 192 | |
| 193 | // GenerateContextual generates an insult with context hints |
| 194 | func (mg *MarkovGenerator) GenerateContextual(seedWords []string) string { |
| 195 | mg.mu.RLock() |
| 196 | defer mg.mu.RUnlock() |
| 197 | |
| 198 | if len(mg.chains) == 0 { |
| 199 | return "" |
| 200 | } |
| 201 | |
| 202 | // Find states that contain any of the seed words |
| 203 | var matchingStarters []string |
| 204 | for _, starter := range mg.starters { |
| 205 | for _, seed := range seedWords { |
| 206 | if strings.Contains(strings.ToLower(starter), strings.ToLower(seed)) { |
| 207 | matchingStarters = append(matchingStarters, starter) |
| 208 | break |
| 209 | } |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | // If we found matching starters, use them; otherwise use any starter |
| 214 | if len(matchingStarters) == 0 { |
| 215 | matchingStarters = mg.starters |
| 216 | } |
| 217 | |
| 218 | // Pick a random matching starter |
| 219 | state := matchingStarters[mg.rng.Intn(len(matchingStarters))] |
| 220 | words := strings.Split(state, " ") |
| 221 | |
| 222 | // Generate as normal |
| 223 | attempts := 0 |
| 224 | maxAttempts := 100 |
| 225 | |
| 226 | for len(strings.Join(words, " ")) < mg.maxLength && attempts < maxAttempts { |
| 227 | attempts++ |
| 228 | |
| 229 | nextWords := mg.chains[state] |
| 230 | if len(nextWords) == 0 { |
| 231 | break |
| 232 | } |
| 233 | |
| 234 | nextWord := mg.weightedChoice(nextWords) |
| 235 | words = append(words, nextWord) |
| 236 | |
| 237 | if len(words) >= mg.order { |
| 238 | state = strings.Join(words[len(words)-mg.order:], " ") |
| 239 | } |
| 240 | |
| 241 | if (nextWord == "." || nextWord == "!" || nextWord == "?") && |
| 242 | len(strings.Join(words, " ")) >= mg.minLength { |
| 243 | break |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | return mg.reconstructText(words) |
| 248 | } |
| 249 | |
| 250 | // GenerateWithTemplate generates using a template with variable slots |
| 251 | func (mg *MarkovGenerator) GenerateWithTemplate(template string, variables map[string]string) string { |
| 252 | result := template |
| 253 | |
| 254 | for key, value := range variables { |
| 255 | placeholder := "{" + key + "}" |
| 256 | result = strings.ReplaceAll(result, placeholder, value) |
| 257 | } |
| 258 | |
| 259 | // Fill remaining slots with Markov-generated content |
| 260 | if strings.Contains(result, "{random}") { |
| 261 | generated := mg.Generate() |
| 262 | result = strings.ReplaceAll(result, "{random}", generated) |
| 263 | } |
| 264 | |
| 265 | return result |
| 266 | } |
| 267 | |
| 268 | // Blend creates a hybrid insult by blending Markov generation with templates |
| 269 | func (mg *MarkovGenerator) Blend(ctx *SmartFallbackContext) string { |
| 270 | // Extract key terms from the context |
| 271 | seedWords := []string{} |
| 272 | |
| 273 | // Add command type |
| 274 | if ctx.CommandType != "" { |
| 275 | seedWords = append(seedWords, ctx.CommandType) |
| 276 | } |
| 277 | |
| 278 | // Add command |
| 279 | if ctx.Command != "" { |
| 280 | seedWords = append(seedWords, ctx.Command) |
| 281 | } |
| 282 | |
| 283 | // Add error pattern |
| 284 | if ctx.ErrorPattern != "" { |
| 285 | seedWords = append(seedWords, strings.ReplaceAll(ctx.ErrorPattern, "_", " ")) |
| 286 | } |
| 287 | |
| 288 | // Generate contextual insult |
| 289 | generated := mg.GenerateContextual(seedWords) |
| 290 | |
| 291 | // Post-process: ensure it's not too similar to training data |
| 292 | if mg.tooSimilarToTraining(generated) { |
| 293 | // Try again with different seed |
| 294 | return mg.Generate() |
| 295 | } |
| 296 | |
| 297 | return generated |
| 298 | } |
| 299 | |
// tooSimilarToTraining checks if generated text is too close to training data.
// Simple heuristic: a very short output likely reproduced a single training
// fragment verbatim (also catches the "" returned when the model is untrained).
// NOTE(review): despite the comment below, no consecutive-word overlap check
// is implemented — only the length test. Consider adding one if needed.
func (mg *MarkovGenerator) tooSimilarToTraining(text string) bool {
	// Simple heuristic: if the text is very short or contains many consecutive
	// words from a single training example, it's too similar
	return len(text) < mg.minLength
}
| 306 | |
| 307 | // HybridGenerate combines Markov with template system for best results |
| 308 | func (mg *MarkovGenerator) HybridGenerate( |
| 309 | ctx *SmartFallbackContext, |
| 310 | templates []string, |
| 311 | ) string { |
| 312 | // 50% chance to use pure Markov, 50% template + Markov |
| 313 | if mg.rng.Float64() < 0.5 { |
| 314 | return mg.Blend(ctx) |
| 315 | } |
| 316 | |
| 317 | // Pick a random template |
| 318 | if len(templates) == 0 { |
| 319 | return mg.Blend(ctx) |
| 320 | } |
| 321 | |
| 322 | template := templates[mg.rng.Intn(len(templates))] |
| 323 | |
| 324 | // Fill template variables |
| 325 | variables := map[string]string{ |
| 326 | "command": ctx.Command, |
| 327 | "commandType": ctx.CommandType, |
| 328 | "exitCode": string(rune(ctx.ExitCode)), |
| 329 | "error": ctx.ErrorPattern, |
| 330 | } |
| 331 | |
| 332 | return mg.GenerateWithTemplate(template, variables) |
| 333 | } |
| 334 | |
| 335 | // GetStats returns statistics about the trained model |
| 336 | func (mg *MarkovGenerator) GetStats() map[string]interface{} { |
| 337 | return map[string]interface{}{ |
| 338 | "states": len(mg.chains), |
| 339 | "starters": len(mg.starters), |
| 340 | "order": mg.order, |
| 341 | "vocabulary": mg.countVocabulary(), |
| 342 | "avg_choices": mg.averageChoices(), |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | func (mg *MarkovGenerator) countVocabulary() int { |
| 347 | vocab := make(map[string]bool) |
| 348 | for state := range mg.chains { |
| 349 | words := strings.Split(state, " ") |
| 350 | for _, word := range words { |
| 351 | vocab[word] = true |
| 352 | } |
| 353 | } |
| 354 | return len(vocab) |
| 355 | } |
| 356 | |
| 357 | func (mg *MarkovGenerator) averageChoices() float64 { |
| 358 | if len(mg.chains) == 0 { |
| 359 | return 0 |
| 360 | } |
| 361 | |
| 362 | total := 0 |
| 363 | for _, choices := range mg.chains { |
| 364 | total += len(choices) |
| 365 | } |
| 366 | |
| 367 | return float64(total) / float64(len(mg.chains)) |
| 368 | } |
| 369 |