Rust · 23788 bytes Raw Blame History
1 //! Reed-Solomon Error Correction
2 //!
3 //! Efficient Reed-Solomon coding for ZephyrFS chunks, providing mathematical
4 //! redundancy that can reconstruct data from partial chunks
5
6 use anyhow::Result;
7 use serde::{Deserialize, Serialize};
8 use std::collections::HashMap;
9 use chrono::{DateTime, Utc};
10
/// Reed-Solomon encoder/decoder for ZephyrFS
#[derive(Debug, Clone)]
pub struct ReedSolomonCodec {
    /// Coding parameters
    pub config: ReedSolomonConfig,
    /// Galois field arithmetic
    pub galois_field: GaloisField,
    /// Encoding matrices
    /// ((data + parity) x data matrix: identity rows for the data chunks,
    /// then Vandermonde-style rows for the parity chunks)
    pub encoding_matrix: Vec<Vec<u8>>,
    /// Cache of inverted decoding submatrices, keyed by the list of available
    /// chunk indices in the order the chunks were supplied to `decode`
    pub decoding_cache: HashMap<Vec<usize>, Vec<Vec<u8>>>,
}
22
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReedSolomonConfig {
    /// Number of data chunks (k); intended so that any k surviving chunks
    /// allow recovery
    pub data_chunks: u32,
    /// Number of parity chunks (m); also the number of chunk losses tolerated
    pub parity_chunks: u32,
    /// Chunk size in bytes
    /// NOTE(review): `encode` splits by input length, not this value — confirm intent
    pub chunk_size: usize,
    /// Galois field size (typically 2^8 = 256)
    /// NOTE(review): `GaloisField::new()` hardcodes 256 and ignores this field
    pub field_size: u32,
    /// Primitive polynomial for GF(2^8) (0x11d = x^8 + x^4 + x^3 + x^2 + 1)
    /// NOTE(review): `GaloisField::new()` hardcodes 0x11d and ignores this field
    pub primitive_poly: u32,
}
36
/// Galois Field arithmetic for Reed-Solomon
///
/// GF(2^8) element arithmetic backed by precomputed log/antilog tables,
/// built once in `new`.
#[derive(Debug, Clone)]
pub struct GaloisField {
    /// Field size (2^8 = 256)
    pub size: u32,
    /// Generator polynomial
    /// (reduction polynomial 0x11d applied while building the tables)
    pub generator: u32,
    /// Logarithm table, indexed by field element
    /// (entry 0 is unused since log(0) is undefined)
    pub log_table: Vec<u8>,
    /// Antilogarithm table, indexed by exponent
    /// (entry 255 duplicates entry 0 because alpha^255 == alpha^0 == 1)
    pub antilog_table: Vec<u8>,
}
49
/// A single encoded chunk (data or parity) produced by `ReedSolomonCodec::encode`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodedChunk {
    /// Identifier of the form "{original_id}_{index}"
    pub chunk_id: String,
    /// Position in the encoded set: indices below `data_chunks` are data,
    /// the rest are parity
    pub chunk_index: u32,
    /// Whether this chunk carries original data or parity bytes
    pub chunk_type: ChunkType,
    /// Chunk payload (padded to the common chunk length)
    pub data: Vec<u8>,
    /// Size, checksum, and encoding parameters for this chunk
    pub metadata: ChunkMetadata,
    /// Creation timestamp (UTC)
    pub created_at: DateTime<Utc>,
}
59
/// Role of a chunk within an encoded set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ChunkType {
    /// Carries a slice of the original input
    Data,
    /// Carries GF(2^8) parity computed from the data chunks
    Parity,
}
65
/// Per-chunk bookkeeping stored alongside the payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkMetadata {
    /// Unpadded payload bytes in this chunk; the final data chunk may hold
    /// fewer real bytes than the padded chunk length
    pub original_size: usize,
    /// Hex-encoded additive checksum of the chunk payload
    pub checksum: String,
    /// Encoding parameters used to produce this chunk
    pub encoding_config: ReedSolomonConfig,
    /// Same value as `chunk_index` on the enclosing `EncodedChunk`
    pub chunk_position: u32,
    /// Total chunks in the set (data + parity)
    pub total_chunks: u32,
}
74
/// Input to `ReedSolomonCodec::decode`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReconstructionRequest {
    /// Base id used to name reconstructed chunks
    pub original_chunk_id: String,
    /// Surviving chunks (data and/or parity); at least `data_chunks` required
    pub available_chunks: Vec<EncodedChunk>,
    /// Indices to rebuild; parity indices are currently skipped by `decode`
    pub missing_chunk_indices: Vec<u32>,
    /// Expected chunk length
    /// NOTE(review): not read by `decode`, which infers the size from the
    /// first available chunk — confirm whether it should be honored
    pub target_chunk_size: usize,
}
82
/// Outcome of a `ReedSolomonCodec::decode` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReconstructionResult {
    /// True when reconstruction ran; does NOT imply `verification_passed`
    pub success: bool,
    /// Rebuilt data chunks (parity chunks are never rebuilt)
    pub reconstructed_chunks: Vec<EncodedChunk>,
    /// Wall-clock time spent decoding, in milliseconds
    pub reconstruction_time_ms: u64,
    /// Populated when `success` is false
    pub error_message: Option<String>,
    /// Result of the post-reconstruction checksum check
    pub verification_passed: bool,
}
91
92 impl Default for ReedSolomonConfig {
93 fn default() -> Self {
94 Self {
95 data_chunks: 6,
96 parity_chunks: 3,
97 chunk_size: 1024 * 1024, // 1MB chunks
98 field_size: 256,
99 primitive_poly: 0x11d, // Standard GF(2^8) primitive polynomial
100 }
101 }
102 }
103
104 impl GaloisField {
105 /// Create new Galois Field GF(2^8)
106 pub fn new() -> Self {
107 let mut gf = Self {
108 size: 256,
109 generator: 0x11d, // x^8 + x^4 + x^3 + x^2 + 1
110 log_table: vec![0; 256],
111 antilog_table: vec![0; 256],
112 };
113
114 gf.build_tables();
115 gf
116 }
117
118 /// Build logarithm and antilogarithm tables
119 fn build_tables(&mut self) {
120 let mut val = 1u32;
121
122 for i in 0..255 {
123 self.antilog_table[i] = val as u8;
124 self.log_table[val as usize] = i as u8;
125
126 val <<= 1;
127 if val & self.size != 0 {
128 val ^= self.generator;
129 }
130 }
131
132 self.antilog_table[255] = self.antilog_table[0];
133 }
134
135 /// Galois field multiplication
136 pub fn multiply(&self, a: u8, b: u8) -> u8 {
137 if a == 0 || b == 0 {
138 return 0;
139 }
140
141 let log_sum = (self.log_table[a as usize] as u32 + self.log_table[b as usize] as u32) % 255;
142 self.antilog_table[log_sum as usize]
143 }
144
145 /// Galois field division
146 pub fn divide(&self, a: u8, b: u8) -> u8 {
147 if a == 0 {
148 return 0;
149 }
150 if b == 0 {
151 panic!("Division by zero in Galois field");
152 }
153
154 let log_diff = (self.log_table[a as usize] as i32 - self.log_table[b as usize] as i32 + 255) % 255;
155 self.antilog_table[log_diff as usize]
156 }
157
158 /// Galois field power
159 pub fn power(&self, base: u8, exp: u8) -> u8 {
160 if base == 0 {
161 return 0;
162 }
163
164 let log_result = (self.log_table[base as usize] as u32 * exp as u32) % 255;
165 self.antilog_table[log_result as usize]
166 }
167 }
168
169 impl ReedSolomonCodec {
170 /// Create new Reed-Solomon codec
171 pub fn new(config: ReedSolomonConfig) -> Result<Self> {
172 let galois_field = GaloisField::new();
173 let encoding_matrix = Self::build_encoding_matrix(&config, &galois_field)?;
174
175 Ok(Self {
176 config,
177 galois_field,
178 encoding_matrix,
179 decoding_cache: HashMap::new(),
180 })
181 }
182
183 /// Build encoding matrix for Reed-Solomon
184 fn build_encoding_matrix(config: &ReedSolomonConfig, gf: &GaloisField) -> Result<Vec<Vec<u8>>> {
185 let total_chunks = config.data_chunks + config.parity_chunks;
186 let mut matrix = Vec::with_capacity(total_chunks as usize);
187
188 // Identity matrix for data chunks
189 for i in 0..config.data_chunks {
190 let mut row = vec![0u8; config.data_chunks as usize];
191 row[i as usize] = 1;
192 matrix.push(row);
193 }
194
195 // Vandermonde matrix for parity chunks
196 for i in 0..config.parity_chunks {
197 let mut row = Vec::with_capacity(config.data_chunks as usize);
198 let alpha = (i + 1) as u8; // Generator element
199
200 for j in 0..config.data_chunks {
201 row.push(gf.power(alpha, j as u8));
202 }
203 matrix.push(row);
204 }
205
206 Ok(matrix)
207 }
208
209 /// Encode data into Reed-Solomon chunks
210 pub fn encode(&self, chunk_id: String, data: &[u8]) -> Result<Vec<EncodedChunk>> {
211 let chunk_size = self.config.chunk_size;
212 let data_chunks = self.config.data_chunks as usize;
213 let total_chunks = (self.config.data_chunks + self.config.parity_chunks) as usize;
214
215 // Pad data to fit evenly into data chunks
216 let padded_size = ((data.len() + data_chunks - 1) / data_chunks) * data_chunks;
217 let mut padded_data = data.to_vec();
218 padded_data.resize(padded_size, 0);
219
220 let chunk_data_size = padded_size / data_chunks;
221 let mut chunks = Vec::with_capacity(total_chunks);
222
223 // Create data chunks
224 for i in 0..data_chunks {
225 let start = i * chunk_data_size;
226 let end = start + chunk_data_size;
227 let chunk_data = padded_data[start..end].to_vec();
228
229 chunks.push(EncodedChunk {
230 chunk_id: format!("{}_{}", chunk_id, i),
231 chunk_index: i as u32,
232 chunk_type: ChunkType::Data,
233 data: chunk_data,
234 metadata: ChunkMetadata {
235 original_size: if i == data_chunks - 1 {
236 data.len() - (data_chunks - 1) * chunk_data_size
237 } else {
238 chunk_data_size
239 },
240 checksum: self.calculate_checksum(&padded_data[start..end]),
241 encoding_config: self.config.clone(),
242 chunk_position: i as u32,
243 total_chunks: total_chunks as u32,
244 },
245 created_at: Utc::now(),
246 });
247 }
248
249 // Create parity chunks
250 for i in data_chunks..total_chunks {
251 let mut parity_data = vec![0u8; chunk_data_size];
252
253 // Calculate parity using encoding matrix
254 for j in 0..chunk_data_size {
255 let mut parity_byte = 0u8;
256
257 for k in 0..data_chunks {
258 let data_byte = padded_data[k * chunk_data_size + j];
259 let coeff = self.encoding_matrix[i][k];
260 parity_byte ^= self.galois_field.multiply(data_byte, coeff);
261 }
262
263 parity_data[j] = parity_byte;
264 }
265
266 chunks.push(EncodedChunk {
267 chunk_id: format!("{}_{}", chunk_id, i),
268 chunk_index: i as u32,
269 chunk_type: ChunkType::Parity,
270 data: parity_data.clone(),
271 metadata: ChunkMetadata {
272 original_size: chunk_data_size,
273 checksum: self.calculate_checksum(&parity_data),
274 encoding_config: self.config.clone(),
275 chunk_position: i as u32,
276 total_chunks: total_chunks as u32,
277 },
278 created_at: Utc::now(),
279 });
280 }
281
282 Ok(chunks)
283 }
284
    /// Decode/reconstruct data from available chunks
    ///
    /// Requires at least `data_chunks` available chunks (data or parity).
    /// Only missing *data* chunks are rebuilt; parity indices listed in
    /// `missing_chunk_indices` are skipped.
    ///
    /// NOTE(review): `request.target_chunk_size` is never read — the chunk
    /// length is inferred from the first available chunk. Reconstructed
    /// chunks record `original_size` as the full chunk length, which can
    /// overstate the payload of the final data chunk. `success` is reported
    /// as true even when `verification_passed` is false — confirm intent.
    pub fn decode(&mut self, request: ReconstructionRequest) -> Result<ReconstructionResult> {
        let start_time = crate::SerializableInstant::now();

        // Verify we have enough chunks for reconstruction
        if request.available_chunks.len() < self.config.data_chunks as usize {
            return Ok(ReconstructionResult {
                success: false,
                reconstructed_chunks: Vec::new(),
                reconstruction_time_ms: start_time.elapsed().as_millis() as u64,
                error_message: Some(format!(
                    "Insufficient chunks: need {}, have {}",
                    self.config.data_chunks,
                    request.available_chunks.len()
                )),
                verification_passed: false,
            });
        }

        let data_chunks = self.config.data_chunks as usize;
        // Chunk length inferred from the first available chunk; all chunks
        // are assumed to be the same length.
        let chunk_size = request.available_chunks[0].data.len();

        // Build decoding matrix
        let available_indices: Vec<usize> = request.available_chunks
            .iter()
            .map(|chunk| chunk.chunk_index as usize)
            .collect();

        // Inverse of the encoding submatrix for (the first data_chunks of)
        // the available indices; cached per index combination.
        let decoding_matrix = self.get_or_build_decoding_matrix(&available_indices)?;

        // Reconstruct missing data
        let mut reconstructed_data = vec![vec![0u8; chunk_size]; data_chunks];

        // Copy available data chunks
        for chunk in &request.available_chunks {
            let idx = chunk.chunk_index as usize;
            if idx < data_chunks {
                reconstructed_data[idx] = chunk.data.clone();
            }
        }

        // Reconstruct missing data chunks
        for missing_idx in &request.missing_chunk_indices {
            let missing_idx = *missing_idx as usize;
            if missing_idx >= data_chunks {
                continue; // Skip parity chunks for now
            }

            let mut reconstructed_chunk = vec![0u8; chunk_size];

            for byte_pos in 0..chunk_size {
                let mut reconstructed_byte = 0u8;

                // Row `missing_idx` of the inverse matrix times the column of
                // available-chunk bytes. The same first `data_chunks` chunks,
                // in the same order, were used to build the submatrix, so row
                // and column ordering agree.
                for (matrix_col, chunk) in request.available_chunks.iter().enumerate().take(data_chunks) {
                    let coeff = decoding_matrix[missing_idx][matrix_col];
                    let data_byte = chunk.data[byte_pos];
                    reconstructed_byte ^= self.galois_field.multiply(data_byte, coeff);
                }

                reconstructed_chunk[byte_pos] = reconstructed_byte;
            }

            reconstructed_data[missing_idx] = reconstructed_chunk;
        }

        // Create reconstructed chunk objects
        let mut reconstructed_chunks = Vec::new();
        for missing_idx in &request.missing_chunk_indices {
            let idx = *missing_idx as usize;
            if idx < data_chunks {
                reconstructed_chunks.push(EncodedChunk {
                    chunk_id: format!("{}_{}", request.original_chunk_id, idx),
                    chunk_index: idx as u32,
                    chunk_type: ChunkType::Data,
                    data: reconstructed_data[idx].clone(),
                    metadata: ChunkMetadata {
                        original_size: chunk_size,
                        checksum: self.calculate_checksum(&reconstructed_data[idx]),
                        encoding_config: self.config.clone(),
                        chunk_position: idx as u32,
                        total_chunks: (self.config.data_chunks + self.config.parity_chunks),
                    },
                    created_at: Utc::now(),
                });
            }
        }

        // Verify reconstruction
        // NOTE(review): the checksums were just computed from the
        // reconstructed bytes above, so this check cannot currently fail —
        // see verify_reconstruction.
        let verification_passed = self.verify_reconstruction(&reconstructed_chunks)?;

        Ok(ReconstructionResult {
            success: true,
            reconstructed_chunks,
            reconstruction_time_ms: start_time.elapsed().as_millis() as u64,
            error_message: None,
            verification_passed,
        })
    }
383
384 /// Get or build decoding matrix for given available chunks
385 fn get_or_build_decoding_matrix(&mut self, available_indices: &[usize]) -> Result<Vec<Vec<u8>>> {
386 // Check cache first
387 if let Some(cached_matrix) = self.decoding_cache.get(available_indices) {
388 return Ok(cached_matrix.clone());
389 }
390
391 let data_chunks = self.config.data_chunks as usize;
392
393 // Build submatrix from encoding matrix using available chunks
394 let mut submatrix = Vec::with_capacity(data_chunks);
395 for &idx in available_indices.iter().take(data_chunks) {
396 submatrix.push(self.encoding_matrix[idx].clone());
397 }
398
399 // Invert the submatrix
400 let decoding_matrix = self.invert_matrix(submatrix)?;
401
402 // Cache the result
403 self.decoding_cache.insert(available_indices.to_vec(), decoding_matrix.clone());
404
405 Ok(decoding_matrix)
406 }
407
408 /// Invert a matrix in Galois field
409 fn invert_matrix(&self, mut matrix: Vec<Vec<u8>>) -> Result<Vec<Vec<u8>>> {
410 let n = matrix.len();
411
412 // Create augmented matrix [A|I]
413 let mut augmented = Vec::with_capacity(n);
414 for i in 0..n {
415 let mut row = matrix[i].clone();
416 for j in 0..n {
417 row.push(if i == j { 1 } else { 0 });
418 }
419 augmented.push(row);
420 }
421
422 // Gaussian elimination
423 for i in 0..n {
424 // Find pivot
425 let mut pivot_row = i;
426 for j in (i + 1)..n {
427 if augmented[j][i] != 0 {
428 pivot_row = j;
429 break;
430 }
431 }
432
433 if augmented[pivot_row][i] == 0 {
434 return Err(anyhow::anyhow!("Matrix is not invertible"));
435 }
436
437 // Swap rows if needed
438 if pivot_row != i {
439 augmented.swap(i, pivot_row);
440 }
441
442 // Scale pivot row
443 let pivot = augmented[i][i];
444 for j in 0..(2 * n) {
445 augmented[i][j] = self.galois_field.divide(augmented[i][j], pivot);
446 }
447
448 // Eliminate column
449 for j in 0..n {
450 if i != j && augmented[j][i] != 0 {
451 let factor = augmented[j][i];
452 for k in 0..(2 * n) {
453 let product = self.galois_field.multiply(factor, augmented[i][k]);
454 augmented[j][k] ^= product;
455 }
456 }
457 }
458 }
459
460 // Extract inverse matrix
461 let mut inverse = Vec::with_capacity(n);
462 for i in 0..n {
463 inverse.push(augmented[i][n..].to_vec());
464 }
465
466 Ok(inverse)
467 }
468
469 /// Calculate checksum for data
470 fn calculate_checksum(&self, data: &[u8]) -> String {
471 // Simple checksum - in production would use cryptographic hash
472 let sum: u32 = data.iter().map(|&b| b as u32).sum();
473 format!("{:08x}", sum)
474 }
475
476 /// Verify reconstruction correctness
477 fn verify_reconstruction(&self, chunks: &[EncodedChunk]) -> Result<bool> {
478 // Verify checksums
479 for chunk in chunks {
480 let calculated_checksum = self.calculate_checksum(&chunk.data);
481 if calculated_checksum != chunk.metadata.checksum {
482 return Ok(false);
483 }
484 }
485
486 // Additional verification could include re-encoding and comparing
487 Ok(true)
488 }
489
490 /// Get redundancy schemes available
491 pub fn get_available_schemes() -> Vec<ReedSolomonConfig> {
492 vec![
493 // Standard schemes
494 ReedSolomonConfig {
495 data_chunks: 3,
496 parity_chunks: 2,
497 chunk_size: 1024 * 1024,
498 field_size: 256,
499 primitive_poly: 0x11d,
500 },
501 ReedSolomonConfig {
502 data_chunks: 6,
503 parity_chunks: 3,
504 chunk_size: 1024 * 1024,
505 field_size: 256,
506 primitive_poly: 0x11d,
507 },
508 ReedSolomonConfig {
509 data_chunks: 10,
510 parity_chunks: 4,
511 chunk_size: 1024 * 1024,
512 field_size: 256,
513 primitive_poly: 0x11d,
514 },
515 // High redundancy for critical data
516 ReedSolomonConfig {
517 data_chunks: 8,
518 parity_chunks: 6,
519 chunk_size: 1024 * 1024,
520 field_size: 256,
521 primitive_poly: 0x11d,
522 },
523 ]
524 }
525
526 /// Calculate storage efficiency for a scheme
527 pub fn calculate_storage_efficiency(&self) -> f64 {
528 self.config.data_chunks as f64 / (self.config.data_chunks + self.config.parity_chunks) as f64
529 }
530
    /// Calculate fault tolerance
    ///
    /// Maximum number of chunks that can be lost while the data remains
    /// recoverable: one per parity chunk.
    pub fn calculate_fault_tolerance(&self) -> u32 {
        self.config.parity_chunks
    }
535
536 /// Estimate reconstruction performance
537 pub fn estimate_reconstruction_time(&self, chunk_size_mb: f64, available_bandwidth_mbps: f64) -> f64 {
538 let data_to_transfer = chunk_size_mb * self.config.data_chunks as f64;
539 let transfer_time = data_to_transfer / available_bandwidth_mbps;
540 let computation_time = chunk_size_mb * 0.1; // Estimate 0.1 seconds per MB for computation
541
542 transfer_time + computation_time
543 }
544 }
545
/// Reed-Solomon manager for handling multiple coding schemes
///
/// Holds one pre-built codec per named scheme ("k+m", e.g. "6+3").
#[derive(Debug)]
pub struct ReedSolomonManager {
    /// Codec per scheme name
    pub codecs: HashMap<String, ReedSolomonCodec>,
    /// Scheme name used when none is specified ("6+3")
    pub default_scheme: String,
}
552
553 impl ReedSolomonManager {
554 /// Create new Reed-Solomon manager with multiple schemes
555 pub fn new() -> Result<Self> {
556 let mut manager = Self {
557 codecs: HashMap::new(),
558 default_scheme: "6+3".to_string(),
559 };
560
561 // Initialize standard schemes
562 let schemes = [
563 ("3+2", ReedSolomonConfig { data_chunks: 3, parity_chunks: 2, ..Default::default() }),
564 ("6+3", ReedSolomonConfig { data_chunks: 6, parity_chunks: 3, ..Default::default() }),
565 ("10+4", ReedSolomonConfig { data_chunks: 10, parity_chunks: 4, ..Default::default() }),
566 ("8+6", ReedSolomonConfig { data_chunks: 8, parity_chunks: 6, ..Default::default() }),
567 ];
568
569 for (name, config) in schemes {
570 let codec = ReedSolomonCodec::new(config)?;
571 manager.codecs.insert(name.to_string(), codec);
572 }
573
574 Ok(manager)
575 }
576
    /// Get codec for scheme
    ///
    /// Returns a mutable reference (decoding may populate the codec's
    /// decoding-matrix cache), or an error naming the unknown scheme.
    pub fn get_codec(&mut self, scheme: &str) -> Result<&mut ReedSolomonCodec> {
        self.codecs.get_mut(scheme)
            .ok_or_else(|| anyhow::anyhow!("Reed-Solomon scheme '{}' not found", scheme))
    }
582
583 /// Recommend scheme based on requirements
584 pub fn recommend_scheme(&self, durability_requirement: f64, cost_sensitivity: f64) -> String {
585 if durability_requirement > 0.99999 {
586 "8+6".to_string() // Ultra high durability
587 } else if durability_requirement > 0.9999 {
588 "10+4".to_string() // High durability
589 } else if cost_sensitivity > 0.7 {
590 "3+2".to_string() // Cost sensitive
591 } else {
592 "6+3".to_string() // Balanced
593 }
594 }
595
596 /// Get scheme performance characteristics
597 pub fn get_scheme_info(&self, scheme: &str) -> Option<SchemeInfo> {
598 self.codecs.get(scheme).map(|codec| {
599 SchemeInfo {
600 name: scheme.to_string(),
601 data_chunks: codec.config.data_chunks,
602 parity_chunks: codec.config.parity_chunks,
603 storage_efficiency: codec.calculate_storage_efficiency(),
604 fault_tolerance: codec.calculate_fault_tolerance(),
605 reconstruction_complexity: (codec.config.data_chunks * codec.config.parity_chunks) as f64,
606 }
607 })
608 }
609 }
610
/// Summary of a coding scheme's characteristics (see `get_scheme_info`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemeInfo {
    /// Scheme name, e.g. "6+3"
    pub name: String,
    /// Data chunk count (k)
    pub data_chunks: u32,
    /// Parity chunk count (m)
    pub parity_chunks: u32,
    /// k / (k + m)
    pub storage_efficiency: f64,
    /// Chunk losses tolerated (equals m)
    pub fault_tolerance: u32,
    /// Heuristic decode cost (k * m)
    pub reconstruction_complexity: f64,
}
620
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_galois_field_arithmetic() {
        let gf = GaloisField::new();

        // Test basic properties
        assert_eq!(gf.multiply(0, 5), 0);
        assert_eq!(gf.multiply(1, 5), 5);
        assert_eq!(gf.divide(10, 2), gf.multiply(10, gf.divide(1, 2)));

        // Every nonzero element must have a multiplicative inverse
        for i in 1..=255u8 {
            let inverse = gf.divide(1, i);
            assert_eq!(gf.multiply(i, inverse), 1);
        }
    }

    #[test]
    fn test_reed_solomon_codec_creation() {
        let config = ReedSolomonConfig::default();
        let codec = ReedSolomonCodec::new(config).unwrap();

        assert_eq!(codec.config.data_chunks, 6);
        assert_eq!(codec.config.parity_chunks, 3);
        // One encoding row per chunk: 6 data + 3 parity
        assert_eq!(codec.encoding_matrix.len(), 9);
    }

    #[test]
    fn test_encoding_and_reconstruction() {
        let config = ReedSolomonConfig {
            data_chunks: 3,
            parity_chunks: 2,
            chunk_size: 1024,
            ..Default::default()
        };

        let mut codec = ReedSolomonCodec::new(config).unwrap();

        // Test data
        let test_data = b"Hello, Reed-Solomon World! This is a test of error correction.".to_vec();

        // Encode
        let chunks = codec.encode("test_chunk".to_string(), &test_data).unwrap();
        assert_eq!(chunks.len(), 5); // 3 data + 2 parity

        // Lose data chunk 1 and parity chunk 3; the 3 survivors are enough.
        let available_chunks = vec![chunks[0].clone(), chunks[2].clone(), chunks[4].clone()];
        let missing_indices = vec![1, 3];

        // Reconstruct
        let request = ReconstructionRequest {
            original_chunk_id: "test_chunk".to_string(),
            available_chunks,
            missing_chunk_indices: missing_indices,
            target_chunk_size: 1024,
        };

        let result = codec.decode(request).unwrap();
        assert!(result.success);
        // decode only rebuilds *data* chunks; the missing parity index (3)
        // is explicitly skipped, so exactly one chunk (data index 1) comes
        // back. The previous expectation of 2 wrongly counted the skipped
        // parity chunk and made this test fail.
        assert_eq!(result.reconstructed_chunks.len(), 1);
        assert_eq!(result.reconstructed_chunks[0].chunk_index, 1);
        // The rebuilt bytes must match the original data chunk exactly.
        assert_eq!(result.reconstructed_chunks[0].data, chunks[1].data);
        assert!(result.verification_passed);
    }

    #[test]
    fn test_manager_scheme_recommendation() {
        let manager = ReedSolomonManager::new().unwrap();

        // High durability requirement
        assert_eq!(manager.recommend_scheme(0.99999, 0.3), "8+6");

        // Cost sensitive
        assert_eq!(manager.recommend_scheme(0.999, 0.8), "3+2");

        // Balanced
        assert_eq!(manager.recommend_scheme(0.999, 0.3), "6+3");
    }

    #[test]
    fn test_storage_efficiency_calculation() {
        let config = ReedSolomonConfig {
            data_chunks: 6,
            parity_chunks: 3,
            ..Default::default()
        };

        let codec = ReedSolomonCodec::new(config).unwrap();
        let efficiency = codec.calculate_storage_efficiency();

        assert!((efficiency - 0.6667).abs() < 0.01); // 6/9 ≈ 0.6667
    }
}