Rust · 18742 bytes Raw Blame History
1 //! Encrypted chunk storage for ZephyrFS
2 //!
3 //! Provides storage layer functionality for encrypted chunks while maintaining
4 //! zero-knowledge security. Storage nodes never see plaintext data.
5
6 use anyhow::{Context, Result};
7 use rocksdb::{DB, Options, WriteBatch};
8 use serde::{Deserialize, Serialize};
9 use sha2::{Digest, Sha256};
10 use std::collections::HashMap;
11 use std::path::Path;
12 use std::sync::Arc;
13 use tokio::sync::RwLock;
14 use tracing::{debug, info, warn};
15
16 use crate::crypto::{EncryptedData, ContentId};
17
/// Metadata for encrypted chunks stored in the system.
///
/// Zero-knowledge: only encrypted data and hashes are stored here — no
/// plaintext metadata and no key material.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncryptedChunkMetadata {
    /// SHA-256 hex digest over ciphertext + nonce + AAD; this is the chunk's
    /// content address and the key used for deduplication.
    pub encrypted_hash: String,

    /// Size of the encrypted chunk in bytes (ciphertext length).
    pub encrypted_size: u64,

    /// Unix timestamp (seconds) when the chunk was first stored.
    pub stored_at: u64,

    /// Reference count: how many encrypted files reference this chunk.
    pub ref_count: u32,

    /// Integrity checksum of the encrypted payload, recomputed and compared
    /// on every retrieval.
    pub checksum: String,

    /// Content addressing hash (encrypted, for lookup). The store leaves
    /// this as `None`; it is set by the caller if needed.
    pub content_id: Option<String>,

    /// 96-bit AEAD nonce — safe to persist alongside the ciphertext.
    pub nonce: [u8; 12],

    /// Additional authenticated data (encrypted metadata).
    pub aad: Vec<u8>,

    /// Key derivation path indices — safe to store, contains no keys.
    pub key_path: Vec<u32>,
}
50
/// Enhanced file metadata that includes encryption information.
///
/// Zero-knowledge: all sensitive fields are stored in encrypted form; the
/// storage node treats them as opaque bytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncryptedFileMetadata {
    /// Original filename, stored only in encrypted form.
    pub encrypted_name: Vec<u8>,

    /// Encrypted file size information (opaque to the storage node).
    pub encrypted_size_info: Vec<u8>,

    /// Hash of the encrypted file data.
    pub encrypted_file_hash: String,

    /// Content hashes of the encrypted chunks that make up this file.
    pub encrypted_chunk_ids: Vec<String>,

    /// Creation / modification timestamps — kept in plaintext so entries can
    /// be sorted without decryption.
    pub created_at: u64,
    pub modified_at: u64,

    /// Encryption parameters for this file (no key material — see
    /// `EncryptionMetadata`).
    pub encryption_metadata: EncryptionMetadata,

    /// Access control capabilities, stored encrypted.
    pub capabilities: Vec<u8>,
}
78
/// Encryption-specific metadata stored with files.
///
/// Zero-knowledge: describes *how* data was encrypted (algorithms, sizes,
/// nonces) without containing any sensitive key material.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncryptionMetadata {
    /// Encryption algorithm/format version.
    pub version: u32,

    /// Number of encrypted segments the file was split into.
    pub segment_count: u32,

    /// Chunk size used for encryption, in megabytes.
    pub chunk_size_mb: u32,

    /// Name of the content-addressing hash algorithm.
    pub content_hash_algorithm: String,

    /// Name of the verification hash algorithm.
    pub verification_hash_algorithm: String,

    /// Master nonce for file-level operations (nonces are safe to store).
    pub master_nonce: [u8; 12],

    /// Encrypted content verification data (opaque to the storage node).
    pub encrypted_content_verification: Vec<u8>,
}
105
/// Capability token for secure file access.
///
/// Zero-knowledge: permissions and key material are stored encrypted; the
/// storage node can check expiry and signatures but cannot read the grant.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileCapability {
    /// Unique capability ID (storage key is derived from this).
    pub capability_id: String,

    /// File ID this capability grants access to.
    pub file_id: String,

    /// Encrypted access permissions (read, write, share, etc.).
    pub encrypted_permissions: Vec<u8>,

    /// Encrypted key material for this capability.
    pub encrypted_key_material: Vec<u8>,

    /// Optional expiration as a Unix timestamp (seconds); `None` means the
    /// capability does not expire.
    pub expires_at: Option<u64>,

    /// Creation timestamp (Unix seconds).
    pub created_at: u64,

    /// Capability signature for verification.
    /// NOTE(review): no code in this file verifies the signature — confirm
    /// verification happens at a higher layer.
    pub signature: Vec<u8>,
}
132
/// Enhanced chunk store that handles encrypted chunks.
///
/// Zero-knowledge: never processes or sees plaintext data; everything
/// persisted is ciphertext or non-sensitive bookkeeping.
pub struct EncryptedChunkStore {
    /// Underlying RocksDB instance. Keys are namespaced by prefix:
    /// `chunk:` (ciphertext), `meta:` (chunk metadata), `file:` (file
    /// metadata), `cap:` (capabilities).
    db: Arc<DB>,

    /// In-memory cache of chunk metadata, keyed by encrypted content hash.
    metadata_cache: Arc<RwLock<HashMap<String, EncryptedChunkMetadata>>>,

    /// In-memory cache of encrypted file metadata, keyed by file ID.
    file_metadata_cache: Arc<RwLock<HashMap<String, EncryptedFileMetadata>>>,

    /// In-memory cache of capabilities, keyed by capability ID.
    capability_cache: Arc<RwLock<HashMap<String, FileCapability>>>,

    /// Aggregate storage statistics, kept in memory (see `load_stats_from_db`).
    stats: Arc<RwLock<EncryptedStorageStats>>,
}
152
/// Aggregate counters describing store contents and cache behavior.
///
/// These are in-memory counters; they start from `Default` (all zeros) when
/// the store is created.
#[derive(Debug, Default, Clone)]
pub struct EncryptedStorageStats {
    /// Number of unique encrypted chunks stored.
    pub total_encrypted_chunks: u64,
    /// Total bytes of ciphertext stored (deduplicated chunks counted once).
    pub total_encrypted_size: u64,
    /// Number of encrypted files with stored metadata.
    pub total_encrypted_files: u64,
    /// Number of stored capabilities.
    pub active_capabilities: u64,
    /// Cache hit counter.
    pub cache_hits: u64,
    /// Cache miss counter.
    pub cache_misses: u64,
    /// Bytes saved by chunk deduplication.
    pub deduplication_savings: u64,
}
163
164 impl EncryptedChunkStore {
165 /// Create a new encrypted chunk store
166 ///
167 /// Zero-knowledge: Configures storage to handle only encrypted data
168 pub fn new<P: AsRef<Path>>(db_path: P) -> Result<Self> {
169 info!("Initializing EncryptedChunkStore for zero-knowledge storage");
170
171 let mut opts = Options::default();
172 opts.create_if_missing(true);
173 opts.set_paranoid_checks(true);
174 opts.set_use_fsync(true);
175
176 let db = DB::open(&opts, db_path)
177 .context("Failed to open encrypted chunk database")?;
178
179 let store = Self {
180 db: Arc::new(db),
181 metadata_cache: Arc::new(RwLock::new(HashMap::new())),
182 file_metadata_cache: Arc::new(RwLock::new(HashMap::new())),
183 capability_cache: Arc::new(RwLock::new(HashMap::new())),
184 stats: Arc::new(RwLock::new(EncryptedStorageStats::default())),
185 };
186
187 store.load_stats_from_db()?;
188 info!("EncryptedChunkStore initialized with zero-knowledge architecture");
189 Ok(store)
190 }
191
192 /// Store an encrypted chunk with deduplication
193 ///
194 /// Zero-knowledge: Only handles encrypted data, maintains content-based deduplication
195 pub async fn store_encrypted_chunk(&self, chunk_id: &str, encrypted_data: &EncryptedData) -> Result<String> {
196 debug!("Storing encrypted chunk: {} ({} bytes)", chunk_id, encrypted_data.ciphertext.len());
197
198 // Calculate hash of encrypted content for deduplication
199 let mut hasher = Sha256::new();
200 hasher.update(&encrypted_data.ciphertext);
201 hasher.update(&encrypted_data.nonce);
202 hasher.update(&encrypted_data.aad);
203 let encrypted_hash = hex::encode(hasher.finalize());
204
205 // Check if this encrypted chunk already exists (deduplication)
206 if let Some(existing_metadata) = self.get_encrypted_chunk_metadata(&encrypted_hash).await? {
207 // Increment reference count
208 let mut metadata = existing_metadata;
209 metadata.ref_count += 1;
210 self.update_encrypted_chunk_metadata(&encrypted_hash, &metadata).await?;
211
212 debug!("Deduplicated encrypted chunk: {} (ref_count: {})", encrypted_hash, metadata.ref_count);
213 return Ok(encrypted_hash);
214 }
215
216 // Create checksum for integrity verification
217 let checksum = self.calculate_encrypted_checksum(&encrypted_data.ciphertext, &encrypted_hash);
218
219 let metadata = EncryptedChunkMetadata {
220 encrypted_hash: encrypted_hash.clone(),
221 encrypted_size: encrypted_data.ciphertext.len() as u64,
222 stored_at: std::time::SystemTime::now()
223 .duration_since(std::time::UNIX_EPOCH)?
224 .as_secs(),
225 ref_count: 1,
226 checksum,
227 content_id: None, // Set by caller if needed
228 nonce: encrypted_data.nonce,
229 aad: encrypted_data.aad.clone(),
230 key_path: encrypted_data.key_path.clone(),
231 };
232
233 // Store encrypted chunk data and metadata atomically
234 let mut batch = WriteBatch::default();
235
236 // Store the encrypted ciphertext
237 let chunk_key = format!("chunk:{}", encrypted_hash);
238 batch.put(&chunk_key, &encrypted_data.ciphertext);
239
240 // Store metadata
241 let metadata_key = format!("meta:{}", encrypted_hash);
242 let metadata_bytes = bincode::serialize(&metadata)
243 .context("Failed to serialize encrypted chunk metadata")?;
244 batch.put(&metadata_key, &metadata_bytes);
245
246 self.db.write(batch)
247 .context("Failed to store encrypted chunk atomically")?;
248
249 // Update cache and stats
250 {
251 let mut cache = self.metadata_cache.write().await;
252 cache.insert(encrypted_hash.clone(), metadata);
253 }
254
255 {
256 let mut stats = self.stats.write().await;
257 stats.total_encrypted_chunks += 1;
258 stats.total_encrypted_size += encrypted_data.ciphertext.len() as u64;
259 }
260
261 info!("Stored new encrypted chunk: {}", encrypted_hash);
262 Ok(encrypted_hash)
263 }
264
265 /// Retrieve an encrypted chunk by hash
266 ///
267 /// Zero-knowledge: Returns encrypted data without any decryption
268 pub async fn retrieve_encrypted_chunk(&self, encrypted_hash: &str) -> Result<Option<EncryptedData>> {
269 debug!("Retrieving encrypted chunk: {}", encrypted_hash);
270
271 // Get metadata first
272 let metadata = match self.get_encrypted_chunk_metadata(encrypted_hash).await? {
273 Some(meta) => meta,
274 None => {
275 debug!("Encrypted chunk not found: {}", encrypted_hash);
276 return Ok(None);
277 }
278 };
279
280 // Retrieve encrypted ciphertext
281 let chunk_key = format!("chunk:{}", encrypted_hash);
282 let ciphertext = match self.db.get(&chunk_key)
283 .context("Failed to read encrypted chunk from database")? {
284 Some(data) => data,
285 None => {
286 warn!("Encrypted chunk data missing for hash: {}", encrypted_hash);
287 return Ok(None);
288 }
289 };
290
291 // Verify integrity
292 let computed_checksum = self.calculate_encrypted_checksum(&ciphertext, encrypted_hash);
293 if computed_checksum != metadata.checksum {
294 return Err(anyhow::anyhow!(
295 "Encrypted chunk integrity verification failed for {}", encrypted_hash
296 ));
297 }
298
299 // Reconstruct EncryptedData
300 let encrypted_data = EncryptedData {
301 segment_index: 0, // Will be set by caller
302 ciphertext,
303 nonce: metadata.nonce,
304 aad: metadata.aad,
305 key_path: metadata.key_path,
306 };
307
308 {
309 let mut stats = self.stats.write().await;
310 stats.cache_hits += 1;
311 }
312
313 Ok(Some(encrypted_data))
314 }
315
316 /// Store encrypted file metadata
317 ///
318 /// Zero-knowledge: All sensitive metadata is encrypted
319 pub async fn store_encrypted_file_metadata(&self, file_id: &str, metadata: &EncryptedFileMetadata) -> Result<()> {
320 debug!("Storing encrypted file metadata: {}", file_id);
321
322 let metadata_key = format!("file:{}", file_id);
323 let metadata_bytes = bincode::serialize(metadata)
324 .context("Failed to serialize encrypted file metadata")?;
325
326 self.db.put(&metadata_key, &metadata_bytes)
327 .context("Failed to store encrypted file metadata")?;
328
329 // Update cache
330 {
331 let mut cache = self.file_metadata_cache.write().await;
332 cache.insert(file_id.to_string(), metadata.clone());
333 }
334
335 {
336 let mut stats = self.stats.write().await;
337 stats.total_encrypted_files += 1;
338 }
339
340 Ok(())
341 }
342
343 /// Retrieve encrypted file metadata
344 ///
345 /// Zero-knowledge: Returns encrypted metadata without decryption
346 pub async fn get_encrypted_file_metadata(&self, file_id: &str) -> Result<Option<EncryptedFileMetadata>> {
347 // Check cache first
348 {
349 let cache = self.file_metadata_cache.read().await;
350 if let Some(metadata) = cache.get(file_id) {
351 return Ok(Some(metadata.clone()));
352 }
353 }
354
355 let metadata_key = format!("file:{}", file_id);
356 let metadata_bytes = match self.db.get(&metadata_key)
357 .context("Failed to read encrypted file metadata")? {
358 Some(data) => data,
359 None => return Ok(None),
360 };
361
362 let metadata: EncryptedFileMetadata = bincode::deserialize(&metadata_bytes)
363 .context("Failed to deserialize encrypted file metadata")?;
364
365 // Update cache
366 {
367 let mut cache = self.file_metadata_cache.write().await;
368 cache.insert(file_id.to_string(), metadata.clone());
369 }
370
371 Ok(Some(metadata))
372 }
373
374 /// Store a file capability for secure access control
375 ///
376 /// Zero-knowledge: Capability contains encrypted permissions and keys
377 pub async fn store_capability(&self, capability: &FileCapability) -> Result<()> {
378 debug!("Storing file capability: {}", capability.capability_id);
379
380 let cap_key = format!("cap:{}", capability.capability_id);
381 let cap_bytes = bincode::serialize(capability)
382 .context("Failed to serialize capability")?;
383
384 self.db.put(&cap_key, &cap_bytes)
385 .context("Failed to store capability")?;
386
387 // Update cache and stats
388 {
389 let mut cache = self.capability_cache.write().await;
390 cache.insert(capability.capability_id.clone(), capability.clone());
391 }
392
393 {
394 let mut stats = self.stats.write().await;
395 stats.active_capabilities += 1;
396 }
397
398 Ok(())
399 }
400
401 /// Retrieve a file capability
402 pub async fn get_capability(&self, capability_id: &str) -> Result<Option<FileCapability>> {
403 // Check cache first
404 {
405 let cache = self.capability_cache.read().await;
406 if let Some(capability) = cache.get(capability_id) {
407 return Ok(Some(capability.clone()));
408 }
409 }
410
411 let cap_key = format!("cap:{}", capability_id);
412 let cap_bytes = match self.db.get(&cap_key)
413 .context("Failed to read capability")? {
414 Some(data) => data,
415 None => return Ok(None),
416 };
417
418 let capability: FileCapability = bincode::deserialize(&cap_bytes)
419 .context("Failed to deserialize capability")?;
420
421 // Check expiration
422 if let Some(expires_at) = capability.expires_at {
423 let now = std::time::SystemTime::now()
424 .duration_since(std::time::UNIX_EPOCH)?
425 .as_secs();
426 if now > expires_at {
427 debug!("Capability expired: {}", capability_id);
428 return Ok(None);
429 }
430 }
431
432 // Update cache
433 {
434 let mut cache = self.capability_cache.write().await;
435 cache.insert(capability_id.to_string(), capability.clone());
436 }
437
438 Ok(Some(capability))
439 }
440
441 /// Get storage statistics
442 pub async fn get_encrypted_stats(&self) -> EncryptedStorageStats {
443 let stats = self.stats.read().await;
444 (*stats).clone()
445 }
446
447 /// Helper methods
448 async fn get_encrypted_chunk_metadata(&self, encrypted_hash: &str) -> Result<Option<EncryptedChunkMetadata>> {
449 // Check cache first
450 {
451 let cache = self.metadata_cache.read().await;
452 if let Some(metadata) = cache.get(encrypted_hash) {
453 return Ok(Some(metadata.clone()));
454 }
455 }
456
457 let metadata_key = format!("meta:{}", encrypted_hash);
458 let metadata_bytes = match self.db.get(&metadata_key)? {
459 Some(data) => data,
460 None => return Ok(None),
461 };
462
463 let metadata: EncryptedChunkMetadata = bincode::deserialize(&metadata_bytes)
464 .context("Failed to deserialize encrypted chunk metadata")?;
465
466 Ok(Some(metadata))
467 }
468
469 async fn update_encrypted_chunk_metadata(&self, encrypted_hash: &str, metadata: &EncryptedChunkMetadata) -> Result<()> {
470 let metadata_key = format!("meta:{}", encrypted_hash);
471 let metadata_bytes = bincode::serialize(metadata)?;
472
473 self.db.put(&metadata_key, &metadata_bytes)?;
474
475 // Update cache
476 {
477 let mut cache = self.metadata_cache.write().await;
478 cache.insert(encrypted_hash.to_string(), metadata.clone());
479 }
480
481 Ok(())
482 }
483
484 fn calculate_encrypted_checksum(&self, ciphertext: &[u8], hash: &str) -> String {
485 let mut hasher = Sha256::new();
486 hasher.update(ciphertext);
487 hasher.update(hash.as_bytes());
488 hasher.update(b"zephyrfs-encrypted-chunk-v1");
489 hex::encode(hasher.finalize())
490 }
491
    /// Helper: rebuild `EncryptedStorageStats` from the persisted database.
    ///
    /// NOTE(review): currently a stub — stats stay at their `Default`
    /// (all-zero) values, so counters reset on every process restart.
    /// Confirm whether a DB scan is planned here.
    fn load_stats_from_db(&self) -> Result<()> {
        // Implementation would scan database to calculate stats
        // For now, we'll initialize with defaults
        Ok(())
    }
497 }
498
499 #[cfg(test)]
500 mod tests {
501 use super::*;
502 use tempfile::tempdir;
503
504 #[tokio::test]
505 async fn test_encrypted_chunk_store_creation() {
506 let temp_dir = tempdir().unwrap();
507 let store = EncryptedChunkStore::new(temp_dir.path()).unwrap();
508 let stats = store.get_encrypted_stats().await;
509
510 assert_eq!(stats.total_encrypted_chunks, 0);
511 assert_eq!(stats.total_encrypted_files, 0);
512 assert_eq!(stats.active_capabilities, 0);
513 }
514
515 #[tokio::test]
516 async fn test_encrypted_chunk_deduplication() {
517 let temp_dir = tempdir().unwrap();
518 let store = EncryptedChunkStore::new(temp_dir.path()).unwrap();
519
520 let encrypted_data = EncryptedData {
521 segment_index: 0,
522 ciphertext: vec![1, 2, 3, 4, 5],
523 nonce: [0u8; 12],
524 aad: vec![],
525 key_path: vec![0, 1],
526 };
527
528 // Store same encrypted chunk twice
529 let hash1 = store.store_encrypted_chunk("chunk1", &encrypted_data).await.unwrap();
530 let hash2 = store.store_encrypted_chunk("chunk2", &encrypted_data).await.unwrap();
531
532 // Should be deduplicated (same hash)
533 assert_eq!(hash1, hash2);
534
535 // Should have reference count of 2
536 let metadata = store.get_encrypted_chunk_metadata(&hash1).await.unwrap().unwrap();
537 assert_eq!(metadata.ref_count, 2);
538 }
539 }