Rust · 16620 bytes Raw Blame History
1 //! Content addressing system for ZephyrFS
2 //!
3 //! Provides cryptographic content identifiers and verification using Blake3 and SHA-256.
4 //! Content IDs are used for integrity verification and deduplication.
5
6 use crate::crypto::{ContentHasher, VerificationHasher};
7 use blake3::Hasher as Blake3Hasher;
8 use serde::{Deserialize, Serialize};
9 use sha2::{Digest, Sha256};
10 use std::fmt;
11
12 /// Content identifier - cryptographic hash of data
13 #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
14 pub struct ContentId {
15 /// Hash algorithm used
16 pub algorithm: HashAlgorithm,
17 /// Hash bytes
18 pub hash: Vec<u8>,
19 }
20
21 /// Supported hash algorithms
22 #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
23 pub enum HashAlgorithm {
24 Blake3,
25 Sha256,
26 }
27
28 impl ContentId {
29 /// Generate content ID from data
30 pub fn generate(data: &[u8], hasher: &ContentHasher) -> Self {
31 match hasher {
32 ContentHasher::Blake3 => {
33 let mut hasher = Blake3Hasher::new();
34 hasher.update(data);
35 let hash = hasher.finalize();
36
37 Self {
38 algorithm: HashAlgorithm::Blake3,
39 hash: hash.as_bytes().to_vec(),
40 }
41 }
42 ContentHasher::Sha256 => {
43 let mut hasher = Sha256::new();
44 hasher.update(data);
45 let hash = hasher.finalize();
46
47 Self {
48 algorithm: HashAlgorithm::Sha256,
49 hash: hash.to_vec(),
50 }
51 }
52 }
53 }
54
55 /// Create from existing hash bytes and algorithm
56 pub fn from_hash(algorithm: HashAlgorithm, hash: Vec<u8>) -> Self {
57 Self { algorithm, hash }
58 }
59
60 /// Get hash bytes
61 pub fn hash_bytes(&self) -> &[u8] {
62 &self.hash
63 }
64
65 /// Get algorithm used
66 pub fn algorithm(&self) -> &HashAlgorithm {
67 &self.algorithm
68 }
69
70 /// Convert to hex string representation
71 pub fn to_hex(&self) -> String {
72 hex::encode(&self.hash)
73 }
74
75 /// Create from hex string
76 pub fn from_hex(algorithm: HashAlgorithm, hex_str: &str) -> Result<Self, hex::FromHexError> {
77 let hash = hex::decode(hex_str)?;
78 Ok(Self { algorithm, hash })
79 }
80
81 /// Get expected hash length for algorithm
82 pub fn expected_length(&self) -> usize {
83 match self.algorithm {
84 HashAlgorithm::Blake3 => 32, // Blake3 output is 32 bytes
85 HashAlgorithm::Sha256 => 32, // SHA-256 output is 32 bytes
86 }
87 }
88
89 /// Validate hash length matches algorithm
90 pub fn is_valid(&self) -> bool {
91 self.hash.len() == self.expected_length()
92 }
93
94 /// Create a multihash-style prefix for the content ID
95 pub fn multihash_prefix(&self) -> Vec<u8> {
96 let mut prefix = Vec::new();
97
98 // Add algorithm identifier
99 match self.algorithm {
100 HashAlgorithm::Blake3 => {
101 prefix.push(0x1e); // Blake3 multicodec
102 prefix.push(32); // Hash length
103 }
104 HashAlgorithm::Sha256 => {
105 prefix.push(0x12); // SHA-256 multicodec
106 prefix.push(32); // Hash length
107 }
108 }
109
110 prefix.extend_from_slice(&self.hash);
111 prefix
112 }
113
114 /// Parse from multihash format
115 pub fn from_multihash(data: &[u8]) -> Result<Self, String> {
116 if data.len() < 2 {
117 return Err("Multihash too short".to_string());
118 }
119
120 let algorithm = match data[0] {
121 0x1e => HashAlgorithm::Blake3,
122 0x12 => HashAlgorithm::Sha256,
123 _ => return Err(format!("Unsupported hash algorithm: {}", data[0])),
124 };
125
126 let expected_len = data[1] as usize;
127 if data.len() != expected_len + 2 {
128 return Err(format!(
129 "Invalid multihash length: expected {}, got {}",
130 expected_len + 2,
131 data.len()
132 ));
133 }
134
135 let hash = data[2..].to_vec();
136 let content_id = Self { algorithm, hash };
137
138 if !content_id.is_valid() {
139 return Err("Invalid hash length for algorithm".to_string());
140 }
141
142 Ok(content_id)
143 }
144 }
145
146 impl fmt::Display for ContentId {
147 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148 write!(f, "{}:{}",
149 match self.algorithm {
150 HashAlgorithm::Blake3 => "blake3",
151 HashAlgorithm::Sha256 => "sha256",
152 },
153 self.to_hex()
154 )
155 }
156 }
157
158 /// Content verification utilities
159 pub struct ContentVerifier;
160
161 impl ContentVerifier {
162 /// Verify data matches expected content ID
163 pub fn verify(data: &[u8], expected_id: &ContentId, hasher: &VerificationHasher) -> bool {
164 let computed_id = match hasher {
165 VerificationHasher::Blake3 => ContentId::generate(data, &ContentHasher::Blake3),
166 VerificationHasher::Sha256 => ContentId::generate(data, &ContentHasher::Sha256),
167 };
168
169 // Must use same algorithm and hash must match
170 computed_id.algorithm == expected_id.algorithm &&
171 computed_id.hash == expected_id.hash
172 }
173
174 /// Verify data with multiple hash algorithms (for maximum security)
175 pub fn verify_multi(data: &[u8], expected_ids: &[ContentId]) -> bool {
176 for expected_id in expected_ids {
177 let hasher = match expected_id.algorithm {
178 HashAlgorithm::Blake3 => VerificationHasher::Blake3,
179 HashAlgorithm::Sha256 => VerificationHasher::Sha256,
180 };
181
182 if !Self::verify(data, expected_id, &hasher) {
183 return false;
184 }
185 }
186 true
187 }
188
189 /// Compute multiple hashes for data (Blake3 + SHA-256 for maximum security)
190 pub fn compute_multi_hash(data: &[u8]) -> Vec<ContentId> {
191 vec![
192 ContentId::generate(data, &ContentHasher::Blake3),
193 ContentId::generate(data, &ContentHasher::Sha256),
194 ]
195 }
196
197 /// Create integrity proof for data chunk
198 pub fn create_integrity_proof(data: &[u8]) -> IntegrityProof {
199 IntegrityProof {
200 blake3_hash: ContentId::generate(data, &ContentHasher::Blake3),
201 sha256_hash: ContentId::generate(data, &ContentHasher::Sha256),
202 data_length: data.len() as u64,
203 }
204 }
205
206 /// Verify data against integrity proof
207 pub fn verify_integrity_proof(data: &[u8], proof: &IntegrityProof) -> bool {
208 if data.len() as u64 != proof.data_length {
209 return false;
210 }
211
212 let blake3_ok = Self::verify(data, &proof.blake3_hash, &VerificationHasher::Blake3);
213 let sha256_ok = Self::verify(data, &proof.sha256_hash, &VerificationHasher::Sha256);
214
215 blake3_ok && sha256_ok
216 }
217 }
218
219 /// Integrity proof containing multiple hashes for maximum security
220 #[derive(Debug, Clone, Serialize, Deserialize)]
221 pub struct IntegrityProof {
222 pub blake3_hash: ContentId,
223 pub sha256_hash: ContentId,
224 pub data_length: u64,
225 }
226
227 impl IntegrityProof {
228 /// Get the primary content ID (Blake3 for performance)
229 pub fn primary_id(&self) -> &ContentId {
230 &self.blake3_hash
231 }
232
233 /// Get the secondary content ID (SHA-256 for compatibility)
234 pub fn secondary_id(&self) -> &ContentId {
235 &self.sha256_hash
236 }
237
238 /// Convert to multihash format (primary hash only)
239 pub fn to_multihash(&self) -> Vec<u8> {
240 self.blake3_hash.multihash_prefix()
241 }
242 }
243
244 /// Content addressing utilities for file chunks
245 pub struct ChunkAddressing;
246
247 impl ChunkAddressing {
248 /// Generate content ID for a file chunk with metadata
249 pub fn chunk_id(chunk_data: &[u8], chunk_index: u64, file_id: &[u8]) -> ContentId {
250 let mut hasher = Blake3Hasher::new();
251
252 // Include chunk metadata in hash for uniqueness
253 hasher.update(b"ZephyrFS-chunk-v1:");
254 hasher.update(&chunk_index.to_be_bytes());
255 hasher.update(b":");
256 hasher.update(file_id);
257 hasher.update(b":");
258 hasher.update(chunk_data);
259
260 let hash = hasher.finalize();
261 ContentId {
262 algorithm: HashAlgorithm::Blake3,
263 hash: hash.as_bytes().to_vec(),
264 }
265 }
266
267 /// Generate file-level content ID from chunk IDs
268 pub fn file_id_from_chunks(chunk_ids: &[ContentId]) -> ContentId {
269 let mut hasher = Blake3Hasher::new();
270 hasher.update(b"ZephyrFS-file-v1:");
271
272 for chunk_id in chunk_ids {
273 hasher.update(&chunk_id.hash);
274 }
275
276 let hash = hasher.finalize();
277 ContentId {
278 algorithm: HashAlgorithm::Blake3,
279 hash: hash.as_bytes().to_vec(),
280 }
281 }
282
283 /// Create Merkle tree root from chunk hashes
284 pub fn merkle_root(chunk_ids: &[ContentId]) -> ContentId {
285 if chunk_ids.is_empty() {
286 // Empty file hash
287 return ContentId::generate(b"", &ContentHasher::Blake3);
288 }
289
290 if chunk_ids.len() == 1 {
291 return chunk_ids[0].clone();
292 }
293
294 // Build Merkle tree bottom-up
295 let mut level: Vec<ContentId> = chunk_ids.to_vec();
296
297 while level.len() > 1 {
298 let mut next_level = Vec::new();
299
300 for pair in level.chunks(2) {
301 let mut hasher = Blake3Hasher::new();
302 hasher.update(b"ZephyrFS-merkle-v1:");
303 hasher.update(&pair[0].hash);
304
305 if pair.len() == 2 {
306 hasher.update(&pair[1].hash);
307 } else {
308 // Odd number of nodes - hash with itself
309 hasher.update(&pair[0].hash);
310 }
311
312 let hash = hasher.finalize();
313 next_level.push(ContentId {
314 algorithm: HashAlgorithm::Blake3,
315 hash: hash.as_bytes().to_vec(),
316 });
317 }
318
319 level = next_level;
320 }
321
322 level.into_iter().next().unwrap()
323 }
324 }
325
#[cfg(test)]
mod tests {
    use super::*;

    /// Blake3 content ID of `data`.
    fn blake3_of(data: &[u8]) -> ContentId {
        ContentId::generate(data, &ContentHasher::Blake3)
    }

    /// SHA-256 content ID of `data`.
    fn sha256_of(data: &[u8]) -> ContentId {
        ContentId::generate(data, &ContentHasher::Sha256)
    }

    #[test]
    fn test_content_id_generation() {
        let payload = b"Hello, ZephyrFS!";

        let blake3_id = blake3_of(payload);
        let sha256_id = sha256_of(payload);

        assert_eq!(blake3_id.algorithm, HashAlgorithm::Blake3);
        assert_eq!(sha256_id.algorithm, HashAlgorithm::Sha256);
        assert_eq!(blake3_id.hash.len(), 32);
        assert_eq!(sha256_id.hash.len(), 32);
        assert_ne!(blake3_id.hash, sha256_id.hash);
    }

    #[test]
    fn test_content_verification() {
        let payload = b"Test data for verification";
        let id = blake3_of(payload);

        // Matching data verifies.
        assert!(ContentVerifier::verify(payload, &id, &VerificationHasher::Blake3));

        // Different data does not.
        let other = b"Wrong data";
        assert!(!ContentVerifier::verify(other, &id, &VerificationHasher::Blake3));
    }

    #[test]
    fn test_content_id_hex_serialization() {
        let original = blake3_of(b"Serialization test");

        let encoded = original.to_hex();
        let decoded = ContentId::from_hex(HashAlgorithm::Blake3, &encoded).unwrap();

        assert_eq!(original, decoded);
    }

    #[test]
    fn test_multihash_format() {
        let payload = b"Multihash test data";

        // Round-trip through the multihash encoding for Blake3.
        let blake3_id = blake3_of(payload);
        let blake3_encoded = blake3_id.multihash_prefix();
        let blake3_decoded = ContentId::from_multihash(&blake3_encoded).unwrap();
        assert_eq!(blake3_id, blake3_decoded);

        // Same round-trip for SHA-256.
        let sha256_id = sha256_of(payload);
        let sha256_encoded = sha256_id.multihash_prefix();
        let sha256_decoded = ContentId::from_multihash(&sha256_encoded).unwrap();
        assert_eq!(sha256_id, sha256_decoded);
    }

    #[test]
    fn test_integrity_proof() {
        let payload = b"Integrity proof test data";
        let proof = ContentVerifier::create_integrity_proof(payload);

        assert_eq!(proof.data_length, payload.len() as u64);
        assert_eq!(proof.blake3_hash.algorithm, HashAlgorithm::Blake3);
        assert_eq!(proof.sha256_hash.algorithm, HashAlgorithm::Sha256);

        // Matching data verifies.
        assert!(ContentVerifier::verify_integrity_proof(payload, &proof));

        // Different data does not.
        let other = b"Wrong data";
        assert!(!ContentVerifier::verify_integrity_proof(other, &proof));
    }

    #[test]
    fn test_chunk_addressing() {
        let chunk = b"Chunk data for addressing test";
        let file = b"test_file_12345";

        let base = ChunkAddressing::chunk_id(chunk, 0, file);
        assert_eq!(base.algorithm, HashAlgorithm::Blake3);
        assert_eq!(base.hash.len(), 32);

        // Changing only the chunk index changes the ID.
        let other_index = ChunkAddressing::chunk_id(chunk, 1, file);
        assert_ne!(base, other_index);

        // Changing only the file ID changes the ID.
        let other_file = ChunkAddressing::chunk_id(chunk, 0, b"different_file");
        assert_ne!(base, other_file);
    }

    #[test]
    fn test_file_id_from_chunks() {
        let first = blake3_of(b"chunk 1");
        let second = blake3_of(b"chunk 2");
        let third = blake3_of(b"chunk 3");

        let in_order = [first.clone(), second.clone(), third.clone()];
        let file_id = ChunkAddressing::file_id_from_chunks(&in_order);
        assert_eq!(file_id.algorithm, HashAlgorithm::Blake3);

        // Chunk order is part of the file identity.
        let rotated = [third, first, second];
        let file_id2 = ChunkAddressing::file_id_from_chunks(&rotated);
        assert_ne!(file_id, file_id2);
    }

    #[test]
    fn test_merkle_root() {
        let chunk1 = blake3_of(b"chunk 1");
        let chunk2 = blake3_of(b"chunk 2");
        let chunk3 = blake3_of(b"chunk 3");
        let chunk4 = blake3_of(b"chunk 4");

        // A single chunk is its own root.
        let root1 = ChunkAddressing::merkle_root(&[chunk1.clone()]);
        assert_eq!(root1, chunk1);

        // Two and four chunks give distinct Blake3 roots.
        let root2 = ChunkAddressing::merkle_root(&[chunk1.clone(), chunk2.clone()]);
        let root4 = ChunkAddressing::merkle_root(&[chunk1, chunk2, chunk3, chunk4]);

        assert_ne!(root2, root4);
        assert_eq!(root2.algorithm, HashAlgorithm::Blake3);
        assert_eq!(root4.algorithm, HashAlgorithm::Blake3);

        // No chunks still yields a Blake3 ID.
        let empty_root = ChunkAddressing::merkle_root(&[]);
        assert_eq!(empty_root.algorithm, HashAlgorithm::Blake3);
    }

    #[test]
    fn test_content_id_display() {
        let blake3_str = blake3_of(b"display test").to_string();

        assert!(blake3_str.starts_with("blake3:"));
        assert_eq!(blake3_str.len(), 71); // "blake3:" (7) + hex hash (64)

        let sha256_str = sha256_of(b"display test").to_string();

        assert!(sha256_str.starts_with("sha256:"));
        assert_eq!(sha256_str.len(), 71); // "sha256:" (7) + hex hash (64)
    }

    #[test]
    fn test_multi_hash_verification() {
        let payload = b"Multi hash verification test";
        let ids = vec![blake3_of(payload), sha256_of(payload)];

        // Matching data verifies against every hash.
        assert!(ContentVerifier::verify_multi(payload, &ids));

        // Different data fails.
        let other = b"Wrong data";
        assert!(!ContentVerifier::verify_multi(other, &ids));
    }
}
489 }