| 1 | //! Sprint 1 corpus gate. |
| 2 | //! |
| 3 | //! Assembles every `afs-as/tests/corpus/*.s` fixture with `xcrun as`, parses |
| 4 | //! the resulting Mach-O with our reader, re-emits the header + load-command |
| 5 | //! region, and asserts byte-level equality. Proves the reader lost no bits |
| 6 | //! decoding the load commands afs-as emits in practice. |
| 7 | //! |
| 8 | //! Section bodies, symbols, strings, and relocations are still untouched by |
| 9 | //! Sprint 1; we only round-trip the header + `sizeofcmds` region. |
| 10 | |
| 11 | use std::fs; |
| 12 | use std::path::{Path, PathBuf}; |
| 13 | use std::process::Command; |
| 14 | |
| 15 | use afs_ld::input::ObjectFile; |
| 16 | use afs_ld::macho::reader::{ |
| 17 | parse_commands, parse_header, write_commands, write_header, HEADER_SIZE, |
| 18 | }; |
| 19 | use afs_ld::reloc::{ |
| 20 | parse_raw_relocs, parse_relocs, validate_relocs, write_raw_relocs, write_relocs, RAW_RELOC_SIZE, |
| 21 | }; |
| 22 | use afs_ld::symbol::{write_nlist_table, NLIST_SIZE}; |
| 23 | |
| 24 | fn corpus_dir() -> PathBuf { |
| 25 | // CARGO_MANIFEST_DIR is afs-ld/; afs-as is a sibling submodule in armfortas. |
| 26 | Path::new(env!("CARGO_MANIFEST_DIR")) |
| 27 | .join("..") |
| 28 | .join("afs-as") |
| 29 | .join("tests") |
| 30 | .join("corpus") |
| 31 | } |
| 32 | |
/// Assembles `src` into the object file `obj` by running
/// `xcrun --sdk macosx as -arch arm64 <src> -o <obj>`.
///
/// # Errors
///
/// Returns a human-readable message when `xcrun` cannot be spawned, or when
/// the assembler exits unsuccessfully (the message then carries its stderr).
fn assemble(src: &Path, obj: &Path) -> Result<(), String> {
    let mut cmd = Command::new("xcrun");
    cmd.args(["--sdk", "macosx", "as", "-arch", "arm64"])
        .arg(src)
        .arg("-o")
        .arg(obj);

    let output = match cmd.output() {
        Ok(output) => output,
        Err(e) => return Err(format!("failed to spawn xcrun: {e}")),
    };

    if output.status.success() {
        Ok(())
    } else {
        let stderr = String::from_utf8_lossy(&output.stderr);
        Err(format!("xcrun as failed on {}: {}", src.display(), stderr))
    }
}
| 50 | |
/// Sprint 1 gate: the Mach-O header and load-command region of every
/// assembled corpus fixture must survive a parse → re-emit round-trip
/// byte-for-byte.
///
/// The test returns early (with a note on stderr) when the corpus directory
/// is missing or `xcrun -f as` does not succeed. Per-fixture problems are
/// collected and reported together at the end rather than aborting on the
/// first one.
#[test]
fn every_afs_as_corpus_s_round_trips() {
    let corpus = corpus_dir();
    if !corpus.is_dir() {
        eprintln!(
            "skipping: corpus not found at {} (run from the armfortas workspace)",
            corpus.display()
        );
        return;
    }

    let which = Command::new("xcrun").arg("-f").arg("as").output();
    if !matches!(which, Ok(o) if o.status.success()) {
        eprintln!("skipping: xcrun as not available");
        return;
    }

    let scratch = tempdir();
    let mut failures: Vec<String> = Vec::new();

    // Sort so fixtures are processed (and failures listed) in path order.
    let mut entries: Vec<PathBuf> = fs::read_dir(&corpus)
        .expect("read corpus dir")
        .filter_map(|e| e.ok().map(|e| e.path()))
        .filter(|p| p.extension().map(|e| e == "s").unwrap_or(false))
        .collect();
    entries.sort();
    let fixture_count = entries.len();

    for src in &entries {
        let stem = src
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("fixture");
        let obj = scratch.join(format!("{stem}.o"));

        if let Err(e) = assemble(src, &obj) {
            failures.push(format!("{}: assemble failed: {e}", src.display()));
            continue;
        }

        let bytes = match fs::read(&obj) {
            Ok(b) => b,
            Err(e) => {
                failures.push(format!("{}: read-back failed: {e}", src.display()));
                continue;
            }
        };

        let hdr = match parse_header(&bytes) {
            Ok(h) => h,
            Err(e) => {
                failures.push(format!("{}: parse_header: {e}", src.display()));
                continue;
            }
        };

        let cmds = match parse_commands(&hdr, &bytes) {
            Ok(c) => c,
            Err(e) => {
                failures.push(format!("{}: parse_commands: {e}", src.display()));
                continue;
            }
        };

        // The region under test is the header plus `sizeofcmds` bytes of load
        // commands. Use a checked slice so an object shorter than that is
        // reported as a fixture failure instead of panicking the whole run.
        let cmds_end = HEADER_SIZE + hdr.sizeofcmds as usize;
        let want = match bytes.get(..cmds_end) {
            Some(w) => w,
            None => {
                failures.push(format!(
                    "{}: load-command region ends at 0x{cmds_end:x}, past the end of the {}-byte object",
                    src.display(),
                    bytes.len()
                ));
                continue;
            }
        };

        let mut out = Vec::with_capacity(cmds_end);
        write_header(&hdr, &mut out);
        write_commands(&cmds, &mut out);

        if out.as_slice() != want {
            let delta = first_diff(&out, want);
            failures.push(format!(
                "{}: re-emit mismatch at offset 0x{delta:x}",
                src.display()
            ));
        }
    }

    // Keep the assembled objects for inspection when something failed;
    // otherwise remove them (best effort) so runs don't pile up temp dirs.
    if failures.is_empty() {
        let _ = fs::remove_dir_all(&scratch);
    }

    assert!(
        fixture_count > 0,
        "no .s fixtures found in {}",
        corpus.display()
    );
    assert!(
        failures.is_empty(),
        "{} of {} corpus fixtures failed:\n{}",
        failures.len(),
        fixture_count,
        failures.join("\n")
    );
}
| 145 | |
/// Sprint 2 gate: every corpus fixture must parse fully via `ObjectFile`,
/// and the nlist + string-table regions must survive a byte-level round-trip.
///
/// The test returns early (with a note on stderr) when the corpus directory
/// is missing or `xcrun -f as` does not succeed. Per-fixture problems are
/// collected and reported together at the end.
#[test]
fn every_afs_as_corpus_object_parses_fully() {
    let corpus = corpus_dir();
    if !corpus.is_dir() {
        eprintln!("skipping: corpus not found at {}", corpus.display());
        return;
    }
    let which = Command::new("xcrun").arg("-f").arg("as").output();
    if !matches!(which, Ok(o) if o.status.success()) {
        eprintln!("skipping: xcrun as not available");
        return;
    }

    let scratch = tempdir();
    let mut failures: Vec<String> = Vec::new();

    // Sort so fixtures are processed (and failures listed) in path order.
    let mut entries: Vec<PathBuf> = fs::read_dir(&corpus)
        .expect("read corpus dir")
        .filter_map(|e| e.ok().map(|e| e.path()))
        .filter(|p| p.extension().map(|e| e == "s").unwrap_or(false))
        .collect();
    entries.sort();
    let fixture_count = entries.len();

    for src in &entries {
        let stem = src
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("fixture");
        let obj_path = scratch.join(format!("{stem}.o"));

        if let Err(e) = assemble(src, &obj_path) {
            failures.push(format!("{}: assemble: {e}", src.display()));
            continue;
        }

        let bytes = match fs::read(&obj_path) {
            Ok(b) => b,
            Err(e) => {
                failures.push(format!("{}: read: {e}", src.display()));
                continue;
            }
        };

        let obj = match ObjectFile::parse(&obj_path, &bytes) {
            Ok(o) => o,
            Err(e) => {
                failures.push(format!("{}: ObjectFile::parse: {e}", src.display()));
                continue;
            }
        };

        // Every symbol name must resolve via the string table. Only the
        // first offender per fixture is reported.
        for (i, sym) in obj.symbols.iter().enumerate() {
            if let Err(e) = obj.symbol_name(sym) {
                failures.push(format!(
                    "{}: symbol[{i}].strx={} does not resolve: {e}",
                    src.display(),
                    sym.strx()
                ));
                break;
            }
        }

        // Section count and n_sect references — every SECT symbol should
        // reference a valid 1-based section index.
        for (i, sym) in obj.symbols.iter().enumerate() {
            if sym.stab_kind().is_some() {
                continue; // stab n_sect has a different meaning
            }
            if sym.kind() == afs_ld::symbol::SymKind::Sect && obj.section_for_symbol(sym).is_none()
            {
                failures.push(format!(
                    "{}: symbol[{i}] has SECT kind but n_sect={} is out of range ({} sections)",
                    src.display(),
                    sym.sect_idx(),
                    obj.sections.len()
                ));
                break;
            }
        }

        if let Some(symtab) = obj.symtab {
            // Byte-level round-trip of the nlist region. The region is taken
            // with a checked slice so an out-of-range symtab is reported as
            // a fixture failure rather than panicking the whole run.
            let nlist_start = symtab.symoff as usize;
            let nlist_end = nlist_start + symtab.nsyms as usize * NLIST_SIZE;
            let want = match bytes.get(nlist_start..nlist_end) {
                Some(w) => w,
                None => {
                    failures.push(format!(
                        "{}: nlist region 0x{nlist_start:x}..0x{nlist_end:x} lies outside the {}-byte object",
                        src.display(),
                        bytes.len()
                    ));
                    continue;
                }
            };

            let mut reemitted = Vec::with_capacity(obj.symbols.len() * NLIST_SIZE);
            write_nlist_table(&obj.symbols, &mut reemitted);
            if reemitted != want {
                failures.push(format!(
                    "{}: nlist region re-emit mismatch (symoff=0x{:x} nsyms={})",
                    src.display(),
                    symtab.symoff,
                    symtab.nsyms
                ));
                continue;
            }

            // Byte-level equality of the string-table blob, again with a
            // checked slice.
            let str_start = symtab.stroff as usize;
            let str_end = str_start + symtab.strsize as usize;
            match bytes.get(str_start..str_end) {
                Some(strtab_want) => {
                    if obj.strings.as_bytes() != strtab_want {
                        failures.push(format!(
                            "{}: strtab byte mismatch (stroff=0x{:x} strsize={})",
                            src.display(),
                            symtab.stroff,
                            symtab.strsize
                        ));
                    }
                }
                None => failures.push(format!(
                    "{}: strtab region 0x{str_start:x}..0x{str_end:x} lies outside the {}-byte object",
                    src.display(),
                    bytes.len()
                )),
            }
        }
    }

    // Keep the assembled objects for inspection when something failed;
    // otherwise remove them (best effort) so runs don't pile up temp dirs.
    if failures.is_empty() {
        let _ = fs::remove_dir_all(&scratch);
    }

    assert!(fixture_count > 0, "no fixtures found");
    assert!(
        failures.is_empty(),
        "{} of {} fixtures failed Sprint 2 invariants:\n{}",
        failures.len(),
        fixture_count,
        failures.join("\n")
    );
}
| 271 | |
/// Sprint 3 gate: every corpus fixture's per-section relocation region
/// round-trips byte-exact through raw→fused→raw→bytes, every fused reloc
/// passes `validate_relocs`, and no fixture triggers a diagnostic.
///
/// The test returns early (with a note on stderr) when the corpus directory
/// is missing or `xcrun -f as` does not succeed. It also fails if the whole
/// corpus yields zero fused relocations, since that would mean nothing was
/// actually exercised.
#[test]
fn every_afs_as_corpus_section_relocs_round_trip() {
    let corpus = corpus_dir();
    if !corpus.is_dir() {
        eprintln!("skipping: corpus not found at {}", corpus.display());
        return;
    }
    let which = Command::new("xcrun").arg("-f").arg("as").output();
    if !matches!(which, Ok(o) if o.status.success()) {
        eprintln!("skipping: xcrun as not available");
        return;
    }

    let scratch = tempdir();
    let mut reloc_count_total = 0usize;
    let mut failures: Vec<String> = Vec::new();

    // Sort so fixtures are processed (and failures listed) in path order.
    let mut entries: Vec<PathBuf> = fs::read_dir(&corpus)
        .expect("read corpus dir")
        .filter_map(|e| e.ok().map(|e| e.path()))
        .filter(|p| p.extension().map(|e| e == "s").unwrap_or(false))
        .collect();
    entries.sort();
    let fixture_count = entries.len();

    for src in &entries {
        let stem = src
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("fixture");
        let obj_path = scratch.join(format!("{stem}.o"));

        if let Err(e) = assemble(src, &obj_path) {
            failures.push(format!("{}: assemble: {e}", src.display()));
            continue;
        }
        let bytes = match fs::read(&obj_path) {
            Ok(b) => b,
            Err(e) => {
                failures.push(format!("{}: read: {e}", src.display()));
                continue;
            }
        };
        let obj = match ObjectFile::parse(&obj_path, &bytes) {
            Ok(o) => o,
            Err(e) => {
                failures.push(format!("{}: ObjectFile::parse: {e}", src.display()));
                continue;
            }
        };

        // The counts are handed to `validate_relocs` as u32 / u8. Reject
        // anything that does not fit instead of letting an `as` cast wrap
        // silently and validate against the wrong bounds.
        if obj.symbols.len() > u32::MAX as usize || obj.sections.len() > u8::MAX as usize {
            failures.push(format!(
                "{}: {} symbols / {} sections do not fit in u32 / u8",
                src.display(),
                obj.symbols.len(),
                obj.sections.len()
            ));
            continue;
        }
        let nsyms = obj.symbols.len() as u32;
        let nsects = obj.sections.len() as u8;

        for (i, sec) in obj.sections.iter().enumerate() {
            if sec.nreloc == 0 {
                continue;
            }

            // Shared prefix for every failure message about this section;
            // built lazily so the happy path does no formatting.
            let ctx = || format!("{}: section[{i}] {}", src.display(), sec.sectname);

            // Raw parse from the section's owned reloc bytes.
            let raws = match parse_raw_relocs(&sec.raw_relocs, 0, sec.nreloc) {
                Ok(r) => r,
                Err(e) => {
                    failures.push(format!("{} raw parse: {e}", ctx()));
                    continue;
                }
            };

            // Fuse ADDEND/SUBTRACTOR prefixes.
            let fused = match parse_relocs(&raws) {
                Ok(r) => r,
                Err(e) => {
                    failures.push(format!("{} fuse: {e}", ctx()));
                    continue;
                }
            };
            reloc_count_total += fused.len();

            // Validate bounds + referents.
            if let Err(e) = validate_relocs(&fused, sec.size, nsyms, nsects) {
                failures.push(format!("{} validate: {e}", ctx()));
                continue;
            }

            // Re-emit raw via write_relocs and compare to the original bytes.
            let reemitted_raws = match write_relocs(&fused) {
                Ok(r) => r,
                Err(e) => {
                    failures.push(format!("{} write_relocs: {e}", ctx()));
                    continue;
                }
            };
            let mut reemitted_bytes = Vec::with_capacity(reemitted_raws.len() * RAW_RELOC_SIZE);
            write_raw_relocs(&reemitted_raws, &mut reemitted_bytes);
            if reemitted_bytes != sec.raw_relocs {
                failures.push(format!(
                    "{} reloc re-emit mismatch (nreloc={}, raws={} fused={})",
                    ctx(),
                    sec.nreloc,
                    raws.len(),
                    fused.len()
                ));
            }
        }
    }

    // Keep the assembled objects for inspection when something failed;
    // otherwise remove them (best effort) so runs don't pile up temp dirs.
    if failures.is_empty() {
        let _ = fs::remove_dir_all(&scratch);
    }

    assert!(fixture_count > 0, "no fixtures found");
    assert!(
        reloc_count_total > 0,
        "corpus produced zero relocs — reading wrong?"
    );
    assert!(
        failures.is_empty(),
        "{} of {} fixtures failed Sprint 3 reloc invariants ({} fused relocs across corpus):\n{}",
        failures.len(),
        fixture_count,
        reloc_count_total,
        failures.join("\n")
    );
}
| 412 | |
/// Returns the index of the first position at which `a` and `b` hold
/// different bytes.
///
/// When no differing position exists within the common length (the slices
/// are equal, or one is a prefix of the other), the length of the shorter
/// slice is returned.
fn first_diff(a: &[u8], b: &[u8]) -> usize {
    // The number of leading equal pairs is exactly the first differing
    // index, or the shorter length when the zip runs out first.
    a.iter()
        .zip(b.iter())
        .take_while(|(lhs, rhs)| lhs == rhs)
        .count()
}
| 419 | |
/// Creates and returns a fresh scratch directory under the system temp dir,
/// named `afs-ld-corpus-<pid>-<seq>`.
///
/// `<seq>` comes from a process-wide counter, so each caller gets a unique
/// dir and cargo's parallel tests don't step on one another's `.o` files.
///
/// # Panics
///
/// Panics if the directory cannot be created.
fn tempdir() -> PathBuf {
    use std::sync::atomic::{AtomicUsize, Ordering};

    static NEXT_SEQ: AtomicUsize = AtomicUsize::new(0);

    let pid = std::process::id();
    let seq = NEXT_SEQ.fetch_add(1, Ordering::Relaxed);

    let mut dir = std::env::temp_dir();
    dir.push(format!("afs-ld-corpus-{}-{}", pid, seq));

    // Best effort: clear whatever may already sit at this path, ignoring the
    // error when there is nothing to remove, then create it anew.
    let _ = fs::remove_dir_all(&dir);
    fs::create_dir_all(&dir).expect("create scratch dir");
    dir
}
| 431 |