//! End-to-end atomization gate.
//!
//! Assembles small multi-symbol objects with `xcrun as`, runs the
//! resolve → atomize → back-patch pipeline, and verifies:
//!
//! - Every defined external symbol owns an atom: the atom's `owner` is
//!   that symbol, and the symbol sits at atom-relative offset 0.
//! - The object yields at least one atom per defined external symbol
//!   (three in `__TEXT,__text` plus one in `__DATA,__data`).
//! - Each `Symbol::Defined { atom }` no longer points at the pre-
//!   atomization `AtomId(0)` placeholder.
//! - Every `__text` atom carries real data bytes whose length equals the
//!   atom's recorded size.
//! - A `__TEXT,__cstring` section splits into one atom per
//!   NUL-terminated string.
//!
//! Skipped if `xcrun as` is unavailable or the fixture fails to assemble.
| 15 | |
| 16 | use std::fs; |
| 17 | use std::path::PathBuf; |
| 18 | use std::process::Command; |
| 19 | |
| 20 | use afs_ld::atom::{atomize_object, backpatch_symbol_atoms, AtomSection, AtomTable}; |
| 21 | use afs_ld::resolve::{seed_all, AtomId, Inputs, Symbol, SymbolTable}; |
| 22 | |
/// Reports whether `xcrun -f as` runs and exits successfully.
///
/// Any failure to launch `xcrun` at all is treated the same as an
/// unsuccessful exit: the answer is `false`.
fn have_xcrun() -> bool {
    match Command::new("xcrun").args(["-f", "as"]).output() {
        Ok(probe) => probe.status.success(),
        Err(_) => false,
    }
}
| 31 | |
/// Assembles `src` as arm64 assembly into the object file at `out`.
///
/// The source text is staged in a scratch `.s` file under the system temp
/// directory (named from the current process id and `out`'s file stem) and
/// passed to `xcrun --sdk macosx as -arch arm64`. The scratch file is removed
/// on a best-effort basis on every exit path, including failures.
///
/// # Errors
///
/// Returns a message if the scratch file cannot be written, if `xcrun` cannot
/// be spawned, or if the assembler exits unsuccessfully (the message then
/// carries the assembler's stderr).
fn assemble(src: &str, out: &PathBuf) -> Result<(), String> {
    let stem = out.file_stem().and_then(|s| s.to_str()).unwrap_or("t");
    let tmp = std::env::temp_dir().join(format!(
        "afs-ld-atom-{}-{stem}.s",
        std::process::id()
    ));

    if let Err(e) = fs::write(&tmp, src) {
        // A failed write may still have created the file; do not leave it behind.
        let _ = fs::remove_file(&tmp);
        return Err(format!("write: {e}"));
    }

    let spawned = Command::new("xcrun")
        .args(["--sdk", "macosx", "as", "-arch", "arm64"])
        .arg(&tmp)
        .arg("-o")
        .arg(out)
        .output();

    // Remove the scratch source before inspecting the outcome so that neither
    // a spawn error nor an assembler failure can leak it.
    let _ = fs::remove_file(&tmp);

    let output = spawned.map_err(|e| format!("spawn xcrun as: {e}"))?;
    if !output.status.success() {
        return Err(format!(
            "xcrun as failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
| 55 | |
/// Gate: an object with three `__text` functions and one `__data` global is
/// atomized, and every defined external symbol is re-pointed from the
/// `AtomId(0)` placeholder to an atom it owns.
///
/// Returns early (skip) when `xcrun as` is unavailable or the fixture does
/// not assemble.
#[test]
fn atomize_splits_text_at_symbol_boundaries_and_backpatches_symbols() {
    /// Best-effort deletion of the wrapped path on drop, so the fixture
    /// object is cleaned up even when an assertion below panics.
    struct RemoveOnDrop(PathBuf);

    impl Drop for RemoveOnDrop {
        fn drop(&mut self) {
            let _ = fs::remove_file(&self.0);
        }
    }

    if !have_xcrun() {
        eprintln!("skipping: xcrun as unavailable");
        return;
    }

    // Three functions in __text + one data global; we expect one atom per
    // external function plus one for the data symbol.
    //
    // `.subsections_via_symbols` sets MH_SUBSECTIONS_VIA_SYMBOLS on the
    // resulting object so our atomizer splits at symbol boundaries. Every
    // fixture afs-as emits carries this flag, so the corpus round-trips
    // already exercise it — we need the directive only for hand-written
    // test inputs such as this one.
    let src = r#"
        .section __TEXT,__text,regular,pure_instructions
        .globl _fn_a
    _fn_a:
        mov x0, #0
        ret
        .globl _fn_b
    _fn_b:
        mov x0, #1
        ret
        .globl _fn_c
    _fn_c:
        mov x0, #2
        ret
        .section __DATA,__data
        .globl _data_global
    _data_global:
        .quad 0x1122334455667788
        .subsections_via_symbols
    "#;

    // The guard owns the output path; it is created before assembling so a
    // partially written object is removed on the skip path as well.
    let obj_guard = RemoveOnDrop(
        std::env::temp_dir().join(format!("afs-ld-atom-{}-test.o", std::process::id())),
    );
    let obj_path = &obj_guard.0;
    if let Err(e) = assemble(src, obj_path) {
        eprintln!("skipping: assemble failed: {e}");
        return;
    }

    let bytes = fs::read(obj_path).expect("read assembled object");
    let mut inputs = Inputs::new();
    let input_id = inputs
        .add_object(obj_path.clone(), bytes, 0)
        .expect("add_object");

    // Seed the symbol table (produces Defined entries with AtomId(0)
    // placeholders).
    let mut sym_table = SymbolTable::new();
    let _ = seed_all(&inputs, &mut sym_table).expect("seed_all");

    // Atomize + back-patch.
    let obj = inputs.object_file(input_id).unwrap();
    let mut atom_table = AtomTable::new();
    let atomization = atomize_object(input_id, obj, &mut atom_table);
    backpatch_symbol_atoms(&atomization, input_id, obj, &mut sym_table, &mut atom_table);

    // At least one atom per defined function plus one for data_global.
    assert!(
        atom_table.len() >= 4,
        "expected ≥4 atoms (3 text + 1 data); got {}",
        atom_table.len()
    );

    // Every external symbol defined in this object should now resolve to a
    // non-placeholder atom.
    for sym_name in ["_fn_a", "_fn_b", "_fn_c", "_data_global"] {
        let istr = sym_table.intern(sym_name);
        let sid = sym_table
            .lookup(istr)
            .unwrap_or_else(|| panic!("{sym_name} not in symbol table"));
        let sym = sym_table.get(sid);
        match sym {
            Symbol::Defined { atom, value, .. } => {
                assert_ne!(
                    *atom,
                    AtomId(0),
                    "{sym_name} still points at AtomId(0) placeholder"
                );
                // Primary owner symbols are at atom-relative offset 0.
                assert_eq!(*value, 0, "{sym_name} should be at atom start");
                // The atom itself should match the symbol's origin and be a
                // reasonable section kind.
                let atom = atom_table.get(*atom);
                assert_eq!(atom.origin, input_id);
                assert!(matches!(
                    atom.section,
                    AtomSection::Text | AtomSection::Data | AtomSection::ConstData
                ));
                assert_eq!(atom.owner, Some(sid));
            }
            other => panic!("{sym_name} should be Defined; got {other:?}"),
        }
    }

    // Every __text atom should carry real content bytes (not zerofill) whose
    // length agrees with the atom's recorded size.
    let text_atoms: Vec<_> = atom_table
        .iter()
        .filter(|(_, a)| a.section == AtomSection::Text)
        .collect();
    assert!(!text_atoms.is_empty());
    for (_, atom) in &text_atoms {
        assert!(!atom.data.is_empty(), "text atom has empty data");
        assert_eq!(
            atom.data.len(),
            atom.size as usize,
            "text atom data length mismatches size"
        );
    }
    // `obj_guard` removes the fixture object when it goes out of scope here.
}
| 170 | |
/// Gate: a `__TEXT,__cstring` section holding three `.asciz` strings is
/// atomized into exactly three `CStringLiterals` atoms, each ending in a NUL
/// byte.
///
/// Returns early (skip) when `xcrun as` is unavailable or the fixture does
/// not assemble.
#[test]
fn atomize_cstring_splits_at_null_terminators() {
    /// Best-effort deletion of the wrapped path on drop, so the fixture
    /// object is cleaned up even when an assertion below panics.
    struct RemoveOnDrop(PathBuf);

    impl Drop for RemoveOnDrop {
        fn drop(&mut self) {
            let _ = fs::remove_file(&self.0);
        }
    }

    if !have_xcrun() {
        eprintln!("skipping: xcrun as unavailable");
        return;
    }

    let src = r#"
        .section __TEXT,__cstring,cstring_literals
        .globl _s1
    _s1:
        .asciz "alpha"
        .globl _s2
    _s2:
        .asciz "beta"
        .globl _s3
    _s3:
        .asciz "gamma"
        .subsections_via_symbols
    "#;

    // The guard owns the output path; it is created before assembling so a
    // partially written object is removed on the skip path as well.
    let obj_guard = RemoveOnDrop(
        std::env::temp_dir().join(format!("afs-ld-atom-{}-cstrings.o", std::process::id())),
    );
    let obj_path = &obj_guard.0;
    if let Err(e) = assemble(src, obj_path) {
        eprintln!("skipping: assemble failed: {e}");
        return;
    }

    let bytes = fs::read(obj_path).expect("read assembled object");
    let mut inputs = Inputs::new();
    let input_id = inputs
        .add_object(obj_path.clone(), bytes, 0)
        .expect("add_object");
    let mut sym_table = SymbolTable::new();
    let _ = seed_all(&inputs, &mut sym_table).expect("seed_all");
    let obj = inputs.object_file(input_id).unwrap();
    let mut atom_table = AtomTable::new();
    // Only the atom table is inspected below; no back-patching is performed.
    let _atomization = atomize_object(input_id, obj, &mut atom_table);

    let cstring_atoms: Vec<_> = atom_table
        .iter()
        .filter(|(_, a)| a.section == AtomSection::CStringLiterals)
        .collect();
    assert_eq!(
        cstring_atoms.len(),
        3,
        "expected 3 cstring atoms (one per asciz), got {}",
        cstring_atoms.len()
    );
    // Each atom's data should end in 0x00.
    for (_, atom) in &cstring_atoms {
        assert!(
            atom.data.last() == Some(&0),
            "cstring atom data should end at null terminator; got {:?}",
            atom.data
        );
    }
    // `obj_guard` removes the fixture object when it goes out of scope here.
}
| 229 |