Rust · 7656 bytes Raw Blame History
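//! End-to-end atomization gate.
//!
//! Assembles multi-symbol objects with `xcrun as`, runs the full
//! resolve → atomize → back-patch pipeline, and verifies:
//!
//! - Every defined external symbol resolves to an atom that names it as
//!   owner, with the symbol at atom-relative offset 0.
//! - The atom table holds at least one atom per defined symbol (three in
//!   `__TEXT,__text` plus one in `__DATA,__data`).
//! - Each `Symbol::Defined { atom }` no longer points at the
//!   pre-atomization `AtomId(0)` placeholder.
//! - Every `__text` atom carries non-empty data whose length equals its
//!   recorded size.
//! - A `cstring_literals` section is split into one atom per
//!   NUL-terminated string.
//!
//! Intended but not yet asserted here: that the atom count for
//! `__TEXT,__text` equals the number of non-alt external symbols defined
//! in that section, and that atom data bytes at the owner's atom-relative
//! offset match the section data at the symbol's n_value.
//!
//! Tests are skipped if `xcrun as` is unavailable or assembly fails.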
15
16 use std::fs;
17 use std::path::PathBuf;
18 use std::process::Command;
19
20 use afs_ld::atom::{atomize_object, backpatch_symbol_atoms, AtomSection, AtomTable};
21 use afs_ld::resolve::{seed_all, AtomId, Inputs, Symbol, SymbolTable};
22
/// Reports whether `xcrun -f as` can be run and exits successfully.
///
/// A failure to spawn `xcrun` at all is treated the same as a non-zero
/// exit status: the assembler counts as unavailable.
fn have_xcrun() -> bool {
    match Command::new("xcrun").args(["-f", "as"]).output() {
        Ok(probe) => probe.status.success(),
        Err(_) => false,
    }
}
31
/// Assembles `src` as arm64 assembly into the object file `out` using
/// `xcrun --sdk macosx as`.
///
/// The source text is staged in a scratch `.s` file under the system temp
/// directory, named from the current process id and the file stem of `out`.
/// The scratch file is removed on every path — success, spawn failure, and
/// assembler failure — so failed runs do not leave files behind.
///
/// # Errors
///
/// Returns a descriptive message when the scratch file cannot be written,
/// when `xcrun` cannot be spawned, or when the assembler exits
/// unsuccessfully (its stderr is included in the message).
fn assemble(src: &str, out: &PathBuf) -> Result<(), String> {
    let tmp = std::env::temp_dir().join(format!(
        "afs-ld-atom-{}-{}.s",
        std::process::id(),
        out.file_stem().and_then(|s| s.to_str()).unwrap_or("t")
    ));
    if let Err(e) = fs::write(&tmp, src) {
        // A failed write can still leave a partial file on disk.
        let _ = fs::remove_file(&tmp);
        return Err(format!("write: {e}"));
    }

    let spawned = Command::new("xcrun")
        .args(["--sdk", "macosx", "as", "-arch", "arm64"])
        .arg(&tmp)
        .arg("-o")
        .arg(out)
        .output();

    // `output()` has already waited for the child (or failed to start it),
    // so the scratch source is no longer needed whatever the outcome.
    // Removal is best-effort: a cleanup failure must not mask the real result.
    let _ = fs::remove_file(&tmp);

    let output = spawned.map_err(|e| format!("spawn xcrun as: {e}"))?;
    if output.status.success() {
        Ok(())
    } else {
        Err(format!(
            "xcrun as failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ))
    }
}
55
/// Assembles three `__text` functions plus one `__data` global, runs
/// seed → atomize → back-patch, and checks that every symbol ends up bound
/// to its own, non-placeholder atom.
#[test]
fn atomize_splits_text_at_symbol_boundaries_and_backpatches_symbols() {
    if !have_xcrun() {
        eprintln!("skipping: xcrun as unavailable");
        return;
    }

    // Fixture: three external functions in __text and one external data
    // global. The trailing `.subsections_via_symbols` directive sets
    // MH_SUBSECTIONS_VIA_SYMBOLS on the object so the atomizer splits at
    // symbol boundaries. Fixtures emitted by afs-as always carry that flag;
    // the directive is only needed for hand-written inputs like this one.
    let src = r#"
.section __TEXT,__text,regular,pure_instructions
.globl _fn_a
_fn_a:
mov x0, #0
ret
.globl _fn_b
_fn_b:
mov x0, #1
ret
.globl _fn_c
_fn_c:
mov x0, #2
ret
.section __DATA,__data
.globl _data_global
_data_global:
.quad 0x1122334455667788
.subsections_via_symbols
"#;

    let object_name = format!("afs-ld-atom-{}-test.o", std::process::id());
    let object_path = std::env::temp_dir().join(object_name);
    match assemble(src, &object_path) {
        Ok(()) => {}
        Err(e) => {
            eprintln!("skipping: assemble failed: {e}");
            return;
        }
    }

    // Register the freshly assembled object as the only linker input.
    let image = fs::read(&object_path).unwrap();
    let mut inputs = Inputs::new();
    let input_id = inputs.add_object(object_path.clone(), image, 0).unwrap();

    // Seeding yields Defined entries that still carry the AtomId(0)
    // placeholder.
    let mut symbols = SymbolTable::new();
    let _ = seed_all(&inputs, &mut symbols).expect("seed_all");

    // Split the object into atoms, then rewire the seeded symbols to them.
    let mut atoms = AtomTable::new();
    let obj = inputs.object_file(input_id).unwrap();
    let atomization = atomize_object(input_id, obj, &mut atoms);
    backpatch_symbol_atoms(&atomization, input_id, obj, &mut symbols, &mut atoms);

    // Lower bound only: one atom per function plus one for the data global.
    let atom_count = atoms.len();
    assert!(
        atom_count >= 4,
        "expected ≥4 atoms (3 text + 1 data); got {}",
        atom_count
    );

    // Each external symbol must now be Defined, bound to a real atom that
    // starts at the symbol and names the symbol as its owner.
    for sym_name in ["_fn_a", "_fn_b", "_fn_c", "_data_global"] {
        let interned = symbols.intern(sym_name);
        let sid = match symbols.lookup(interned) {
            Some(found) => found,
            None => panic!("{sym_name} not in symbol table"),
        };
        match symbols.get(sid) {
            Symbol::Defined {
                atom: atom_id,
                value,
                ..
            } => {
                assert_ne!(
                    *atom_id,
                    AtomId(0),
                    "{sym_name} still points at AtomId(0) placeholder"
                );
                // An owning symbol sits at offset 0 within its atom.
                assert_eq!(*value, 0, "{sym_name} should be at atom start");

                // The atom must come from this input, live in a plausible
                // section kind, and point back at the symbol.
                let atom = atoms.get(*atom_id);
                assert_eq!(atom.origin, input_id);
                assert!(matches!(
                    atom.section,
                    AtomSection::Text | AtomSection::Data | AtomSection::ConstData
                ));
                assert_eq!(atom.owner, Some(sid));
            }
            other => panic!("{sym_name} should be Defined; got {other:?}"),
        }
    }

    // Text atoms must exist and carry real bytes whose length agrees with
    // the recorded atom size (i.e. they are not zerofill).
    let text_atoms: Vec<_> = atoms
        .iter()
        .filter(|(_, a)| matches!(a.section, AtomSection::Text))
        .collect();
    assert!(!text_atoms.is_empty());
    for (_, atom) in text_atoms.iter() {
        assert!(!atom.data.is_empty(), "text atom has empty data");
        assert_eq!(
            atom.data.len(),
            atom.size as usize,
            "text atom data length mismatches size"
        );
    }

    // Best-effort cleanup of the assembled object.
    let _ = fs::remove_file(&object_path);
}
170
/// Assembles a `cstring_literals` section holding three `.asciz` strings
/// and checks that atomization yields one NUL-terminated atom per string.
#[test]
fn atomize_cstring_splits_at_null_terminators() {
    if !have_xcrun() {
        eprintln!("skipping: xcrun as unavailable");
        return;
    }

    let src = r#"
.section __TEXT,__cstring,cstring_literals
.globl _s1
_s1:
.asciz "alpha"
.globl _s2
_s2:
.asciz "beta"
.globl _s3
_s3:
.asciz "gamma"
.subsections_via_symbols
"#;

    let object_name = format!("afs-ld-atom-{}-cstrings.o", std::process::id());
    let object_path = std::env::temp_dir().join(object_name);
    match assemble(src, &object_path) {
        Ok(()) => {}
        Err(e) => {
            eprintln!("skipping: assemble failed: {e}");
            return;
        }
    }

    // Load the object, seed the symbol table, then atomize. No back-patch
    // step is run here; only the shape of the atoms is inspected.
    let image = fs::read(&object_path).unwrap();
    let mut inputs = Inputs::new();
    let input_id = inputs.add_object(object_path.clone(), image, 0).unwrap();
    let mut symbols = SymbolTable::new();
    let _ = seed_all(&inputs, &mut symbols).expect("seed_all");
    let mut atoms = AtomTable::new();
    let obj = inputs.object_file(input_id).unwrap();
    let _atomization = atomize_object(input_id, obj, &mut atoms);

    let cstring_atoms: Vec<_> = atoms
        .iter()
        .filter(|(_, a)| matches!(a.section, AtomSection::CStringLiterals))
        .collect();
    let found = cstring_atoms.len();
    assert_eq!(
        found, 3,
        "expected 3 cstring atoms (one per asciz), got {}",
        found
    );

    // Every string atom must stop exactly at its terminating zero byte.
    for (_, atom) in cstring_atoms.iter() {
        assert!(
            atom.data.last() == Some(&0),
            "cstring atom data should end at null terminator; got {:?}",
            atom.data
        );
    }

    // Best-effort cleanup of the assembled object.
    let _ = fs::remove_file(&object_path);
}
229