Rust · 15117 bytes Raw Blame History
//! afs-ld — standalone ARM64 Mach-O linker.
//!
//! This crate root started as Sprint 0 scaffolding. It now declares the
//! public surface and drives a complete link through [`Linker::run`], which
//! chains the reader, resolver, layout, synth, reloc, and writer paths.
6
7 pub mod archive;
8 pub mod args;
9 pub mod atom;
10 pub mod diag;
11 pub mod dump;
12 pub mod input;
13 pub mod layout;
14 pub mod leb;
15 pub mod macho;
16 pub mod reloc;
17 pub mod resolve;
18 pub mod section;
19 pub mod string_table;
20 pub mod symbol;
21 pub mod synth;
22
23 use std::os::unix::fs::PermissionsExt;
24 use std::path::PathBuf;
25 use std::{fs, io};
26
27 use atom::{atomize_object, backpatch_symbol_atoms, AtomTable};
28 use layout::{Layout, LayoutInput};
29 use macho::dylib::{DylibDependency, DylibFile, DylibLoadKind};
30 use macho::reader::ReadError;
31 use macho::tbd::{parse_tbd, parse_version, Arch, Platform, Target};
32 use reloc::arm64::RelocError;
33 use resolve::{
34 classify_unresolved, drain_fetches, format_duplicate_diagnostic, format_undefined_diagnostic,
35 seed_all, DylibLoadMeta, InputAddError, Inputs, Symbol, SymbolTable, UndefinedTreatment,
36 };
37
/// Fallback used for a TBD stub's current and compatibility versions when the
/// document does not state them.
///
/// The value equals `1 << 16`.
// NOTE(review): presumably the packed encoding of version 1.0.0 (major in the
// upper 16 bits) — confirm against `parse_version`.
const DEFAULT_TBD_VERSION: u32 = 0x0001_0000;
39
/// What kind of Mach-O file the linker is producing.
///
/// `Executable` is the default, matching what `LinkOptions::default()` has
/// always produced.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum OutputKind {
    /// A runnable image; the output file additionally gets execute
    /// permission bits after it is written.
    #[default]
    Executable,
    /// A dynamic library.
    Dylib,
}

/// User-facing linker configuration, populated by the CLI parser.
///
/// `Default` yields an empty configuration: no inputs, every optional field
/// unset, `strip_locals` off, and `kind` set to [`OutputKind::Executable`].
#[derive(Debug, Clone, Default)]
pub struct LinkOptions {
    /// Input paths in command-line order; an input's position is used as its
    /// load order.
    pub inputs: Vec<PathBuf>,
    /// Destination path of the linked image; `a.out` is used when unset.
    pub output: Option<PathBuf>,
    /// Explicit entry symbol name. When unset, executables fall back to
    /// `_main` and then `_start`.
    pub entry: Option<String>,
    /// Requested architecture; anything other than `arm64` is rejected.
    pub arch: Option<String>,
    // NOTE(review): presumably requests stripping of local symbols; this flag
    // is not read directly in this file — confirm where it is consumed.
    pub strip_locals: bool,
    /// Kind of Mach-O file to produce.
    pub kind: OutputKind,
    /// When set, afs-ld operates in dump mode and prints the given file's
    /// header + load commands instead of linking.
    pub dump: Option<PathBuf>,
    /// When set, afs-ld dumps the named static archive's structure.
    pub dump_archive: Option<PathBuf>,
    /// When set, afs-ld dumps the named MH_DYLIB's load commands + exports.
    pub dump_dylib: Option<PathBuf>,
    /// When set, afs-ld dumps the named TAPI TBD stub (all documents).
    pub dump_tbd: Option<PathBuf>,
}
83
84 #[derive(Debug)]
85 pub enum LinkError {
86 /// No input files were provided on the command line.
87 NoInputs,
88 Io(io::Error),
89 Input(InputAddError),
90 Seed(resolve::SeedError),
91 Fetch(resolve::FetchError),
92 Write(macho::writer::WriteError),
93 Tbd(macho::tbd::TbdError),
94 Reloc(RelocError),
95 Synth(synth::SynthError),
96 Unwind(synth::unwind::UnwindError),
97 DuplicateSymbols(String),
98 UndefinedSymbols(String),
99 UnsupportedArch(String),
100 NoTbdDocument(PathBuf),
101 EntrySymbolNotFound(String),
102 }
103
104 impl std::fmt::Display for LinkError {
105 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
106 match self {
107 LinkError::NoInputs => write!(f, "no input files"),
108 LinkError::Io(e) => write!(f, "{e}"),
109 LinkError::Input(e) => write!(f, "{e}"),
110 LinkError::Seed(e) => write!(f, "{e}"),
111 LinkError::Fetch(e) => write!(f, "{e}"),
112 LinkError::Write(e) => write!(f, "{e}"),
113 LinkError::Tbd(e) => write!(f, "{e}"),
114 LinkError::Reloc(e) => write!(f, "{e}"),
115 LinkError::Synth(e) => write!(f, "{e}"),
116 LinkError::Unwind(e) => write!(f, "{e}"),
117 LinkError::DuplicateSymbols(msg) | LinkError::UndefinedSymbols(msg) => {
118 write!(f, "{msg}")
119 }
120 LinkError::UnsupportedArch(arch) => {
121 write!(f, "unsupported arch `{arch}` (afs-ld requires arm64)")
122 }
123 LinkError::NoTbdDocument(path) => {
124 write!(f, "{}: no arm64-macos TBD document found", path.display())
125 }
126 LinkError::EntrySymbolNotFound(name) => {
127 write!(f, "entry symbol `{name}` was not found in linked objects")
128 }
129 }
130 }
131 }
132
133 impl std::error::Error for LinkError {}
134
135 impl From<io::Error> for LinkError {
136 fn from(value: io::Error) -> Self {
137 LinkError::Io(value)
138 }
139 }
140
141 impl From<InputAddError> for LinkError {
142 fn from(value: InputAddError) -> Self {
143 LinkError::Input(value)
144 }
145 }
146
147 impl From<ReadError> for LinkError {
148 fn from(value: ReadError) -> Self {
149 LinkError::Input(InputAddError::from(value))
150 }
151 }
152
153 impl From<resolve::SeedError> for LinkError {
154 fn from(value: resolve::SeedError) -> Self {
155 LinkError::Seed(value)
156 }
157 }
158
159 impl From<resolve::FetchError> for LinkError {
160 fn from(value: resolve::FetchError) -> Self {
161 LinkError::Fetch(value)
162 }
163 }
164
165 impl From<macho::writer::WriteError> for LinkError {
166 fn from(value: macho::writer::WriteError) -> Self {
167 LinkError::Write(value)
168 }
169 }
170
171 impl From<macho::tbd::TbdError> for LinkError {
172 fn from(value: macho::tbd::TbdError) -> Self {
173 LinkError::Tbd(value)
174 }
175 }
176
177 impl From<RelocError> for LinkError {
178 fn from(value: RelocError) -> Self {
179 LinkError::Reloc(value)
180 }
181 }
182
183 impl From<synth::SynthError> for LinkError {
184 fn from(value: synth::SynthError) -> Self {
185 LinkError::Synth(value)
186 }
187 }
188
189 impl From<synth::unwind::UnwindError> for LinkError {
190 fn from(value: synth::unwind::UnwindError) -> Self {
191 LinkError::Unwind(value)
192 }
193 }
194
195 /// The linker itself. Sprint 0 only validates that inputs exist; later sprints
196 /// grow this into the full pipeline described in `.docs/overview.md`.
197 pub struct Linker;
198
199 impl Linker {
200 pub fn run(opts: &LinkOptions) -> Result<(), LinkError> {
201 if opts.inputs.is_empty() {
202 return Err(LinkError::NoInputs);
203 }
204
205 if let Some(arch) = &opts.arch {
206 if arch != "arm64" {
207 return Err(LinkError::UnsupportedArch(arch.clone()));
208 }
209 }
210
211 let mut inputs = Inputs::new();
212 for (load_order, path) in opts.inputs.iter().enumerate() {
213 register_input(&mut inputs, path, load_order)?;
214 }
215
216 let mut sym_table = SymbolTable::new();
217 let seed_report = seed_all(&inputs, &mut sym_table)?;
218 if seed_report.has_errors() {
219 let mut msg = String::new();
220 for err in &seed_report.duplicates {
221 msg.push_str(&format_duplicate_diagnostic(&sym_table, &inputs, err));
222 }
223 return Err(LinkError::DuplicateSymbols(msg));
224 }
225
226 let drain_report = drain_fetches(&mut inputs, &mut sym_table, seed_report.pending_fetches)?;
227 if !drain_report.duplicates.is_empty() {
228 let mut msg = String::new();
229 for err in &drain_report.duplicates {
230 msg.push_str(&format_duplicate_diagnostic(&sym_table, &inputs, err));
231 }
232 return Err(LinkError::DuplicateSymbols(msg));
233 }
234 let mut referrers = seed_report.referrers.clone();
235 referrers.extend_from(&drain_report.referrers);
236 let unresolved = classify_unresolved(&mut sym_table, UndefinedTreatment::Error);
237 if !unresolved.errors.is_empty() {
238 return Err(LinkError::UndefinedSymbols(format_undefined_diagnostic(
239 &sym_table,
240 &inputs,
241 &referrers,
242 &unresolved.errors,
243 )));
244 }
245
246 let mut atom_table = AtomTable::new();
247 let mut objects = Vec::new();
248 for idx in 0..inputs.objects.len() {
249 let input_id = resolve::InputId(idx as u32);
250 let obj = inputs.object_file(input_id)?;
251 let atomization = atomize_object(input_id, &obj, &mut atom_table);
252 backpatch_symbol_atoms(
253 &atomization,
254 input_id,
255 &obj,
256 &mut sym_table,
257 &mut atom_table,
258 );
259 objects.push((input_id, obj));
260 }
261
262 let layout_inputs: Vec<LayoutInput<'_>> = objects
263 .iter()
264 .map(|(id, object)| {
265 let input = inputs.object(*id);
266 LayoutInput {
267 id: *id,
268 object,
269 load_order: input.load_order,
270 archive_member_offset: input.archive_member_offset,
271 }
272 })
273 .collect();
274 let mut dylib_loads = Vec::new();
275 let mut seen_ordinals = std::collections::BTreeSet::new();
276 for dylib in &inputs.dylibs {
277 if !seen_ordinals.insert(dylib.ordinal) {
278 continue;
279 }
280 dylib_loads.push(DylibDependency {
281 kind: DylibLoadKind::Normal,
282 install_name: dylib.load_install_name.clone(),
283 current_version: dylib.load_current_version,
284 compatibility_version: dylib.load_compatibility_version,
285 ordinal: dylib.ordinal,
286 });
287 }
288 let synthetic_plan = synth::SyntheticPlan::build(
289 &layout_inputs,
290 &atom_table,
291 &mut sym_table,
292 &inputs.dylibs,
293 )?;
294 let mut layout = Layout::build_with_synthetics(
295 opts.kind,
296 &layout_inputs,
297 &atom_table,
298 0,
299 Some(&synthetic_plan),
300 );
301 let linkedit_context = macho::writer::LinkEditContext {
302 layout_inputs: &layout_inputs,
303 atom_table: &atom_table,
304 sym_table: &sym_table,
305 synthetic_plan: &synthetic_plan,
306 };
307 let mut linkedit = None;
308 for _ in 0..4 {
309 let (next_layout, next_linkedit) = macho::writer::finalize_layout_with_linkedit(
310 &layout,
311 opts.kind,
312 opts,
313 &dylib_loads,
314 linkedit_context,
315 )?;
316 layout = next_layout;
317 linkedit = Some(next_linkedit);
318 let changed = synth::unwind::synthesize(
319 &mut layout,
320 &layout_inputs,
321 &atom_table,
322 &sym_table,
323 &synthetic_plan,
324 )?;
325 if !changed {
326 break;
327 }
328 }
329 let linkedit = linkedit.expect("finalize loop always runs at least once");
330 reloc::arm64::apply_layout(
331 &mut layout,
332 &layout_inputs,
333 &atom_table,
334 &sym_table,
335 Some(&synthetic_plan),
336 &linkedit,
337 )?;
338
339 let mut image = Vec::new();
340 let entry_point = resolve_entry_point(opts, &sym_table)?;
341 macho::writer::write_finalized_with_linkedit(
342 &layout,
343 opts.kind,
344 opts,
345 entry_point,
346 &dylib_loads,
347 &linkedit,
348 &mut image,
349 )?;
350 let output = default_output_path(opts);
351 fs::write(&output, image)?;
352 if opts.kind == OutputKind::Executable {
353 let mut perms = fs::metadata(&output)?.permissions();
354 let mode = perms.mode();
355 perms.set_mode(mode | ((mode & 0o444) >> 2));
356 fs::set_permissions(&output, perms)?;
357 }
358 Ok(())
359 }
360 }
361
362 fn default_output_path(opts: &LinkOptions) -> PathBuf {
363 opts.output
364 .clone()
365 .unwrap_or_else(|| PathBuf::from("a.out"))
366 }
367
368 fn register_input(
369 inputs: &mut Inputs,
370 path: &std::path::Path,
371 load_order: usize,
372 ) -> Result<(), LinkError> {
373 let bytes = fs::read(path)?;
374 match path.extension().and_then(|ext| ext.to_str()) {
375 Some("a") => {
376 let _ = inputs.add_archive(path.to_path_buf(), bytes, load_order)?;
377 }
378 Some("dylib") => {
379 let _ = inputs.add_dylib(path.to_path_buf(), bytes)?;
380 }
381 Some("tbd") => {
382 let text = std::str::from_utf8(&bytes).map_err(|e| {
383 LinkError::Tbd(macho::tbd::TbdError::Schema {
384 msg: format!("TBD input is not UTF-8: {e}"),
385 })
386 })?;
387 let docs = parse_tbd(text)?;
388 let target = Target {
389 arch: Arch::Arm64,
390 platform: Platform::MacOs,
391 };
392 let canonical = docs
393 .iter()
394 .find(|doc| doc.parent_umbrella.is_empty())
395 .unwrap_or_else(|| &docs[0]);
396 let load = DylibLoadMeta {
397 install_name: canonical.install_name.clone(),
398 current_version: canonical
399 .current_version
400 .as_deref()
401 .map(parse_version)
402 .unwrap_or(DEFAULT_TBD_VERSION),
403 compatibility_version: canonical
404 .compatibility_version
405 .as_deref()
406 .map(parse_version)
407 .unwrap_or(DEFAULT_TBD_VERSION),
408 ordinal: inputs.next_dylib_ordinal(),
409 };
410 let mut loaded = false;
411 for doc in docs
412 .iter()
413 .filter(|doc| doc.targets.iter().any(|t| t.matches_requested(&target)))
414 {
415 let file = DylibFile::from_tbd(path, doc, &target);
416 let _ =
417 inputs.add_dylib_from_file_with_meta(path.to_path_buf(), file, load.clone());
418 loaded = true;
419 }
420 if !loaded {
421 return Err(LinkError::NoTbdDocument(path.to_path_buf()));
422 }
423 }
424 _ => {
425 let _ = inputs.add_object(path.to_path_buf(), bytes, load_order)?;
426 }
427 }
428 Ok(())
429 }
430
431 fn resolve_entry_point(
432 opts: &LinkOptions,
433 sym_table: &SymbolTable,
434 ) -> Result<Option<macho::writer::EntryPoint>, LinkError> {
435 let name = if let Some(name) = &opts.entry {
436 name.as_str()
437 } else if opts.kind == OutputKind::Executable {
438 if symbol_defined(sym_table, "_main") {
439 "_main"
440 } else if symbol_defined(sym_table, "_start") {
441 "_start"
442 } else {
443 return Ok(None);
444 }
445 } else {
446 return Ok(None);
447 };
448 let Some((symbol_id, _)) = sym_table
449 .iter()
450 .find(|(_, symbol)| sym_table.interner.resolve(symbol.name()) == name)
451 else {
452 return Err(LinkError::EntrySymbolNotFound(name.to_string()));
453 };
454 let Symbol::Defined { atom, value, .. } = sym_table.get(symbol_id) else {
455 return Err(LinkError::EntrySymbolNotFound(name.to_string()));
456 };
457 Ok(Some(macho::writer::EntryPoint {
458 atom: *atom,
459 atom_value: *value,
460 }))
461 }
462
463 fn symbol_defined(sym_table: &SymbolTable, name: &str) -> bool {
464 sym_table.iter().any(|(_, symbol)| {
465 sym_table.interner.resolve(symbol.name()) == name
466 && matches!(symbol, Symbol::Defined { .. })
467 })
468 }
469