//! afs-ld — standalone ARM64 Mach-O linker.
//!
//! The public surface is [`LinkOptions`], [`LinkError`], and [`Linker`].
//! [`Linker::run`] drives one link end to end: it registers the inputs,
//! resolves symbols, atomizes objects, builds synthetic content and the layout,
//! applies relocations, and writes the output image. The reader, resolver,
//! layout, reloc, synth, and writer stages live in the submodules declared below.
| 6 | |
| 7 | pub mod archive; |
| 8 | pub mod args; |
| 9 | pub mod atom; |
| 10 | pub mod diag; |
| 11 | pub mod dump; |
| 12 | pub mod input; |
| 13 | pub mod layout; |
| 14 | pub mod leb; |
| 15 | pub mod macho; |
| 16 | pub mod reloc; |
| 17 | pub mod resolve; |
| 18 | pub mod section; |
| 19 | pub mod string_table; |
| 20 | pub mod symbol; |
| 21 | pub mod synth; |
| 22 | |
| 23 | use std::os::unix::fs::PermissionsExt; |
| 24 | use std::path::PathBuf; |
| 25 | use std::{fs, io}; |
| 26 | |
| 27 | use atom::{atomize_object, backpatch_symbol_atoms, AtomTable}; |
| 28 | use layout::{Layout, LayoutInput}; |
| 29 | use macho::dylib::{DylibDependency, DylibFile, DylibLoadKind}; |
| 30 | use macho::reader::ReadError; |
| 31 | use macho::tbd::{parse_tbd, parse_version, Arch, Platform, Target}; |
| 32 | use reloc::arm64::RelocError; |
| 33 | use resolve::{ |
| 34 | classify_unresolved, drain_fetches, format_duplicate_diagnostic, format_undefined_diagnostic, |
| 35 | seed_all, DylibLoadMeta, InputAddError, Inputs, Symbol, SymbolTable, UndefinedTreatment, |
| 36 | }; |
| 37 | |
/// Fallback used for a TBD document's current/compatibility version when the
/// document does not state one.
///
/// `1 << 16` is `0x0001_0000`, which reads as 1.0.0 in the packed Mach-O dylib
/// version layout (major in the upper 16 bits, minor and patch in the two low
/// bytes) — assuming `parse_version` produces the same packing; confirm there.
const DEFAULT_TBD_VERSION: u32 = 1 << 16;
| 39 | |
/// What kind of Mach-O file the linker is producing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputKind {
    /// A runnable image. This is the default kind; the written file gets
    /// execute permission bits, and `_main` / `_start` are tried as the entry
    /// symbol when no explicit entry is configured.
    Executable,
    /// A dynamic library. No entry point is looked up unless one is named
    /// explicitly, and the output's permission bits are left as written.
    Dylib,
}
| 46 | |
/// User-facing linker configuration, populated by the CLI parser.
#[derive(Debug, Clone)]
pub struct LinkOptions {
    /// Input files in the order they were given. The position in this list is
    /// passed on as the input's load order, and the file extension (`a`,
    /// `dylib`, `tbd`, anything else) decides how each file is registered.
    pub inputs: Vec<PathBuf>,
    /// Where to write the linked image; `a.out` is used when unset.
    pub output: Option<PathBuf>,
    /// Name of the entry symbol. When unset and the output is an executable,
    /// `_main` and then `_start` are tried.
    pub entry: Option<String>,
    /// Requested architecture. Only `"arm64"` is accepted; leaving it unset
    /// skips the check.
    pub arch: Option<String>,
    /// Not read in this file; presumably asks the writer to drop local symbols
    /// from the output — confirm against the code that consumes it.
    pub strip_locals: bool,
    /// Kind of Mach-O file to produce.
    pub kind: OutputKind,
    /// When set, afs-ld operates in dump mode and prints the given file's
    /// header + load commands instead of linking.
    pub dump: Option<PathBuf>,
    /// When set, afs-ld dumps the named static archive's structure.
    pub dump_archive: Option<PathBuf>,
    /// When set, afs-ld dumps the named MH_DYLIB's load commands + exports.
    pub dump_dylib: Option<PathBuf>,
    /// When set, afs-ld dumps the named TAPI TBD stub (all documents).
    pub dump_tbd: Option<PathBuf>,
}
| 66 | |
| 67 | impl Default for LinkOptions { |
| 68 | fn default() -> Self { |
| 69 | Self { |
| 70 | inputs: Vec::new(), |
| 71 | output: None, |
| 72 | entry: None, |
| 73 | arch: None, |
| 74 | strip_locals: false, |
| 75 | kind: OutputKind::Executable, |
| 76 | dump: None, |
| 77 | dump_archive: None, |
| 78 | dump_dylib: None, |
| 79 | dump_tbd: None, |
| 80 | } |
| 81 | } |
| 82 | } |
| 83 | |
/// Everything that can make a link fail. Variants that wrap another error
/// display exactly as the wrapped error does.
#[derive(Debug)]
pub enum LinkError {
    /// No input files were provided on the command line.
    NoInputs,
    /// A filesystem operation (reading an input, writing the output, or
    /// adjusting its permissions) failed.
    Io(io::Error),
    /// An input could not be added to the input set. Reader errors
    /// (`ReadError`) are also funnelled into this variant.
    Input(InputAddError),
    /// Seeding the symbol table from the inputs failed.
    Seed(resolve::SeedError),
    /// Draining the pending fetches recorded during seeding failed.
    Fetch(resolve::FetchError),
    /// Finalizing the layout or writing the output image failed.
    Write(macho::writer::WriteError),
    /// A `.tbd` input was not valid UTF-8 or could not be parsed.
    Tbd(macho::tbd::TbdError),
    /// Applying relocations to the laid-out image failed.
    Reloc(RelocError),
    /// Building the synthetic plan failed.
    Synth(synth::SynthError),
    /// Unwind synthesis failed.
    Unwind(synth::unwind::UnwindError),
    /// One or more symbols were defined more than once; the payload is the
    /// already-formatted diagnostic text.
    DuplicateSymbols(String),
    /// One or more symbols stayed undefined; the payload is the
    /// already-formatted diagnostic text.
    UndefinedSymbols(String),
    /// An architecture other than `arm64` was requested; the payload is the
    /// rejected name.
    UnsupportedArch(String),
    /// The `.tbd` file at this path has no document for the arm64-macos target.
    NoTbdDocument(PathBuf),
    /// The entry symbol with this name is missing or is not a defined symbol.
    EntrySymbolNotFound(String),
}
| 103 | |
| 104 | impl std::fmt::Display for LinkError { |
| 105 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 106 | match self { |
| 107 | LinkError::NoInputs => write!(f, "no input files"), |
| 108 | LinkError::Io(e) => write!(f, "{e}"), |
| 109 | LinkError::Input(e) => write!(f, "{e}"), |
| 110 | LinkError::Seed(e) => write!(f, "{e}"), |
| 111 | LinkError::Fetch(e) => write!(f, "{e}"), |
| 112 | LinkError::Write(e) => write!(f, "{e}"), |
| 113 | LinkError::Tbd(e) => write!(f, "{e}"), |
| 114 | LinkError::Reloc(e) => write!(f, "{e}"), |
| 115 | LinkError::Synth(e) => write!(f, "{e}"), |
| 116 | LinkError::Unwind(e) => write!(f, "{e}"), |
| 117 | LinkError::DuplicateSymbols(msg) | LinkError::UndefinedSymbols(msg) => { |
| 118 | write!(f, "{msg}") |
| 119 | } |
| 120 | LinkError::UnsupportedArch(arch) => { |
| 121 | write!(f, "unsupported arch `{arch}` (afs-ld requires arm64)") |
| 122 | } |
| 123 | LinkError::NoTbdDocument(path) => { |
| 124 | write!(f, "{}: no arm64-macos TBD document found", path.display()) |
| 125 | } |
| 126 | LinkError::EntrySymbolNotFound(name) => { |
| 127 | write!(f, "entry symbol `{name}` was not found in linked objects") |
| 128 | } |
| 129 | } |
| 130 | } |
| 131 | } |
| 132 | |
// Marker impl only: `source()` keeps its default (`None`), so a wrapped error is
// reachable through the `Display` text or by matching on the variant, not
// through the standard error chain.
impl std::error::Error for LinkError {}
| 134 | |
| 135 | impl From<io::Error> for LinkError { |
| 136 | fn from(value: io::Error) -> Self { |
| 137 | LinkError::Io(value) |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | impl From<InputAddError> for LinkError { |
| 142 | fn from(value: InputAddError) -> Self { |
| 143 | LinkError::Input(value) |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | impl From<ReadError> for LinkError { |
| 148 | fn from(value: ReadError) -> Self { |
| 149 | LinkError::Input(InputAddError::from(value)) |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | impl From<resolve::SeedError> for LinkError { |
| 154 | fn from(value: resolve::SeedError) -> Self { |
| 155 | LinkError::Seed(value) |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | impl From<resolve::FetchError> for LinkError { |
| 160 | fn from(value: resolve::FetchError) -> Self { |
| 161 | LinkError::Fetch(value) |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | impl From<macho::writer::WriteError> for LinkError { |
| 166 | fn from(value: macho::writer::WriteError) -> Self { |
| 167 | LinkError::Write(value) |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | impl From<macho::tbd::TbdError> for LinkError { |
| 172 | fn from(value: macho::tbd::TbdError) -> Self { |
| 173 | LinkError::Tbd(value) |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | impl From<RelocError> for LinkError { |
| 178 | fn from(value: RelocError) -> Self { |
| 179 | LinkError::Reloc(value) |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | impl From<synth::SynthError> for LinkError { |
| 184 | fn from(value: synth::SynthError) -> Self { |
| 185 | LinkError::Synth(value) |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | impl From<synth::unwind::UnwindError> for LinkError { |
| 190 | fn from(value: synth::unwind::UnwindError) -> Self { |
| 191 | LinkError::Unwind(value) |
| 192 | } |
| 193 | } |
| 194 | |
/// The linker driver. It holds no state of its own: `Linker::run` performs one
/// complete link for a given `LinkOptions`. The intended pipeline is described
/// in `.docs/overview.md`.
pub struct Linker;
| 198 | |
impl Linker {
    /// Links `opts.inputs` and writes the resulting image to `opts.output`
    /// (or `a.out` when no output is set).
    ///
    /// Stages, in order: validate options, register inputs, seed and resolve
    /// symbols, atomize objects, build the synthetic plan and layout, finalize
    /// the layout together with unwind synthesis, apply relocations, resolve the
    /// entry point, write the file, and — for executables — add execute bits.
    ///
    /// The `dump*` fields of `opts` are not read directly by this function.
    ///
    /// # Errors
    ///
    /// Returns `LinkError::NoInputs` for an empty input list,
    /// `LinkError::UnsupportedArch` for any arch other than `"arm64"`,
    /// `LinkError::DuplicateSymbols` / `LinkError::UndefinedSymbols` with a
    /// formatted diagnostic, and otherwise whichever error the failing stage
    /// produced.
    pub fn run(opts: &LinkOptions) -> Result<(), LinkError> {
        if opts.inputs.is_empty() {
            return Err(LinkError::NoInputs);
        }

        // An unset arch is accepted; a set one must be exactly "arm64".
        if let Some(arch) = &opts.arch {
            if arch != "arm64" {
                return Err(LinkError::UnsupportedArch(arch.clone()));
            }
        }

        // Register every input; its position on the command line is its load order.
        let mut inputs = Inputs::new();
        for (load_order, path) in opts.inputs.iter().enumerate() {
            register_input(&mut inputs, path, load_order)?;
        }

        // Seed the symbol table from the registered inputs.
        let mut sym_table = SymbolTable::new();
        let seed_report = seed_all(&inputs, &mut sym_table)?;
        // NOTE(review): the gate is `has_errors()`, but only `duplicates` are
        // rendered. If `has_errors()` can be true for another reason the
        // message would be empty — confirm against `resolve`.
        if seed_report.has_errors() {
            let mut msg = String::new();
            for err in &seed_report.duplicates {
                msg.push_str(&format_duplicate_diagnostic(&sym_table, &inputs, err));
            }
            return Err(LinkError::DuplicateSymbols(msg));
        }

        // Process the fetches that seeding left pending; this can surface
        // further duplicate definitions.
        let drain_report = drain_fetches(&mut inputs, &mut sym_table, seed_report.pending_fetches)?;
        if !drain_report.duplicates.is_empty() {
            let mut msg = String::new();
            for err in &drain_report.duplicates {
                msg.push_str(&format_duplicate_diagnostic(&sym_table, &inputs, err));
            }
            return Err(LinkError::DuplicateSymbols(msg));
        }
        // Merge the referrer records of both phases so the undefined-symbol
        // diagnostic can draw on either.
        let mut referrers = seed_report.referrers.clone();
        referrers.extend_from(&drain_report.referrers);
        let unresolved = classify_unresolved(&mut sym_table, UndefinedTreatment::Error);
        if !unresolved.errors.is_empty() {
            return Err(LinkError::UndefinedSymbols(format_undefined_diagnostic(
                &sym_table,
                &inputs,
                &referrers,
                &unresolved.errors,
            )));
        }

        // Atomize every object and back-patch the symbol table with the atoms
        // produced for it. The parsed objects are kept alive in `objects`
        // because the layout inputs below borrow them.
        let mut atom_table = AtomTable::new();
        let mut objects = Vec::new();
        for idx in 0..inputs.objects.len() {
            // NOTE(review): `as u32` would wrap silently past u32::MAX objects.
            let input_id = resolve::InputId(idx as u32);
            let obj = inputs.object_file(input_id)?;
            let atomization = atomize_object(input_id, &obj, &mut atom_table);
            backpatch_symbol_atoms(
                &atomization,
                input_id,
                &obj,
                &mut sym_table,
                &mut atom_table,
            );
            objects.push((input_id, obj));
        }

        // Pair each parsed object with the ordering metadata recorded at
        // registration time.
        let layout_inputs: Vec<LayoutInput<'_>> = objects
            .iter()
            .map(|(id, object)| {
                let input = inputs.object(*id);
                LayoutInput {
                    id: *id,
                    object,
                    load_order: input.load_order,
                    archive_member_offset: input.archive_member_offset,
                }
            })
            .collect();
        // One load entry per dylib ordinal. Several `inputs.dylibs` entries can
        // share an ordinal (a `.tbd` file adds every matching document with a
        // clone of the same load metadata), so only the first one is kept.
        let mut dylib_loads = Vec::new();
        let mut seen_ordinals = std::collections::BTreeSet::new();
        for dylib in &inputs.dylibs {
            if !seen_ordinals.insert(dylib.ordinal) {
                continue;
            }
            dylib_loads.push(DylibDependency {
                kind: DylibLoadKind::Normal,
                install_name: dylib.load_install_name.clone(),
                current_version: dylib.load_current_version,
                compatibility_version: dylib.load_compatibility_version,
                ordinal: dylib.ordinal,
            });
        }
        let synthetic_plan = synth::SyntheticPlan::build(
            &layout_inputs,
            &atom_table,
            &mut sym_table,
            &inputs.dylibs,
        )?;
        // NOTE(review): the meaning of the literal `0` is defined by
        // `Layout::build_with_synthetics` and is not visible from here.
        let mut layout = Layout::build_with_synthetics(
            opts.kind,
            &layout_inputs,
            &atom_table,
            0,
            Some(&synthetic_plan),
        );
        let linkedit_context = macho::writer::LinkEditContext {
            layout_inputs: &layout_inputs,
            atom_table: &atom_table,
            sym_table: &sym_table,
            synthetic_plan: &synthetic_plan,
        };
        // Finalize the layout and synthesize unwind data, repeating while the
        // unwind step reports that it changed the layout, for at most 4 rounds.
        // NOTE(review): if the 4th round still reports a change, `linkedit` was
        // computed before that last mutation of `layout` — confirm that this
        // cannot happen or is acceptable.
        let mut linkedit = None;
        for _ in 0..4 {
            let (next_layout, next_linkedit) = macho::writer::finalize_layout_with_linkedit(
                &layout,
                opts.kind,
                opts,
                &dylib_loads,
                linkedit_context,
            )?;
            layout = next_layout;
            linkedit = Some(next_linkedit);
            let changed = synth::unwind::synthesize(
                &mut layout,
                &layout_inputs,
                &atom_table,
                &sym_table,
                &synthetic_plan,
            )?;
            if !changed {
                break;
            }
        }
        // `0..4` is non-empty, so the first round either set `linkedit` or
        // returned early through `?`.
        let linkedit = linkedit.expect("finalize loop always runs at least once");
        reloc::arm64::apply_layout(
            &mut layout,
            &layout_inputs,
            &atom_table,
            &sym_table,
            Some(&synthetic_plan),
            &linkedit,
        )?;

        // Serialize into memory first; the output file is only touched once the
        // whole image has been produced.
        let mut image = Vec::new();
        let entry_point = resolve_entry_point(opts, &sym_table)?;
        macho::writer::write_finalized_with_linkedit(
            &layout,
            opts.kind,
            opts,
            entry_point,
            &dylib_loads,
            &linkedit,
            &mut image,
        )?;
        let output = default_output_path(opts);
        fs::write(&output, image)?;
        if opts.kind == OutputKind::Executable {
            let mut perms = fs::metadata(&output)?.permissions();
            let mode = perms.mode();
            // Copy each read bit onto the matching execute bit
            // (0o400 -> 0o100, 0o040 -> 0o010, 0o004 -> 0o001).
            perms.set_mode(mode | ((mode & 0o444) >> 2));
            fs::set_permissions(&output, perms)?;
        }
        Ok(())
    }
}
| 361 | |
| 362 | fn default_output_path(opts: &LinkOptions) -> PathBuf { |
| 363 | opts.output |
| 364 | .clone() |
| 365 | .unwrap_or_else(|| PathBuf::from("a.out")) |
| 366 | } |
| 367 | |
| 368 | fn register_input( |
| 369 | inputs: &mut Inputs, |
| 370 | path: &std::path::Path, |
| 371 | load_order: usize, |
| 372 | ) -> Result<(), LinkError> { |
| 373 | let bytes = fs::read(path)?; |
| 374 | match path.extension().and_then(|ext| ext.to_str()) { |
| 375 | Some("a") => { |
| 376 | let _ = inputs.add_archive(path.to_path_buf(), bytes, load_order)?; |
| 377 | } |
| 378 | Some("dylib") => { |
| 379 | let _ = inputs.add_dylib(path.to_path_buf(), bytes)?; |
| 380 | } |
| 381 | Some("tbd") => { |
| 382 | let text = std::str::from_utf8(&bytes).map_err(|e| { |
| 383 | LinkError::Tbd(macho::tbd::TbdError::Schema { |
| 384 | msg: format!("TBD input is not UTF-8: {e}"), |
| 385 | }) |
| 386 | })?; |
| 387 | let docs = parse_tbd(text)?; |
| 388 | let target = Target { |
| 389 | arch: Arch::Arm64, |
| 390 | platform: Platform::MacOs, |
| 391 | }; |
| 392 | let canonical = docs |
| 393 | .iter() |
| 394 | .find(|doc| doc.parent_umbrella.is_empty()) |
| 395 | .unwrap_or_else(|| &docs[0]); |
| 396 | let load = DylibLoadMeta { |
| 397 | install_name: canonical.install_name.clone(), |
| 398 | current_version: canonical |
| 399 | .current_version |
| 400 | .as_deref() |
| 401 | .map(parse_version) |
| 402 | .unwrap_or(DEFAULT_TBD_VERSION), |
| 403 | compatibility_version: canonical |
| 404 | .compatibility_version |
| 405 | .as_deref() |
| 406 | .map(parse_version) |
| 407 | .unwrap_or(DEFAULT_TBD_VERSION), |
| 408 | ordinal: inputs.next_dylib_ordinal(), |
| 409 | }; |
| 410 | let mut loaded = false; |
| 411 | for doc in docs |
| 412 | .iter() |
| 413 | .filter(|doc| doc.targets.iter().any(|t| t.matches_requested(&target))) |
| 414 | { |
| 415 | let file = DylibFile::from_tbd(path, doc, &target); |
| 416 | let _ = |
| 417 | inputs.add_dylib_from_file_with_meta(path.to_path_buf(), file, load.clone()); |
| 418 | loaded = true; |
| 419 | } |
| 420 | if !loaded { |
| 421 | return Err(LinkError::NoTbdDocument(path.to_path_buf())); |
| 422 | } |
| 423 | } |
| 424 | _ => { |
| 425 | let _ = inputs.add_object(path.to_path_buf(), bytes, load_order)?; |
| 426 | } |
| 427 | } |
| 428 | Ok(()) |
| 429 | } |
| 430 | |
| 431 | fn resolve_entry_point( |
| 432 | opts: &LinkOptions, |
| 433 | sym_table: &SymbolTable, |
| 434 | ) -> Result<Option<macho::writer::EntryPoint>, LinkError> { |
| 435 | let name = if let Some(name) = &opts.entry { |
| 436 | name.as_str() |
| 437 | } else if opts.kind == OutputKind::Executable { |
| 438 | if symbol_defined(sym_table, "_main") { |
| 439 | "_main" |
| 440 | } else if symbol_defined(sym_table, "_start") { |
| 441 | "_start" |
| 442 | } else { |
| 443 | return Ok(None); |
| 444 | } |
| 445 | } else { |
| 446 | return Ok(None); |
| 447 | }; |
| 448 | let Some((symbol_id, _)) = sym_table |
| 449 | .iter() |
| 450 | .find(|(_, symbol)| sym_table.interner.resolve(symbol.name()) == name) |
| 451 | else { |
| 452 | return Err(LinkError::EntrySymbolNotFound(name.to_string())); |
| 453 | }; |
| 454 | let Symbol::Defined { atom, value, .. } = sym_table.get(symbol_id) else { |
| 455 | return Err(LinkError::EntrySymbolNotFound(name.to_string())); |
| 456 | }; |
| 457 | Ok(Some(macho::writer::EntryPoint { |
| 458 | atom: *atom, |
| 459 | atom_value: *value, |
| 460 | })) |
| 461 | } |
| 462 | |
| 463 | fn symbol_defined(sym_table: &SymbolTable, name: &str) -> bool { |
| 464 | sym_table.iter().any(|(_, symbol)| { |
| 465 | sym_table.interner.resolve(symbol.name()) == name |
| 466 | && matches!(symbol, Symbol::Defined { .. }) |
| 467 | }) |
| 468 | } |
| 469 |