Rust · 103156 bytes Raw Blame History
1 //! Sprint 10 Mach-O writer.
2 //!
3 //! Emits a parseable `MH_EXECUTE` or `MH_DYLIB` image from the output layout.
4
5 use std::collections::HashMap;
6 use std::fmt;
7 use std::fs;
8 use std::path::PathBuf;
9 use std::time::Duration;
10
11 use crate::atom::AtomTable;
12 use crate::input::{DataInCodeEntry, ObjectFile};
13 use crate::layout::{Layout, LayoutInput, PAGE_SIZE};
14 use crate::leb::write_uleb;
15 use crate::macho::constants::*;
16 use crate::macho::dylib::DylibDependency;
17 use crate::macho::exports::{ExportEntry, ExportKind};
18 use crate::macho::reader::{
19 write_commands, write_header, BuildTool, BuildVersionCmd, DyldInfoCmd, DylibCmd, DysymtabCmd,
20 LinkEditDataCmd, LoadCommand, MachHeader64, RpathCmd, Section64Header, Segment64, SymtabCmd,
21 HEADER_SIZE,
22 };
23 use crate::reloc::{
24 parse_raw_relocs, parse_relocs, ParsedRelocCache, Referent, Reloc, RelocKind, RelocLength,
25 };
26 use crate::resolve::{AtomId, InputId};
27 use crate::resolve::{Symbol, SymbolId, SymbolTable};
28 use crate::section::is_executable;
29 use crate::string_table::StringTableBuilder;
30 use crate::symbol::{write_nlist_table, InputSymbol, RawNlist, SymKind, NLIST_SIZE};
31 use crate::synth::tlv::THREAD_VARIABLE_DESCRIPTOR_SIZE;
32 use crate::synth::{
33 code_sig::CodeSignaturePlan,
34 dyld_info::{
35 build_export_trie, emit_bind_records, emit_lazy_bind_record, emit_rebase_run,
36 BindRecordSpec, OpcodeStream,
37 },
38 SyntheticPlan,
39 };
40 use crate::{LinkOptions, OutputKind};
41
/// Entry point of an executable image, identified by the atom that holds it.
///
/// Threaded through the writer so `build_commands` can hand it to
/// `resolve_entryoff` when emitting the `LC_MAIN` command.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EntryPoint {
    /// Entry atom; the writer reports `WriteError::EntryAtomMissing` for it.
    pub atom: crate::resolve::AtomId,
    /// NOTE(review): presumably the entry symbol's offset within `atom` —
    /// confirm against `resolve_entryoff`.
    pub atom_value: u64,
}
47
/// Borrowed linker state needed to build a full `__LINKEDIT` plan.
///
/// `finalize_layout_with_linkedit` wraps this in `LinkEditInputs` and passes it
/// to the link-edit plan builder; without it only a minimal plan is produced.
#[derive(Clone, Copy)]
pub struct LinkEditContext<'a> {
    /// Inputs handed to the LOH, function-starts and data-in-code builders.
    pub layout_inputs: &'a [LayoutInput<'a>],
    /// Atom table handed to the same metadata builders.
    pub atom_table: &'a AtomTable,
    /// Symbol table used to collect imports and build the output symbols.
    pub sym_table: &'a SymbolTable,
    /// Synthetic sections plan (stubs, GOT, lazy pointers) read by the
    /// indirect symbol table and bind stream builders.
    pub synthetic_plan: &'a SyntheticPlan,
    /// Optional atom-to-atom redirect map forwarded to the LOH and
    /// data-in-code builders.
    /// NOTE(review): presumably produced by identical-code folding — confirm.
    pub icf_redirects: Option<&'a HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>,
    /// Relocations parsed ahead of time (see `build_parsed_reloc_cache`).
    pub parsed_relocs: &'a ParsedRelocCache,
}
57
/// Wall-clock time spent in each phase of building the `__LINKEDIT` plan.
///
/// Values accumulate: the layout finalizer builds the plan several times and
/// adds the timings of every pass together.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct LinkEditBuildTimings {
    /// Import collection plus output symbol planning.
    pub symbol_plan: Duration,
    /// Local-symbol share reported by the symbol planner.
    pub symbol_plan_locals: Duration,
    /// Global-symbol share reported by the symbol planner.
    pub symbol_plan_globals: Duration,
    /// String-table share reported by the symbol planner.
    pub symbol_plan_strtab: Duration,
    /// Whole dyld-info phase (bind + rebase + export).
    pub dyld_info: Duration,
    /// Building the bind, weak-bind and lazy-bind streams.
    pub dyld_bind: Duration,
    /// Building the rebase stream.
    pub dyld_rebase: Duration,
    /// Building the export trie.
    pub dyld_export: Duration,
    /// Building the LOH, function-starts and data-in-code tables.
    pub metadata_tables: Duration,
    /// Planning the code signature.
    pub code_signature: Duration,
}
71
72 impl std::ops::AddAssign for LinkEditBuildTimings {
73 fn add_assign(&mut self, rhs: Self) {
74 self.symbol_plan += rhs.symbol_plan;
75 self.symbol_plan_locals += rhs.symbol_plan_locals;
76 self.symbol_plan_globals += rhs.symbol_plan_globals;
77 self.symbol_plan_strtab += rhs.symbol_plan_strtab;
78 self.dyld_info += rhs.dyld_info;
79 self.dyld_bind += rhs.dyld_bind;
80 self.dyld_rebase += rhs.dyld_rebase;
81 self.dyld_export += rhs.dyld_export;
82 self.metadata_tables += rhs.metadata_tables;
83 self.code_signature += rhs.code_signature;
84 }
85 }
86
/// One symbol record exposed through `LinkEditPlan::map_symbols`.
///
/// NOTE(review): presumably consumed by a link-map writer elsewhere — the
/// consumer is not visible from this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkMapSymbol {
    /// Symbol name.
    pub name: String,
    /// Address of the symbol.
    pub addr: u64,
    /// Size in bytes attributed to the symbol.
    pub size: u64,
    /// Index of the file the symbol came from.
    /// NOTE(review): index base and ordering are defined by the producer — confirm.
    pub file_index: usize,
}
94
/// Failures that can occur while finalizing the layout or writing the image.
#[derive(Debug)]
pub enum WriteError {
    /// A required output segment (named by the payload) is absent from the layout.
    MissingSegment(&'static str),
    /// The named quantity does not fit a 32-bit Mach-O field.
    OffsetTooLarge(&'static str),
    /// The entry atom is missing from the layout.
    EntryAtomMissing(crate::resolve::AtomId),
    /// A defined symbol points at an atom missing from the final layout.
    DefinedSymbolAtomMissing(SymbolId, crate::resolve::AtomId),
    /// A defined symbol points at an atom outside any output section.
    DefinedSymbolSectionMissing(SymbolId, crate::resolve::AtomId),
    /// A direct-bind atom is missing from the layout.
    DirectBindAtomMissing(crate::resolve::AtomId),
    /// A direct-bind atom is not inside an output section.
    DirectBindSectionMissing(crate::resolve::AtomId),
    /// A synthetic import symbol is missing from the symbol table.
    ImportSymbolMissing(SymbolId),
    /// A synthetic import symbol is not a dylib import.
    ImportSymbolWrongKind(SymbolId),
    /// Relocations of (input path, 1-based section index) failed to parse; the
    /// string carries the parser's detail message.
    MalformedRelocations(PathBuf, u8, String),
    /// `LC_LINKER_OPTIMIZATION_HINT` data of the input could not be remapped.
    MalformedLoh(PathBuf, String),
    /// `LC_DATA_IN_CODE` data of the input could not be remapped.
    MalformedDataInCode(PathBuf, String),
    /// A symbol list file could not be read.
    SymbolListRead(PathBuf, String),
}
111
112 impl fmt::Display for WriteError {
113 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114 match self {
115 WriteError::MissingSegment(name) => write!(f, "missing output segment `{name}`"),
116 WriteError::OffsetTooLarge(what) => {
117 write!(f, "{what} exceeds 32-bit Mach-O field width")
118 }
119 WriteError::EntryAtomMissing(atom) => {
120 write!(f, "entry atom {:?} missing from layout", atom)
121 }
122 WriteError::DefinedSymbolAtomMissing(symbol, atom) => write!(
123 f,
124 "defined symbol {:?} points at missing atom {:?} in final layout",
125 symbol, atom
126 ),
127 WriteError::DefinedSymbolSectionMissing(symbol, atom) => write!(
128 f,
129 "defined symbol {:?} points at atom {:?} outside any output section",
130 symbol, atom
131 ),
132 WriteError::DirectBindAtomMissing(atom) => {
133 write!(f, "direct bind atom {:?} missing from layout", atom)
134 }
135 WriteError::DirectBindSectionMissing(atom) => {
136 write!(
137 f,
138 "direct bind atom {:?} is not inside an output section",
139 atom
140 )
141 }
142 WriteError::ImportSymbolMissing(symbol) => {
143 write!(
144 f,
145 "synthetic import symbol {:?} missing from symbol table",
146 symbol
147 )
148 }
149 WriteError::ImportSymbolWrongKind(symbol) => {
150 write!(
151 f,
152 "synthetic import symbol {:?} is not a dylib import",
153 symbol
154 )
155 }
156 WriteError::MalformedRelocations(path, section, detail) => write!(
157 f,
158 "failed to parse relocations in {} section {}: {detail}",
159 path.display(),
160 section
161 ),
162 WriteError::MalformedLoh(path, detail) => {
163 write!(
164 f,
165 "failed to remap LC_LINKER_OPTIMIZATION_HINT in {}: {detail}",
166 path.display()
167 )
168 }
169 WriteError::MalformedDataInCode(path, detail) => {
170 write!(
171 f,
172 "failed to remap LC_DATA_IN_CODE in {}: {detail}",
173 path.display()
174 )
175 }
176 WriteError::SymbolListRead(path, detail) => {
177 write!(
178 f,
179 "{}: unable to read symbol list: {detail}",
180 path.display()
181 )
182 }
183 }
184 }
185 }
186
// No `source()` override: every variant carries its detail as plain data, so
// the default `Error` methods are sufficient.
impl std::error::Error for WriteError {}
188
189 pub fn write(
190 layout: &Layout,
191 kind: OutputKind,
192 opts: &LinkOptions,
193 out: &mut Vec<u8>,
194 ) -> Result<(), WriteError> {
195 write_with_dylibs(layout, kind, opts, None, &[], out)
196 }
197
198 pub fn write_with_dylibs(
199 layout: &Layout,
200 kind: OutputKind,
201 opts: &LinkOptions,
202 entry_point: Option<EntryPoint>,
203 dylibs: &[DylibDependency],
204 out: &mut Vec<u8>,
205 ) -> Result<(), WriteError> {
206 let layout = finalize_layout(layout, kind, opts, dylibs)?;
207 write_finalized_with_dylibs(&layout, kind, opts, entry_point, dylibs, out)
208 }
209
210 pub fn finalize_layout(
211 layout: &Layout,
212 kind: OutputKind,
213 opts: &LinkOptions,
214 dylibs: &[DylibDependency],
215 ) -> Result<Layout, WriteError> {
216 Ok(finalize_with_linkedit(layout, kind, opts, dylibs, None)?.0)
217 }
218
219 pub fn finalize_layout_with_linkedit(
220 layout: &Layout,
221 kind: OutputKind,
222 opts: &LinkOptions,
223 dylibs: &[DylibDependency],
224 context: LinkEditContext<'_>,
225 ) -> Result<(Layout, LinkEditPlan, LinkEditBuildTimings), WriteError> {
226 finalize_with_linkedit(layout, kind, opts, dylibs, Some(LinkEditInputs(context)))
227 }
228
229 pub fn build_parsed_reloc_cache(
230 inputs: &[LayoutInput<'_>],
231 ) -> Result<ParsedRelocCache, WriteError> {
232 let mut cache = HashMap::new();
233 for input in inputs {
234 for (sect_idx, section) in input.object.sections.iter().enumerate() {
235 if section.raw_relocs.is_empty() {
236 continue;
237 }
238 let section_idx = (sect_idx + 1) as u8;
239 let raws = parse_raw_relocs(&section.raw_relocs, 0, section.nreloc).map_err(|err| {
240 WriteError::MalformedRelocations(
241 input.object.path.clone(),
242 section_idx,
243 err.to_string(),
244 )
245 })?;
246 let relocs = parse_relocs(&raws).map_err(|err| {
247 WriteError::MalformedRelocations(
248 input.object.path.clone(),
249 section_idx,
250 err.to_string(),
251 )
252 })?;
253 cache.insert((input.id, section_idx), relocs);
254 }
255 }
256 Ok(cache)
257 }
258
259 fn finalize_with_linkedit(
260 layout: &Layout,
261 kind: OutputKind,
262 opts: &LinkOptions,
263 dylibs: &[DylibDependency],
264 inputs: Option<LinkEditInputs<'_>>,
265 ) -> Result<(Layout, LinkEditPlan, LinkEditBuildTimings), WriteError> {
266 let mut layout = layout.clone();
267 let (mut linkedit, mut timings) = build_linkedit_plan_profiled(&layout, kind, opts, inputs)?;
268 apply_indirect_starts(&mut layout, &linkedit);
269 let header_size = estimate_header_size(&layout, kind, opts, dylibs, &linkedit);
270 layout.relayout(header_size);
271
272 let (next_linkedit, next_timings) = build_linkedit_plan_profiled(&layout, kind, opts, inputs)?;
273 linkedit = next_linkedit;
274 timings += next_timings;
275 apply_indirect_starts(&mut layout, &linkedit);
276 let exact_header_size =
277 HEADER_SIZE as u64 + exact_sizeofcmds(&layout, kind, opts, dylibs, &linkedit)? as u64;
278 if exact_header_size != header_size {
279 layout.relayout(exact_header_size);
280 let (next_linkedit, next_timings) =
281 build_linkedit_plan_profiled(&layout, kind, opts, inputs)?;
282 linkedit = next_linkedit;
283 timings += next_timings;
284 apply_indirect_starts(&mut layout, &linkedit);
285 }
286
287 let linkedit_seg = layout
288 .segment_mut("__LINKEDIT")
289 .ok_or(WriteError::MissingSegment("__LINKEDIT"))?;
290 linkedit_seg.file_size = linkedit.total_size().max(1);
291 linkedit_seg.vm_size = align_up(linkedit.total_size().max(1), PAGE_SIZE);
292 Ok((layout, linkedit, timings))
293 }
294
295 fn exact_sizeofcmds(
296 layout: &Layout,
297 kind: OutputKind,
298 opts: &LinkOptions,
299 dylibs: &[DylibDependency],
300 linkedit: &LinkEditPlan,
301 ) -> Result<u32, WriteError> {
302 Ok(build_commands(layout, kind, opts, None, dylibs, linkedit)?
303 .iter()
304 .map(LoadCommand::cmdsize)
305 .sum())
306 }
307
308 pub fn write_finalized_with_dylibs(
309 layout: &Layout,
310 kind: OutputKind,
311 opts: &LinkOptions,
312 entry_point: Option<EntryPoint>,
313 dylibs: &[DylibDependency],
314 out: &mut Vec<u8>,
315 ) -> Result<(), WriteError> {
316 let linkedit = LinkEditPlan::minimal(layout, kind, opts)?;
317 write_finalized_with_linkedit(layout, kind, opts, entry_point, dylibs, &linkedit, out)
318 }
319
320 pub fn write_finalized_with_linkedit(
321 layout: &Layout,
322 kind: OutputKind,
323 opts: &LinkOptions,
324 entry_point: Option<EntryPoint>,
325 dylibs: &[DylibDependency],
326 linkedit_plan: &LinkEditPlan,
327 out: &mut Vec<u8>,
328 ) -> Result<(), WriteError> {
329 let _linkedit_segment = layout
330 .segment("__LINKEDIT")
331 .cloned()
332 .ok_or(WriteError::MissingSegment("__LINKEDIT"))?;
333 let commands = build_commands(layout, kind, opts, entry_point, dylibs, linkedit_plan)?;
334
335 let sizeofcmds: u32 = commands.iter().map(LoadCommand::cmdsize).sum();
336 let header = MachHeader64 {
337 magic: MH_MAGIC_64,
338 cputype: CPU_TYPE_ARM64,
339 cpusubtype: CPU_SUBTYPE_ARM64_ALL,
340 filetype: match kind {
341 OutputKind::Executable => MH_EXECUTE,
342 OutputKind::Dylib => MH_DYLIB,
343 },
344 ncmds: commands.len() as u32,
345 sizeofcmds,
346 flags: header_flags(layout, kind),
347 reserved: 0,
348 };
349
350 let final_size = final_file_size(layout);
351 out.clear();
352 out.reserve(final_size as usize);
353 write_header(&header, out);
354 write_commands(&commands, out);
355 out.resize(final_size as usize, 0);
356
357 for section in &layout.sections {
358 if section.is_zerofill() {
359 continue;
360 }
361 if !section.synthetic_data.is_empty() {
362 let start = (section.file_off + section.synthetic_offset) as usize;
363 let end = start + section.synthetic_data.len();
364 out[start..end].copy_from_slice(&section.synthetic_data);
365 }
366 for placed in &section.atoms {
367 let start = (section.file_off + placed.offset) as usize;
368 let end = start + placed.data.len();
369 out[start..end].copy_from_slice(&placed.data);
370 }
371 }
372
373 let symoff = linkedit_plan.symtab.symoff as usize;
374 let indirectoff = linkedit_plan.dysymtab.indirectsymoff as usize;
375 let rebaseoff = linkedit_plan.dyld_info.rebase_off as usize;
376 let bindoff = linkedit_plan.dyld_info.bind_off as usize;
377 let weak_bind_off = linkedit_plan.dyld_info.weak_bind_off as usize;
378 let lazy_bind_off = linkedit_plan.dyld_info.lazy_bind_off as usize;
379 let export_off = linkedit_plan.dyld_info.export_off as usize;
380 let loh_off = linkedit_plan.loh.map(|loh| loh.dataoff as usize);
381 let function_starts_off = linkedit_plan.function_starts.dataoff as usize;
382 let data_in_code_off = linkedit_plan.data_in_code.dataoff as usize;
383 let stroff = linkedit_plan.symtab.stroff as usize;
384 if !linkedit_plan.symtab_bytes.is_empty() {
385 let end = symoff + linkedit_plan.symtab_bytes.len();
386 out[symoff..end].copy_from_slice(&linkedit_plan.symtab_bytes);
387 }
388 if !linkedit_plan.indirect_bytes.is_empty() {
389 let end = indirectoff + linkedit_plan.indirect_bytes.len();
390 out[indirectoff..end].copy_from_slice(&linkedit_plan.indirect_bytes);
391 }
392 if !linkedit_plan.rebase_bytes.is_empty() {
393 let end = rebaseoff + linkedit_plan.rebase_bytes.len();
394 out[rebaseoff..end].copy_from_slice(&linkedit_plan.rebase_bytes);
395 }
396 if !linkedit_plan.bind_bytes.is_empty() {
397 let end = bindoff + linkedit_plan.bind_bytes.len();
398 out[bindoff..end].copy_from_slice(&linkedit_plan.bind_bytes);
399 }
400 if !linkedit_plan.weak_bind_bytes.is_empty() {
401 let end = weak_bind_off + linkedit_plan.weak_bind_bytes.len();
402 out[weak_bind_off..end].copy_from_slice(&linkedit_plan.weak_bind_bytes);
403 }
404 if !linkedit_plan.lazy_bind_bytes.is_empty() {
405 let end = lazy_bind_off + linkedit_plan.lazy_bind_bytes.len();
406 out[lazy_bind_off..end].copy_from_slice(&linkedit_plan.lazy_bind_bytes);
407 }
408 if !linkedit_plan.export_bytes.is_empty() {
409 let end = export_off + linkedit_plan.export_bytes.len();
410 out[export_off..end].copy_from_slice(&linkedit_plan.export_bytes);
411 }
412 if let Some(loh_off) = loh_off {
413 if !linkedit_plan.loh_bytes.is_empty() {
414 let end = loh_off + linkedit_plan.loh_bytes.len();
415 out[loh_off..end].copy_from_slice(&linkedit_plan.loh_bytes);
416 }
417 }
418 if !linkedit_plan.function_starts_bytes.is_empty() {
419 let end = function_starts_off + linkedit_plan.function_starts_bytes.len();
420 out[function_starts_off..end].copy_from_slice(&linkedit_plan.function_starts_bytes);
421 }
422 if !linkedit_plan.data_in_code_bytes.is_empty() {
423 let end = data_in_code_off + linkedit_plan.data_in_code_bytes.len();
424 out[data_in_code_off..end].copy_from_slice(&linkedit_plan.data_in_code_bytes);
425 }
426 let end = stroff + linkedit_plan.strtab_bytes.len();
427 out[stroff..end].copy_from_slice(&linkedit_plan.strtab_bytes);
428 if let Some(code_signature) = &linkedit_plan.code_signature {
429 let start = code_signature.dataoff as usize;
430 let bytes = code_signature.build_with_jobs(&out[..start], opts.parallel_jobs());
431 let end = start + bytes.len();
432 out[start..end].copy_from_slice(&bytes);
433 }
434
435 Ok(())
436 }
437
438 fn build_commands(
439 layout: &Layout,
440 kind: OutputKind,
441 opts: &LinkOptions,
442 entry_point: Option<EntryPoint>,
443 dylibs: &[DylibDependency],
444 linkedit: &LinkEditPlan,
445 ) -> Result<Vec<LoadCommand>, WriteError> {
446 let mut commands = Vec::new();
447 for segment in &layout.segments {
448 commands.push(LoadCommand::Segment64(segment_command(
449 layout,
450 segment.name.as_str(),
451 )?));
452 }
453
454 match kind {
455 OutputKind::Executable => {
456 commands.push(LoadCommand::DyldInfoOnly(linkedit.dyld_info));
457 commands.push(LoadCommand::Symtab(linkedit.symtab));
458 commands.push(LoadCommand::Dysymtab(linkedit.dysymtab));
459 commands.push(raw_dylinker_command("/usr/lib/dyld"));
460 if opts.emit_uuid {
461 commands.push(raw_uuid_command(stable_uuid(layout, kind)));
462 }
463 commands.push(LoadCommand::BuildVersion(build_version_command(opts)));
464 commands.push(raw_source_version_command(0));
465 commands.push(raw_entry_point(resolve_entryoff(layout, entry_point)?, 0));
466 }
467 OutputKind::Dylib => {
468 commands.push(LoadCommand::Dylib(DylibCmd {
469 cmd: LC_ID_DYLIB,
470 name: dylib_install_name(opts),
471 timestamp: 2,
472 current_version: dylib_current_version(opts),
473 compatibility_version: dylib_compatibility_version(opts),
474 }));
475 commands.push(LoadCommand::DyldInfoOnly(linkedit.dyld_info));
476 commands.push(LoadCommand::Symtab(linkedit.symtab));
477 commands.push(LoadCommand::Dysymtab(linkedit.dysymtab));
478 if opts.emit_uuid {
479 commands.push(raw_uuid_command(stable_uuid(layout, kind)));
480 }
481 commands.push(LoadCommand::BuildVersion(build_version_command(opts)));
482 commands.push(raw_source_version_command(0));
483 }
484 }
485
486 for dylib in dylibs {
487 commands.push(LoadCommand::Dylib(DylibCmd {
488 cmd: dylib.kind.load_cmd(),
489 name: dylib.install_name.clone(),
490 timestamp: 2,
491 current_version: dylib.current_version,
492 compatibility_version: dylib.compatibility_version,
493 }));
494 }
495
496 for rpath in &opts.rpaths {
497 commands.push(LoadCommand::Rpath(RpathCmd {
498 path: rpath.clone(),
499 }));
500 }
501
502 if let Some(loh) = linkedit.loh {
503 commands.push(raw_linkedit_command(
504 LC_LINKER_OPTIMIZATION_HINT,
505 loh.dataoff,
506 loh.datasize,
507 ));
508 }
509 commands.push(raw_linkedit_command(
510 LC_FUNCTION_STARTS,
511 linkedit.function_starts.dataoff,
512 linkedit.function_starts.datasize,
513 ));
514 commands.push(raw_linkedit_command(
515 LC_DATA_IN_CODE,
516 linkedit.data_in_code.dataoff,
517 linkedit.data_in_code.datasize,
518 ));
519 if let Some(code_signature) = &linkedit.code_signature {
520 commands.push(raw_linkedit_command(
521 LC_CODE_SIGNATURE,
522 code_signature.dataoff,
523 code_signature.datasize,
524 ));
525 } else {
526 commands.push(raw_linkedit_command(LC_CODE_SIGNATURE, 0, 0));
527 }
528 Ok(commands)
529 }
530
531 fn estimate_header_size(
532 layout: &Layout,
533 kind: OutputKind,
534 opts: &LinkOptions,
535 dylibs: &[DylibDependency],
536 linkedit: &LinkEditPlan,
537 ) -> u64 {
538 let mut size = HEADER_SIZE as u64;
539 for segment in &layout.segments {
540 size += (8 + 64 + 80 * segment.sections.len()) as u64;
541 }
542 size += build_version_command(opts).wire_size() as u64;
543 if opts.emit_uuid {
544 size += 24;
545 }
546 size += match kind {
547 OutputKind::Executable => {
548 raw_dylinker_command("/usr/lib/dyld").cmdsize() as u64
549 + 24
550 + raw_source_version_command(0).cmdsize() as u64
551 }
552 OutputKind::Dylib => {
553 DylibCmd {
554 cmd: LC_ID_DYLIB,
555 name: dylib_install_name(opts),
556 timestamp: 2,
557 current_version: dylib_current_version(opts),
558 compatibility_version: dylib_compatibility_version(opts),
559 }
560 .wire_size() as u64
561 + raw_source_version_command(0).cmdsize() as u64
562 }
563 };
564 for rpath in &opts.rpaths {
565 size += RpathCmd {
566 path: rpath.clone(),
567 }
568 .wire_size() as u64;
569 }
570 for dylib in dylibs {
571 size += DylibCmd {
572 cmd: dylib.kind.load_cmd(),
573 name: dylib.install_name.clone(),
574 timestamp: 2,
575 current_version: dylib.current_version,
576 compatibility_version: dylib.compatibility_version,
577 }
578 .wire_size() as u64;
579 }
580 size += SymtabCmd::WIRE_SIZE as u64;
581 size += DysymtabCmd::WIRE_SIZE as u64;
582 size += 16 * 3;
583 if linkedit.loh.is_some() {
584 size += 16;
585 }
586 size += DyldInfoCmd::WIRE_SIZE as u64;
587 size
588 }
589
590 fn segment_command(layout: &Layout, segment_name: &str) -> Result<Segment64, WriteError> {
591 let segment = layout
592 .segment(segment_name)
593 .ok_or(WriteError::MissingSegment(match segment_name {
594 "__PAGEZERO" => "__PAGEZERO",
595 "__TEXT" => "__TEXT",
596 "__DATA_CONST" => "__DATA_CONST",
597 "__DATA" => "__DATA",
598 "__LINKEDIT" => "__LINKEDIT",
599 _ => "__UNKNOWN",
600 }))?;
601 let mut sections = Vec::with_capacity(segment.sections.len());
602 for id in &segment.sections {
603 let section = &layout.sections[id.0 as usize];
604 sections.push(Section64Header {
605 sectname: name16(&section.name),
606 segname: name16(&section.segment),
607 addr: section.addr,
608 size: section.size,
609 offset: if section.is_zerofill() {
610 0
611 } else {
612 u32_fit(section.file_off, "section file offset")?
613 },
614 align: section.align_pow2 as u32,
615 reloff: 0,
616 nreloc: 0,
617 flags: section.flags,
618 reserved1: section.reserved1,
619 reserved2: section.reserved2,
620 reserved3: section.reserved3,
621 });
622 }
623
624 Ok(Segment64 {
625 segname: name16(segment_name),
626 vmaddr: segment.vm_addr,
627 vmsize: segment.vm_size,
628 fileoff: segment.file_off,
629 filesize: segment.file_size,
630 maxprot: segment.max_prot.bits(),
631 initprot: segment.init_prot.bits(),
632 flags: segment.flags,
633 sections,
634 })
635 }
636
637 fn raw_entry_point(entryoff: u64, stacksize: u64) -> LoadCommand {
638 let mut data = Vec::with_capacity(16);
639 data.extend_from_slice(&entryoff.to_le_bytes());
640 data.extend_from_slice(&stacksize.to_le_bytes());
641 LoadCommand::Raw {
642 cmd: LC_MAIN,
643 cmdsize: 24,
644 data,
645 }
646 }
647
648 fn raw_dylinker_command(path: &str) -> LoadCommand {
649 let mut data = Vec::with_capacity(4 + path.len() + 1);
650 let path_offset: u32 = 12;
651 data.extend_from_slice(&path_offset.to_le_bytes());
652 data.extend_from_slice(path.as_bytes());
653 data.push(0);
654 while !(8 + data.len()).is_multiple_of(8) {
655 data.push(0);
656 }
657 LoadCommand::Raw {
658 cmd: LC_LOAD_DYLINKER,
659 cmdsize: (8 + data.len()) as u32,
660 data,
661 }
662 }
663
664 fn raw_uuid_command(uuid: [u8; 16]) -> LoadCommand {
665 LoadCommand::Raw {
666 cmd: LC_UUID,
667 cmdsize: 24,
668 data: uuid.to_vec(),
669 }
670 }
671
672 fn raw_source_version_command(version: u64) -> LoadCommand {
673 LoadCommand::Raw {
674 cmd: LC_SOURCE_VERSION,
675 cmdsize: 16,
676 data: version.to_le_bytes().to_vec(),
677 }
678 }
679
680 fn raw_linkedit_command(cmd: u32, dataoff: u32, datasize: u32) -> LoadCommand {
681 let mut data = Vec::with_capacity(8);
682 data.extend_from_slice(&dataoff.to_le_bytes());
683 data.extend_from_slice(&datasize.to_le_bytes());
684 LoadCommand::Raw {
685 cmd,
686 cmdsize: 16,
687 data,
688 }
689 }
690
691 fn build_version_command(opts: &LinkOptions) -> BuildVersionCmd {
692 let platform = opts.platform_version.unwrap_or(crate::PlatformVersion {
693 minos: pack_version(11, 0, 0),
694 sdk: pack_version(11, 0, 0),
695 });
696 BuildVersionCmd {
697 platform: PLATFORM_MACOS,
698 minos: platform.minos,
699 sdk: platform.sdk,
700 tools: vec![BuildTool {
701 tool: 3,
702 version: pack_version(0, 1, 0),
703 }],
704 }
705 }
706
707 fn stable_uuid(layout: &Layout, kind: OutputKind) -> [u8; 16] {
708 fn mix(state: &mut u64, bytes: &[u8]) {
709 for byte in bytes {
710 *state ^= u64::from(*byte);
711 *state = state.wrapping_mul(0x100000001b3);
712 }
713 }
714
715 let mut lo = 0xcbf29ce484222325u64;
716 let mut hi = 0x84222325cbf29ce4u64;
717 mix(
718 &mut lo,
719 &[match kind {
720 OutputKind::Executable => 1,
721 OutputKind::Dylib => 2,
722 }],
723 );
724 for segment in &layout.segments {
725 mix(&mut lo, segment.name.as_bytes());
726 mix(&mut lo, &segment.vm_addr.to_le_bytes());
727 mix(&mut lo, &segment.vm_size.to_le_bytes());
728 mix(&mut hi, &segment.file_off.to_le_bytes());
729 mix(&mut hi, &segment.file_size.to_le_bytes());
730 mix(&mut hi, &segment.flags.to_le_bytes());
731 }
732 for section in &layout.sections {
733 mix(&mut lo, section.segment.as_bytes());
734 mix(&mut lo, section.name.as_bytes());
735 mix(&mut lo, &section.addr.to_le_bytes());
736 mix(&mut hi, &section.size.to_le_bytes());
737 mix(&mut hi, &section.file_off.to_le_bytes());
738 mix(&mut hi, &section.flags.to_le_bytes());
739 }
740 let mut uuid = [0u8; 16];
741 uuid[..8].copy_from_slice(&lo.to_be_bytes());
742 uuid[8..].copy_from_slice(&hi.to_be_bytes());
743 uuid[6] = (uuid[6] & 0x0f) | 0x40;
744 uuid[8] = (uuid[8] & 0x3f) | 0x80;
745 uuid
746 }
747
748 fn header_flags(layout: &Layout, kind: OutputKind) -> u32 {
749 let mut flags = match kind {
750 OutputKind::Executable => MH_DYLDLINK | MH_NOUNDEFS | MH_TWOLEVEL | MH_PIE,
751 OutputKind::Dylib => MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS,
752 };
753 if layout
754 .sections
755 .iter()
756 .any(|section| section.segment == "__DATA" && section.name == "__thread_vars")
757 {
758 flags |= MH_HAS_TLV_DESCRIPTORS;
759 }
760 flags
761 }
762
763 fn dylib_install_name(opts: &LinkOptions) -> String {
764 if let Some(name) = &opts.install_name {
765 return name.clone();
766 }
767 if let Some(path) = &opts.output {
768 if let Some(name) = path.file_name().and_then(|name| name.to_str()) {
769 return format!("@rpath/{name}");
770 }
771 return path.display().to_string();
772 }
773 "@rpath/a.out.dylib".to_string()
774 }
775
776 fn dylib_current_version(opts: &LinkOptions) -> u32 {
777 opts.current_version
778 .unwrap_or_else(|| pack_version(1, 0, 0))
779 }
780
781 fn dylib_compatibility_version(opts: &LinkOptions) -> u32 {
782 opts.compatibility_version
783 .unwrap_or_else(|| pack_version(1, 0, 0))
784 }
785
/// Complete description of the `__LINKEDIT` contents for one output image.
///
/// Holds both the load-command payloads that point into `__LINKEDIT` and the
/// byte blobs that the writer copies to those offsets.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkEditPlan {
    /// File offset of the `__LINKEDIT` segment the plan was built against.
    base_off: u32,
    /// `LC_SYMTAB` payload (symbol and string table offsets and sizes).
    pub symtab: SymtabCmd,
    /// `LC_DYSYMTAB` payload, including the indirect symbol table location.
    pub dysymtab: DysymtabCmd,
    /// `LC_DYLD_INFO_ONLY` payload (rebase/bind/weak/lazy/export locations).
    pub dyld_info: DyldInfoCmd,
    /// Location of the linker optimization hints, when any are emitted.
    pub loh: Option<LinkEditDataCmd>,
    /// Location of the function-starts table.
    pub function_starts: LinkEditDataCmd,
    /// Location of the data-in-code table.
    pub data_in_code: LinkEditDataCmd,
    /// Serialized nlist symbol table.
    pub symtab_bytes: Vec<u8>,
    /// Serialized indirect symbol table (little-endian `u32` indices).
    pub indirect_bytes: Vec<u8>,
    /// Rebase opcode stream.
    rebase_bytes: Vec<u8>,
    /// Bind opcode stream.
    bind_bytes: Vec<u8>,
    /// Weak-bind opcode stream.
    weak_bind_bytes: Vec<u8>,
    /// Lazy-bind opcode stream.
    lazy_bind_bytes: Vec<u8>,
    /// Export trie bytes.
    export_bytes: Vec<u8>,
    /// Linker optimization hint bytes (see `loh_bytes()`).
    loh_bytes: Vec<u8>,
    /// Function-starts table bytes.
    function_starts_bytes: Vec<u8>,
    /// Data-in-code table bytes.
    data_in_code_bytes: Vec<u8>,
    /// String table bytes; always written by the image writer.
    pub strtab_bytes: Vec<u8>,
    /// Code signature plan; the signature bytes are produced at write time.
    code_signature: Option<CodeSignaturePlan>,
    /// First indirect-symbol index per `(segment, section)` name pair.
    /// NOTE(review): consumed by `apply_indirect_starts`, which is not visible here.
    indirect_starts: HashMap<(String, String), u32>,
    /// Per-symbol offsets exposed through `lazy_bind_offset`.
    lazy_bind_offsets: HashMap<SymbolId, u32>,
    /// Symbol records carried alongside the plan (see `LinkMapSymbol`).
    pub map_symbols: Vec<LinkMapSymbol>,
}
811
812 impl LinkEditPlan {
813 fn minimal(layout: &Layout, kind: OutputKind, opts: &LinkOptions) -> Result<Self, WriteError> {
814 build_linkedit_plan(layout, kind, opts, None)
815 }
816
817 fn total_size(&self) -> u64 {
818 let base_off = self.base_off as u64;
819 let regular_end = self.symtab.stroff as u64 + self.strtab_bytes.len() as u64;
820 let regular_size = regular_end.saturating_sub(base_off);
821 if let Some(code_signature) = &self.code_signature {
822 (code_signature.dataoff as u64 - base_off) + code_signature.datasize as u64
823 } else {
824 regular_size
825 }
826 }
827
828 pub fn lazy_bind_offset(&self, symbol: SymbolId) -> Option<u32> {
829 self.lazy_bind_offsets.get(&symbol).copied()
830 }
831
832 pub fn loh_bytes(&self) -> &[u8] {
833 &self.loh_bytes
834 }
835 }
836
837 fn build_linkedit_plan(
838 layout: &Layout,
839 kind: OutputKind,
840 opts: &LinkOptions,
841 inputs: Option<LinkEditInputs<'_>>,
842 ) -> Result<LinkEditPlan, WriteError> {
843 build_linkedit_plan_profiled(layout, kind, opts, inputs).map(|(plan, _)| plan)
844 }
845
846 fn build_linkedit_plan_profiled(
847 layout: &Layout,
848 kind: OutputKind,
849 opts: &LinkOptions,
850 inputs: Option<LinkEditInputs<'_>>,
851 ) -> Result<(LinkEditPlan, LinkEditBuildTimings), WriteError> {
852 let mut timings = LinkEditBuildTimings::default();
853 let linkedit = layout
854 .segment("__LINKEDIT")
855 .cloned()
856 .ok_or(WriteError::MissingSegment("__LINKEDIT"))?;
857 let base_off = u32_fit(linkedit.file_off, "linkedit file offset")?;
858
859 let Some(inputs) = inputs else {
860 let phase_started = std::time::Instant::now();
861 let code_signature = Some(build_code_signature(
862 layout,
863 kind,
864 opts,
865 base_off as u64 + 8,
866 )?);
867 timings.code_signature += phase_started.elapsed();
868 return Ok((
869 LinkEditPlan {
870 base_off,
871 symtab: SymtabCmd {
872 symoff: base_off,
873 nsyms: 0,
874 stroff: base_off,
875 strsize: 8,
876 },
877 dysymtab: DysymtabCmd::default(),
878 dyld_info: DyldInfoCmd::default(),
879 loh: None,
880 function_starts: LinkEditDataCmd {
881 dataoff: base_off,
882 datasize: 0,
883 },
884 data_in_code: LinkEditDataCmd {
885 dataoff: base_off,
886 datasize: 0,
887 },
888 symtab_bytes: Vec::new(),
889 indirect_bytes: Vec::new(),
890 rebase_bytes: Vec::new(),
891 bind_bytes: Vec::new(),
892 weak_bind_bytes: Vec::new(),
893 lazy_bind_bytes: Vec::new(),
894 export_bytes: Vec::new(),
895 loh_bytes: Vec::new(),
896 function_starts_bytes: Vec::new(),
897 data_in_code_bytes: Vec::new(),
898 strtab_bytes: vec![0; 8],
899 code_signature,
900 indirect_starts: HashMap::new(),
901 lazy_bind_offsets: HashMap::new(),
902 map_symbols: Vec::new(),
903 },
904 timings,
905 ));
906 };
907 let sym_table = inputs.0.sym_table;
908 let synthetic_plan = inputs.0.synthetic_plan;
909
910 let phase_started = std::time::Instant::now();
911 let imports = collect_imports(sym_table, synthetic_plan)?;
912 let import_lookup: HashMap<SymbolId, &ImportSymbolRecord> = imports
913 .iter()
914 .map(|record| (record.symbol, record))
915 .collect();
916 let visibility = SymbolVisibilityPolicy::from_opts(opts)?;
917 let (symbol_plan, symbol_plan_timings) = build_output_symbols_profiled(
918 layout,
919 kind,
920 opts.dead_strip,
921 opts.strip_locals,
922 &visibility,
923 inputs,
924 &imports,
925 )?;
926 timings.symbol_plan += phase_started.elapsed();
927 timings.symbol_plan_locals += symbol_plan_timings.locals;
928 timings.symbol_plan_globals += symbol_plan_timings.globals;
929 timings.symbol_plan_strtab += symbol_plan_timings.strtab;
930 let mut symtab_bytes = Vec::with_capacity(symbol_plan.symbols.len() * NLIST_SIZE);
931 write_nlist_table(&symbol_plan.symbols, &mut symtab_bytes);
932
933 let mut indirect_symbols = Vec::new();
934 let mut indirect_starts = HashMap::new();
935 push_indirect_section(
936 &mut indirect_symbols,
937 &mut indirect_starts,
938 ("__TEXT", "__stubs"),
939 synthetic_plan.stubs.entries.iter().map(|entry| {
940 indirect_symbol_index(entry.symbol, &import_lookup, &symbol_plan.symbol_indices)
941 }),
942 );
943 push_indirect_section(
944 &mut indirect_symbols,
945 &mut indirect_starts,
946 ("__DATA_CONST", "__got"),
947 synthetic_plan.got.entries.iter().map(|entry| {
948 indirect_symbol_index(entry.symbol, &import_lookup, &symbol_plan.symbol_indices)
949 }),
950 );
951 push_indirect_section(
952 &mut indirect_symbols,
953 &mut indirect_starts,
954 ("__DATA", "__la_symbol_ptr"),
955 synthetic_plan.lazy_pointers.entries.iter().map(|entry| {
956 indirect_symbol_index(entry.symbol, &import_lookup, &symbol_plan.symbol_indices)
957 }),
958 );
959
960 let mut indirect_bytes = Vec::with_capacity(indirect_symbols.len() * 4);
961 for index in &indirect_symbols {
962 indirect_bytes.extend_from_slice(&index.to_le_bytes());
963 }
964
965 let dyld_started = std::time::Instant::now();
966 let phase_started = std::time::Instant::now();
967 let bind_streams = build_bind_streams(layout, synthetic_plan, &import_lookup)?;
968 let bind_bytes = pad_dyld_info_stream(bind_streams.bind);
969 let weak_bind_bytes = pad_dyld_info_stream(bind_streams.weak_bind);
970 let lazy_bind_bytes = pad_dyld_info_stream(bind_streams.lazy_bind);
971 timings.dyld_bind += phase_started.elapsed();
972 let phase_started = std::time::Instant::now();
973 let rebase_bytes = pad_dyld_info_stream(build_rebase_stream(layout, synthetic_plan, inputs)?);
974 timings.dyld_rebase += phase_started.elapsed();
975 let phase_started = std::time::Instant::now();
976 let export_bytes = pad_dyld_info_stream(build_export_trie(&symbol_plan.exports));
977 timings.dyld_export += phase_started.elapsed();
978 timings.dyld_info += dyld_started.elapsed();
979
980 let phase_started = std::time::Instant::now();
981 let loh_bytes = build_loh(
982 layout,
983 inputs.0.layout_inputs,
984 inputs.0.atom_table,
985 inputs.0.icf_redirects,
986 )?;
987 let function_starts_bytes =
988 build_function_starts(layout, inputs.0.layout_inputs, inputs.0.atom_table)?;
989 let data_in_code_bytes = build_data_in_code(
990 layout,
991 inputs.0.layout_inputs,
992 inputs.0.atom_table,
993 inputs.0.icf_redirects,
994 )?;
995 timings.metadata_tables += phase_started.elapsed();
996
997 let mut cursor = base_off as u64;
998 let rebase_off = place_optional_block(&mut cursor, rebase_bytes.len(), "rebase stream offset")?;
999 let bindoff = place_optional_block(&mut cursor, bind_bytes.len(), "bind stream offset")?;
1000 let weak_bind_off = place_optional_block(
1001 &mut cursor,
1002 weak_bind_bytes.len(),
1003 "weak bind stream offset",
1004 )?;
1005 let lazy_bind_off = place_optional_block(
1006 &mut cursor,
1007 lazy_bind_bytes.len(),
1008 "lazy bind stream offset",
1009 )?;
1010 let export_off = place_optional_block(&mut cursor, export_bytes.len(), "export trie offset")?;
1011 let loh = place_optional_linkedit_data_block(&mut cursor, loh_bytes.len(), "LOH offset")?;
1012 let function_starts = place_linkedit_data_block(
1013 &mut cursor,
1014 function_starts_bytes.len(),
1015 "function starts offset",
1016 )?;
1017 let data_in_code =
1018 place_linkedit_data_block(&mut cursor, data_in_code_bytes.len(), "data-in-code offset")?;
1019 let symoff = place_required_block(&mut cursor, symtab_bytes.len(), "symbol table offset")?;
1020 let indirectsymoff = place_optional_block(
1021 &mut cursor,
1022 indirect_bytes.len(),
1023 "indirect symbol table offset",
1024 )?;
1025 let stroff = place_required_block(
1026 &mut cursor,
1027 symbol_plan.strtab_bytes.len(),
1028 "string table offset",
1029 )?;
1030 let regular_end = stroff as u64 + symbol_plan.strtab_bytes.len() as u64;
1031 let phase_started = std::time::Instant::now();
1032 let code_signature = Some(build_code_signature(layout, kind, opts, regular_end)?);
1033 timings.code_signature += phase_started.elapsed();
1034 Ok((
1035 LinkEditPlan {
1036 base_off,
1037 symtab: SymtabCmd {
1038 symoff,
1039 nsyms: symbol_plan.symbols.len() as u32,
1040 stroff,
1041 strsize: symbol_plan.strtab_bytes.len() as u32,
1042 },
1043 dysymtab: DysymtabCmd {
1044 indirectsymoff,
1045 nindirectsyms: indirect_symbols.len() as u32,
1046 ..symbol_plan.dysymtab
1047 },
1048 dyld_info: DyldInfoCmd {
1049 rebase_off,
1050 rebase_size: rebase_bytes.len() as u32,
1051 bind_off: bindoff,
1052 bind_size: bind_bytes.len() as u32,
1053 weak_bind_off,
1054 weak_bind_size: weak_bind_bytes.len() as u32,
1055 lazy_bind_off,
1056 lazy_bind_size: lazy_bind_bytes.len() as u32,
1057 export_off,
1058 export_size: export_bytes.len() as u32,
1059 },
1060 loh,
1061 function_starts,
1062 data_in_code,
1063 symtab_bytes,
1064 indirect_bytes,
1065 rebase_bytes,
1066 bind_bytes,
1067 weak_bind_bytes,
1068 lazy_bind_bytes,
1069 export_bytes,
1070 loh_bytes,
1071 function_starts_bytes,
1072 data_in_code_bytes,
1073 strtab_bytes: symbol_plan.strtab_bytes,
1074 code_signature,
1075 indirect_starts,
1076 lazy_bind_offsets: bind_streams.lazy_offsets,
1077 map_symbols: symbol_plan.map_symbols,
1078 },
1079 timings,
1080 ))
1081 }
1082
1083 fn build_code_signature(
1084 layout: &Layout,
1085 kind: OutputKind,
1086 opts: &LinkOptions,
1087 regular_end: u64,
1088 ) -> Result<CodeSignaturePlan, WriteError> {
1089 let code_limit = align_up(regular_end, 16);
1090 CodeSignaturePlan::new(layout, opts, code_limit, kind == OutputKind::Executable)
1091 .map_err(WriteError::OffsetTooLarge)
1092 }
1093
1094 fn pad_dyld_info_stream(mut bytes: Vec<u8>) -> Vec<u8> {
1095 if bytes.is_empty() {
1096 return bytes;
1097 }
1098 let padded_len = align_up(bytes.len() as u64, 8) as usize;
1099 bytes.resize(padded_len, 0);
1100 bytes
1101 }
1102
/// One dylib-imported symbol that the output image references.
///
/// Built by `collect_imports` from `Symbol::DylibImport` entries and later
/// turned into an undefined symbol-table entry.
#[derive(Debug, Clone)]
struct ImportSymbolRecord {
    /// Id of the import in the link's symbol table.
    symbol: SymbolId,
    /// Symbol name, resolved from the symbol table's interner.
    name: String,
    /// Ordinal copied from the import; shifted into the high byte of `n_desc`
    /// when the undefined symbol is emitted.
    ordinal: u16,
    /// Whether the import is weak; sets `N_WEAK_REF` on the emitted symbol.
    weak_import: bool,
}
1110
/// Copyable newtype around [`LinkEditContext`]; the helpers in this file read
/// the wrapped context through `.0`.
#[derive(Clone, Copy)]
struct LinkEditInputs<'a>(LinkEditContext<'a>);
1113
/// Which of the three symbol-table ranges an output symbol belongs to.
///
/// The output table is laid out as locals, then externally defined symbols,
/// then undefined symbols, and the dysymtab counts follow the same split.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OutputSymbolPartition {
    /// Local symbol (including globals hidden by the visibility policy).
    Local,
    /// Defined symbol that stays externally visible.
    ExternalDefined,
    /// Symbol imported from a dylib.
    Undefined,
}
1120
/// Everything needed to emit one output symbol-table entry and, for defined
/// symbols, its link-map line.
#[derive(Debug, Clone)]
struct OutputSymbolSpec {
    /// Symbol-table id, when the entry corresponds to a resolved symbol;
    /// `None` for input-local and synthetic symbols.
    symbol: Option<SymbolId>,
    /// Name written to the string table.
    name: String,
    /// Symbol-table range this entry is placed in.
    partition: OutputSymbolPartition,
    /// Raw nlist `n_type` byte.
    n_type: u8,
    /// Raw nlist `n_sect` value (`NO_SECT` for absolute and undefined symbols).
    n_sect: u8,
    /// Raw nlist `n_desc` flags.
    n_desc: u16,
    /// Raw nlist `n_value`.
    n_value: u64,
    /// Size reported in the link map; not part of the nlist entry.
    size: u64,
    /// Link-map file index: position of the defining input plus one, or 0
    /// when there is no defining input.
    file_index: usize,
}
1133
1134 #[derive(Debug, Clone)]
1135 struct SymbolVisibilityPolicy {
1136 exported: Vec<String>,
1137 unexported: Vec<String>,
1138 }
1139
1140 impl SymbolVisibilityPolicy {
1141 fn from_opts(opts: &LinkOptions) -> Result<Self, WriteError> {
1142 let mut exported = opts.exported_symbols.clone();
1143 let mut unexported = opts.unexported_symbols.clone();
1144 for path in &opts.exported_symbols_lists {
1145 exported.extend(read_symbol_patterns(path)?);
1146 }
1147 for path in &opts.unexported_symbols_lists {
1148 unexported.extend(read_symbol_patterns(path)?);
1149 }
1150 Ok(Self {
1151 exported,
1152 unexported,
1153 })
1154 }
1155
1156 fn hides(&self, name: &str) -> bool {
1157 if !self.exported.is_empty()
1158 && !self
1159 .exported
1160 .iter()
1161 .any(|pattern| wildcard_matches(pattern, name))
1162 {
1163 return true;
1164 }
1165 self.unexported
1166 .iter()
1167 .any(|pattern| wildcard_matches(pattern, name))
1168 }
1169 }
1170
/// Result of planning the output symbol table.
#[derive(Debug, Clone)]
struct SymbolTablePlan {
    /// Output symbols in final table order (locals, external defined, undefined).
    symbols: Vec<InputSymbol>,
    /// Link-map entries for every non-undefined output symbol.
    map_symbols: Vec<LinkMapSymbol>,
    /// Serialized string table referenced by the symbols' `strx` fields.
    strtab_bytes: Vec<u8>,
    /// Output symbol-table index of every entry that has a `SymbolId`.
    symbol_indices: HashMap<SymbolId, u32>,
    /// Export entries derived from the externally defined symbols.
    exports: Vec<ExportEntry>,
    /// Dysymtab command with the local / external-defined / undefined ranges
    /// filled in; the remaining fields are left at their defaults.
    dysymtab: DysymtabCmd,
}
1180
/// The three dyld bind opcode streams produced by `build_bind_streams`,
/// before they are padded for placement in `__LINKEDIT`.
struct BindStreams {
    /// Regular bind opcode stream.
    bind: Vec<u8>,
    /// Weak bind opcode stream.
    weak_bind: Vec<u8>,
    /// Lazy bind opcode stream.
    lazy_bind: Vec<u8>,
    /// Per-symbol offsets, stored in the plan as `lazy_bind_offsets`.
    // NOTE(review): presumably the byte offset of each symbol's record inside
    // `lazy_bind`; confirm against `build_bind_streams`.
    lazy_offsets: HashMap<SymbolId, u32>,
}
1187
/// Location of one pointer that needs a rebase entry.
///
/// The derived `Ord` compares `segment_index` first and `segment_offset`
/// second (field declaration order); `build_rebase_stream` sorts sites with
/// it, so the field order must not change.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct RebaseSite {
    /// Index of the segment containing the pointer.
    segment_index: u8,
    /// Offset of the pointer from the segment's `vm_addr`.
    segment_offset: u64,
}
1193
1194 fn build_rebase_stream(
1195 layout: &Layout,
1196 synthetic_plan: &SyntheticPlan,
1197 inputs: LinkEditInputs<'_>,
1198 ) -> Result<Vec<u8>, WriteError> {
1199 let mut sites = collect_rebase_sites(layout, synthetic_plan, inputs)?;
1200 if sites.is_empty() {
1201 return Ok(Vec::new());
1202 }
1203 sites.sort_unstable();
1204 sites.dedup();
1205
1206 let mut out = OpcodeStream::new();
1207 out.byte(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
1208 let mut idx = 0usize;
1209 while idx < sites.len() {
1210 let segment_index = sites[idx].segment_index;
1211 out.byte(
1212 REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | (segment_index & REBASE_IMMEDIATE_MASK),
1213 );
1214 out.uleb(sites[idx].segment_offset);
1215 let mut cursor = sites[idx].segment_offset;
1216 while idx < sites.len() && sites[idx].segment_index == segment_index {
1217 if sites[idx].segment_offset > cursor {
1218 out.byte(REBASE_OPCODE_ADD_ADDR_ULEB);
1219 out.uleb(sites[idx].segment_offset - cursor);
1220 }
1221 let run_start = sites[idx].segment_offset;
1222 let mut run_len = 1usize;
1223 while idx + run_len < sites.len()
1224 && sites[idx + run_len].segment_index == segment_index
1225 && sites[idx + run_len].segment_offset == run_start + (run_len as u64) * 8
1226 {
1227 run_len += 1;
1228 }
1229 emit_rebase_run(&mut out, run_len);
1230 cursor = run_start + (run_len as u64) * 8;
1231 idx += run_len;
1232 }
1233 }
1234 out.done();
1235 Ok(out.into_vec())
1236 }
1237
1238 fn collect_rebase_sites(
1239 layout: &Layout,
1240 synthetic_plan: &SyntheticPlan,
1241 inputs: LinkEditInputs<'_>,
1242 ) -> Result<Vec<RebaseSite>, WriteError> {
1243 let mut sites = collect_lazy_pointer_rebase_sites(layout, synthetic_plan)?;
1244 sites.extend(collect_local_got_rebase_sites(
1245 layout,
1246 synthetic_plan,
1247 inputs.0.sym_table,
1248 )?);
1249 let input_map: HashMap<InputId, &ObjectFile> = inputs
1250 .0
1251 .layout_inputs
1252 .iter()
1253 .map(|input| (input.id, input.object))
1254 .collect();
1255 let symbol_name_index = build_symbol_name_index(inputs.0.sym_table);
1256
1257 for section in &layout.sections {
1258 if !matches!(section.segment.as_str(), "__DATA" | "__DATA_CONST") {
1259 continue;
1260 }
1261 if section.name == "__thread_vars" {
1262 continue;
1263 }
1264 let segment = layout
1265 .segment(&section.segment)
1266 .ok_or(WriteError::MissingSegment("__UNKNOWN"))?;
1267 let segment_index = segment_index(layout, &section.segment)?;
1268 for placed in &section.atoms {
1269 let atom = inputs.0.atom_table.get(placed.atom);
1270 let Some(obj) = input_map.get(&atom.origin).copied() else {
1271 continue;
1272 };
1273 let relocs = inputs
1274 .0
1275 .parsed_relocs
1276 .get(&(atom.origin, atom.input_section))
1277 .map(Vec::as_slice)
1278 .unwrap_or(&[]);
1279 for reloc in relocs_for_rebase(relocs, atom) {
1280 if !reloc_needs_rebase(obj, reloc, inputs.0.sym_table, &symbol_name_index) {
1281 continue;
1282 }
1283 let local_offset = reloc.offset.saturating_sub(atom.input_offset) as u64;
1284 sites.push(RebaseSite {
1285 segment_index,
1286 segment_offset: section.addr + placed.offset + local_offset - segment.vm_addr,
1287 });
1288 }
1289 }
1290 }
1291
1292 Ok(sites)
1293 }
1294
1295 fn collect_lazy_pointer_rebase_sites(
1296 layout: &Layout,
1297 synthetic_plan: &SyntheticPlan,
1298 ) -> Result<Vec<RebaseSite>, WriteError> {
1299 if synthetic_plan.lazy_pointers.entries.is_empty() {
1300 return Ok(Vec::new());
1301 }
1302
1303 let segment_index = segment_index(layout, "__DATA")?;
1304 let segment = layout
1305 .segment("__DATA")
1306 .ok_or(WriteError::MissingSegment("__DATA"))?;
1307 let section = layout
1308 .sections
1309 .iter()
1310 .find(|section| section.segment == "__DATA" && section.name == "__la_symbol_ptr")
1311 .ok_or(WriteError::MissingSegment("__DATA"))?;
1312
1313 Ok((0..synthetic_plan.lazy_pointers.entries.len())
1314 .map(|idx| RebaseSite {
1315 segment_index,
1316 segment_offset: section.addr + (idx as u64) * 8 - segment.vm_addr,
1317 })
1318 .collect())
1319 }
1320
1321 fn collect_local_got_rebase_sites(
1322 layout: &Layout,
1323 synthetic_plan: &SyntheticPlan,
1324 sym_table: &SymbolTable,
1325 ) -> Result<Vec<RebaseSite>, WriteError> {
1326 if synthetic_plan.got.entries.is_empty() {
1327 return Ok(Vec::new());
1328 }
1329
1330 let segment_index = segment_index(layout, "__DATA_CONST")?;
1331 let segment = layout
1332 .segment("__DATA_CONST")
1333 .ok_or(WriteError::MissingSegment("__DATA_CONST"))?;
1334 let section = layout
1335 .sections
1336 .iter()
1337 .find(|section| section.segment == "__DATA_CONST" && section.name == "__got")
1338 .ok_or(WriteError::MissingSegment("__DATA_CONST"))?;
1339
1340 Ok(synthetic_plan
1341 .got
1342 .entries
1343 .iter()
1344 .enumerate()
1345 .filter(|(_, entry)| !matches!(sym_table.get(entry.symbol), Symbol::DylibImport { .. }))
1346 .map(|(idx, _)| RebaseSite {
1347 segment_index,
1348 segment_offset: section.addr + (idx as u64) * 8 - segment.vm_addr,
1349 })
1350 .collect())
1351 }
1352
1353 fn relocs_for_rebase<'a>(
1354 relocs: &'a [Reloc],
1355 atom: &crate::atom::Atom,
1356 ) -> impl Iterator<Item = Reloc> + 'a {
1357 let start = atom.input_offset;
1358 let end = atom.input_offset + atom.size;
1359 relocs.iter().copied().filter(move |reloc| {
1360 let reloc_end = reloc.offset + reloc.length.byte_width() as u32;
1361 reloc.offset >= start && reloc_end <= end
1362 })
1363 }
1364
1365 fn reloc_needs_rebase(
1366 obj: &ObjectFile,
1367 reloc: Reloc,
1368 sym_table: &SymbolTable,
1369 symbol_name_index: &HashMap<String, SymbolId>,
1370 ) -> bool {
1371 if reloc.kind != RelocKind::Unsigned
1372 || reloc.length != RelocLength::Quad
1373 || reloc.pcrel
1374 || reloc.subtrahend.is_some()
1375 {
1376 return false;
1377 }
1378
1379 match reloc.referent {
1380 Referent::Section(_) => true,
1381 Referent::Symbol(sym_idx) => {
1382 let Some(input_sym) = obj.symbols.get(sym_idx as usize) else {
1383 return false;
1384 };
1385 match symbol_referent_id(obj, reloc.referent, symbol_name_index) {
1386 Some(symbol_id) => match sym_table.get(symbol_id) {
1387 Symbol::DylibImport { .. } => false,
1388 Symbol::Defined { atom, .. } => atom.0 != 0,
1389 Symbol::Common { .. } => true,
1390 _ => false,
1391 },
1392 None => matches!(input_sym.kind(), SymKind::Sect),
1393 }
1394 }
1395 }
1396 }
1397
1398 fn build_symbol_name_index(sym_table: &SymbolTable) -> HashMap<String, SymbolId> {
1399 sym_table
1400 .iter()
1401 .map(|(symbol_id, symbol)| {
1402 (
1403 sym_table.interner.resolve(symbol.name()).to_string(),
1404 symbol_id,
1405 )
1406 })
1407 .collect()
1408 }
1409
1410 fn symbol_referent_id(
1411 obj: &ObjectFile,
1412 referent: Referent,
1413 symbol_name_index: &HashMap<String, SymbolId>,
1414 ) -> Option<SymbolId> {
1415 let Referent::Symbol(sym_idx) = referent else {
1416 return None;
1417 };
1418 let input_sym = obj.symbols.get(sym_idx as usize)?;
1419 let name = obj.symbol_name(input_sym).ok()?;
1420 symbol_name_index.get(name).copied()
1421 }
1422
1423 fn build_function_starts(
1424 layout: &Layout,
1425 inputs: &[LayoutInput<'_>],
1426 atom_table: &AtomTable,
1427 ) -> Result<Vec<u8>, WriteError> {
1428 let image_base = layout
1429 .segment("__TEXT")
1430 .ok_or(WriteError::MissingSegment("__TEXT"))?
1431 .vm_addr;
1432 let symbol_offsets = build_function_start_symbol_index(inputs);
1433 let mut starts = Vec::new();
1434
1435 for section in &layout.sections {
1436 if section.segment != "__TEXT" || !is_executable(section.kind) {
1437 continue;
1438 }
1439 for placed in &section.atoms {
1440 starts.push(section.addr + placed.offset - image_base);
1441 let atom = atom_table.get(placed.atom);
1442 for alt in &atom.alt_entries {
1443 starts.push(
1444 section.addr + placed.offset + alt.offset_within_atom as u64 - image_base,
1445 );
1446 }
1447 let Some(section_symbols) = symbol_offsets.get(&(atom.origin, atom.input_section))
1448 else {
1449 continue;
1450 };
1451 let atom_start = atom.input_offset as u64;
1452 let atom_end = atom_start + atom.size as u64;
1453 let start_idx = section_symbols.partition_point(|&offset| offset <= atom_start);
1454 let end_idx = section_symbols.partition_point(|&offset| offset < atom_end);
1455 if start_idx >= end_idx {
1456 continue;
1457 }
1458 for &offset in &section_symbols[start_idx..end_idx] {
1459 starts.push(section.addr + placed.offset + (offset - atom_start) - image_base);
1460 }
1461 }
1462 }
1463
1464 starts.sort_unstable();
1465 starts.dedup();
1466 if starts.is_empty() {
1467 return Ok(Vec::new());
1468 }
1469
1470 let mut out = Vec::new();
1471 let mut previous = 0u64;
1472 for start in starts {
1473 write_uleb(start - previous, &mut out);
1474 previous = start;
1475 }
1476 out.push(0);
1477 while !out.len().is_multiple_of(8) {
1478 out.push(0);
1479 }
1480 Ok(out)
1481 }
1482
1483 type FunctionStartSymbolIndex = HashMap<(InputId, u8), Vec<u64>>;
1484
1485 fn build_function_start_symbol_index(inputs: &[LayoutInput<'_>]) -> FunctionStartSymbolIndex {
1486 let mut out: FunctionStartSymbolIndex = HashMap::new();
1487 for input in inputs {
1488 for input_sym in &input.object.symbols {
1489 if input_sym.stab_kind().is_some()
1490 || input_sym.kind() != SymKind::Sect
1491 || input_sym.alt_entry()
1492 {
1493 continue;
1494 }
1495 let Ok(name) = input.object.symbol_name(input_sym) else {
1496 continue;
1497 };
1498 if is_assembler_temporary_symbol(name) {
1499 continue;
1500 }
1501 let Some(section) = input.object.section_for_symbol(input_sym) else {
1502 continue;
1503 };
1504 out.entry((input.id, input_sym.sect_idx()))
1505 .or_default()
1506 .push(input_sym.value().saturating_sub(section.addr));
1507 }
1508 }
1509 for offsets in out.values_mut() {
1510 offsets.sort_unstable();
1511 offsets.dedup();
1512 }
1513 out
1514 }
1515
1516 fn build_data_in_code(
1517 layout: &Layout,
1518 inputs: &[LayoutInput<'_>],
1519 atom_table: &AtomTable,
1520 icf_redirects: Option<&HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>,
1521 ) -> Result<Vec<u8>, WriteError> {
1522 #[derive(Clone, Copy)]
1523 struct RemappedEntry {
1524 input_order: usize,
1525 input_entry_index: usize,
1526 offset: u32,
1527 length: u16,
1528 kind: u16,
1529 }
1530
1531 let atoms_by_input_section = atom_table.by_input_section();
1532 let atom_ranges = build_atom_range_index(atom_table, &atoms_by_input_section, icf_redirects);
1533 let mut remapped = Vec::new();
1534 for (input_order, input) in inputs.iter().enumerate() {
1535 for (input_entry_index, entry) in input.object.data_in_code.iter().copied().enumerate() {
1536 let (section_index, section_relative) =
1537 remap_data_in_code_to_section(input.object, entry)?;
1538 let (atom_id, atom_delta) = find_containing_atom_range(
1539 &atom_ranges,
1540 input.id,
1541 section_index,
1542 section_relative,
1543 entry.length as u32,
1544 )
1545 .ok_or_else(|| {
1546 WriteError::MalformedDataInCode(
1547 input.object.path.clone(),
1548 format!(
1549 "entry at file offset {} (len {}) did not land inside any atom",
1550 entry.offset, entry.length
1551 ),
1552 )
1553 })?;
1554 let output_offset = layout.atom_file_offset(atom_id).ok_or_else(|| {
1555 WriteError::MalformedDataInCode(
1556 input.object.path.clone(),
1557 format!(
1558 "atom {:?} for entry at file offset {} is missing from final layout",
1559 atom_id, entry.offset
1560 ),
1561 )
1562 })? + atom_delta as u64;
1563 remapped.push(RemappedEntry {
1564 input_order,
1565 input_entry_index,
1566 offset: u32_fit(output_offset, "data-in-code output offset")?,
1567 length: entry.length,
1568 kind: entry.kind,
1569 });
1570 }
1571 }
1572
1573 remapped.sort_by(|a, b| {
1574 a.offset
1575 .cmp(&b.offset)
1576 .then_with(|| a.input_order.cmp(&b.input_order))
1577 .then_with(|| a.input_entry_index.cmp(&b.input_entry_index))
1578 });
1579
1580 let mut out = Vec::with_capacity(remapped.len() * 8);
1581 for entry in remapped {
1582 out.extend_from_slice(&entry.offset.to_le_bytes());
1583 out.extend_from_slice(&entry.length.to_le_bytes());
1584 out.extend_from_slice(&entry.kind.to_le_bytes());
1585 }
1586 Ok(out)
1587 }
1588
1589 fn build_loh(
1590 _layout: &Layout,
1591 _inputs: &[LayoutInput<'_>],
1592 _atom_table: &AtomTable,
1593 _icf_redirects: Option<&HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>,
1594 ) -> Result<Vec<u8>, WriteError> {
1595 // Current Apple ld omits LC_LINKER_OPTIMIZATION_HINT from final linked
1596 // executables and dylibs on our parity corpus, so we do the same.
1597 Ok(Vec::new())
1598 }
1599
1600 fn remap_data_in_code_to_section(
1601 object: &ObjectFile,
1602 entry: DataInCodeEntry,
1603 ) -> Result<(u8, u32), WriteError> {
1604 let entry_start = entry.offset as u64;
1605 let entry_end = entry_start
1606 .checked_add(entry.length as u64)
1607 .ok_or_else(|| {
1608 WriteError::MalformedDataInCode(
1609 object.path.clone(),
1610 format!(
1611 "entry at input offset {} with len {} overflows u64",
1612 entry.offset, entry.length
1613 ),
1614 )
1615 })?;
1616 let mut matches = object
1617 .sections
1618 .iter()
1619 .enumerate()
1620 .filter(|(_, section)| !section.data.is_empty() && is_executable(section.kind))
1621 .filter_map(|(idx, section)| {
1622 let section_start = section.addr;
1623 let section_end = section.addr.checked_add(section.size)?;
1624 (section_start <= entry_start && entry_end <= section_end)
1625 .then_some(((idx + 1) as u8, (entry_start - section_start) as u32))
1626 });
1627 if let Some(mapped) = matches.next() {
1628 if matches.next().is_none() {
1629 return Ok(mapped);
1630 }
1631 return Err(WriteError::MalformedDataInCode(
1632 object.path.clone(),
1633 format!(
1634 "entry at input offset {} (len {}) ambiguously matches multiple executable input sections",
1635 entry.offset, entry.length
1636 ),
1637 ));
1638 }
1639 Err(WriteError::MalformedDataInCode(
1640 object.path.clone(),
1641 format!(
1642 "entry at input offset {} (len {}) does not map to any executable input section range",
1643 entry.offset, entry.length
1644 ),
1645 ))
1646 }
1647
1648 fn collect_imports(
1649 sym_table: &SymbolTable,
1650 synthetic_plan: &SyntheticPlan,
1651 ) -> Result<Vec<ImportSymbolRecord>, WriteError> {
1652 let mut ids: Vec<SymbolId> = synthetic_plan
1653 .stubs
1654 .entries
1655 .iter()
1656 .map(|entry| entry.symbol)
1657 .chain(synthetic_plan.got.entries.iter().map(|entry| entry.symbol))
1658 .chain(
1659 synthetic_plan
1660 .thread_pointers
1661 .entries
1662 .iter()
1663 .map(|entry| entry.symbol),
1664 )
1665 .chain(
1666 synthetic_plan
1667 .lazy_pointers
1668 .entries
1669 .iter()
1670 .map(|entry| entry.symbol),
1671 )
1672 .chain(synthetic_plan.direct_binds.iter().map(|entry| entry.symbol))
1673 .collect();
1674 if let Some(symbol) = synthetic_plan.tlv_bootstrap_symbol {
1675 ids.push(symbol);
1676 }
1677 ids.sort();
1678 ids.dedup();
1679
1680 let mut out = Vec::with_capacity(ids.len());
1681 for id in ids {
1682 let symbol = sym_table.get(id);
1683 let Symbol::DylibImport {
1684 name,
1685 ordinal,
1686 weak_import,
1687 ..
1688 } = symbol
1689 else {
1690 continue;
1691 };
1692 out.push(ImportSymbolRecord {
1693 symbol: id,
1694 name: sym_table.interner.resolve(*name).to_string(),
1695 ordinal: *ordinal,
1696 weak_import: *weak_import,
1697 });
1698 }
1699 out.sort_by(|a, b| a.name.cmp(&b.name));
1700 Ok(out)
1701 }
1702
/// Wall-clock time spent in each phase of `build_output_symbols_profiled`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct SymbolPlanBuildTimings {
    /// Collecting input-local and synthetic local symbols.
    locals: Duration,
    /// Collecting defined globals, sorting, and adding undefined imports.
    globals: Duration,
    /// Building the string table and the final symbol entries.
    strtab: Duration,
}
1709
1710 fn build_output_symbols_profiled(
1711 layout: &Layout,
1712 kind: OutputKind,
1713 dead_strip: bool,
1714 strip_locals: bool,
1715 visibility: &SymbolVisibilityPolicy,
1716 inputs: LinkEditInputs<'_>,
1717 imports: &[ImportSymbolRecord],
1718 ) -> Result<(SymbolTablePlan, SymbolPlanBuildTimings), WriteError> {
1719 let sym_table = inputs.0.sym_table;
1720 let atom_sections = atom_section_ordinals(layout);
1721 let atom_addrs = atom_addresses(layout);
1722 let atoms_by_input_section = inputs.0.atom_table.by_input_section();
1723 let atom_ranges = build_atom_range_index(
1724 inputs.0.atom_table,
1725 &atoms_by_input_section,
1726 inputs.0.icf_redirects,
1727 );
1728 let file_index_by_input: HashMap<InputId, usize> = inputs
1729 .0
1730 .layout_inputs
1731 .iter()
1732 .enumerate()
1733 .map(|(idx, input)| (input.id, idx + 1))
1734 .collect();
1735 let image_base = layout.segment("__TEXT").map(|seg| seg.vm_addr).unwrap_or(0);
1736 let mut timings = SymbolPlanBuildTimings::default();
1737 let mut locals = Vec::new();
1738 let mut external_defineds = Vec::new();
1739 let mut undefineds = Vec::with_capacity(imports.len());
1740
1741 if kind == OutputKind::Executable && !layout.sections.is_empty() {
1742 let text_vmaddr = layout
1743 .segment("__TEXT")
1744 .ok_or(WriteError::MissingSegment("__TEXT"))?
1745 .vm_addr;
1746 let hide_header = visibility.hides("__mh_execute_header");
1747 let header_partition = if hide_header {
1748 OutputSymbolPartition::Local
1749 } else {
1750 OutputSymbolPartition::ExternalDefined
1751 };
1752 let header_type = defined_symbol_type(hide_header);
1753 let target = if hide_header {
1754 &mut locals
1755 } else {
1756 &mut external_defineds
1757 };
1758 target.push(OutputSymbolSpec {
1759 symbol: None,
1760 name: "__mh_execute_header".to_string(),
1761 partition: header_partition,
1762 n_type: header_type,
1763 n_sect: 1,
1764 n_desc: REFERENCED_DYNAMICALLY,
1765 n_value: text_vmaddr,
1766 size: 0,
1767 file_index: 0,
1768 });
1769 }
1770
1771 let phase_started = std::time::Instant::now();
1772 for input in inputs.0.layout_inputs {
1773 let ctx = LocalSymbolContext {
1774 atom_table: inputs.0.atom_table,
1775 atom_ranges: &atom_ranges,
1776 atom_sections: &atom_sections,
1777 atom_addrs: &atom_addrs,
1778 input_id: input.id,
1779 file_index: file_index_by_input[&input.id],
1780 };
1781 collect_local_symbols(&ctx, input.object, &mut locals)?;
1782 }
1783 collect_synthetic_local_symbols(layout, inputs.0.synthetic_plan, &mut locals)?;
1784 timings.locals += phase_started.elapsed();
1785
1786 let phase_started = std::time::Instant::now();
1787 for (symbol_id, symbol) in sym_table.iter() {
1788 let Symbol::Defined {
1789 name,
1790 origin,
1791 atom,
1792 value,
1793 weak,
1794 private_extern,
1795 no_dead_strip,
1796 ..
1797 } = symbol
1798 else {
1799 continue;
1800 };
1801 if *private_extern {
1802 continue;
1803 }
1804 let name = sym_table.interner.resolve(*name).to_string();
1805 let hidden = visibility.hides(&name);
1806 let (n_type, n_sect, n_value) = if atom.0 == 0 {
1807 (absolute_symbol_type(hidden), NO_SECT, *value)
1808 } else {
1809 let Some(addr) = atom_addrs.get(atom).copied() else {
1810 if dead_strip {
1811 continue;
1812 }
1813 return Err(WriteError::DefinedSymbolAtomMissing(symbol_id, *atom));
1814 };
1815 let sect = *atom_sections
1816 .get(atom)
1817 .ok_or(WriteError::DefinedSymbolSectionMissing(symbol_id, *atom))?;
1818 (defined_symbol_type(hidden), sect, addr + *value)
1819 };
1820 let size = if atom.0 == 0 {
1821 0
1822 } else {
1823 inputs
1824 .0
1825 .atom_table
1826 .get(*atom)
1827 .size
1828 .saturating_sub(*value as u32) as u64
1829 };
1830 let mut n_desc = 0;
1831 if *weak {
1832 n_desc |= N_WEAK_DEF;
1833 }
1834 if *no_dead_strip {
1835 n_desc |= N_NO_DEAD_STRIP;
1836 }
1837 let partition = if hidden {
1838 OutputSymbolPartition::Local
1839 } else {
1840 OutputSymbolPartition::ExternalDefined
1841 };
1842 let target = if hidden {
1843 &mut locals
1844 } else {
1845 &mut external_defineds
1846 };
1847 target.push(OutputSymbolSpec {
1848 symbol: Some(symbol_id),
1849 name,
1850 partition,
1851 n_type,
1852 n_sect,
1853 n_desc,
1854 n_value,
1855 size,
1856 file_index: file_index_by_input.get(origin).copied().unwrap_or(0),
1857 });
1858 }
1859
1860 sort_local_symbols(&mut locals);
1861 external_defineds.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
1862 for import in imports {
1863 let mut n_desc = import.ordinal << 8;
1864 if import.weak_import {
1865 n_desc |= N_WEAK_REF;
1866 }
1867 undefineds.push(OutputSymbolSpec {
1868 symbol: Some(import.symbol),
1869 name: import.name.clone(),
1870 partition: OutputSymbolPartition::Undefined,
1871 n_type: N_UNDF | N_EXT,
1872 n_sect: NO_SECT,
1873 n_desc,
1874 n_value: 0,
1875 size: 0,
1876 file_index: 0,
1877 });
1878 }
1879 undefineds.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
1880 timings.globals += phase_started.elapsed();
1881
1882 let exports = if matches!(kind, OutputKind::Dylib | OutputKind::Executable) {
1883 external_defineds
1884 .iter()
1885 .map(|spec| ExportEntry {
1886 name: spec.name.clone(),
1887 flags: export_symbol_flags(layout, spec.n_desc, spec.n_type, spec.n_sect),
1888 kind: export_symbol_kind(
1889 layout,
1890 image_base,
1891 spec.n_type,
1892 spec.n_sect,
1893 spec.n_value,
1894 ),
1895 })
1896 .collect()
1897 } else {
1898 Vec::new()
1899 };
1900
1901 let local_count = if strip_locals { 0 } else { locals.len() };
1902 let external_defined_count = external_defineds.len();
1903 let undefined_count = undefineds.len();
1904 let phase_started = std::time::Instant::now();
1905 let mut specs = Vec::with_capacity(local_count + external_defineds.len() + undefineds.len());
1906 if !strip_locals {
1907 specs.extend(locals);
1908 }
1909 specs.extend(external_defineds);
1910 specs.extend(undefineds);
1911
1912 let (strtab_bytes, strx_by_spec) =
1913 StringTableBuilder::build_with_name_offsets(specs.iter().map(|spec| spec.name.as_str()));
1914
1915 let mut symbols = Vec::with_capacity(specs.len());
1916 let mut symbol_indices = HashMap::with_capacity(specs.len());
1917 let map_symbols = specs
1918 .iter()
1919 .filter(|spec| spec.partition != OutputSymbolPartition::Undefined)
1920 .map(|spec| LinkMapSymbol {
1921 name: spec.name.clone(),
1922 addr: spec.n_value,
1923 size: spec.size,
1924 file_index: spec.file_index,
1925 })
1926 .collect();
1927 for (idx, spec) in specs.into_iter().enumerate() {
1928 let strx = strx_by_spec[idx];
1929 symbols.push(InputSymbol::from_raw(RawNlist {
1930 strx,
1931 n_type: spec.n_type,
1932 n_sect: spec.n_sect,
1933 n_desc: spec.n_desc,
1934 n_value: spec.n_value,
1935 }));
1936 if let Some(symbol) = spec.symbol {
1937 symbol_indices.insert(symbol, idx as u32);
1938 }
1939 }
1940 timings.strtab += phase_started.elapsed();
1941
1942 Ok((
1943 SymbolTablePlan {
1944 symbols,
1945 map_symbols,
1946 strtab_bytes,
1947 symbol_indices,
1948 exports,
1949 dysymtab: DysymtabCmd {
1950 ilocalsym: 0,
1951 nlocalsym: local_count as u32,
1952 iextdefsym: local_count as u32,
1953 nextdefsym: external_defined_count as u32,
1954 iundefsym: (local_count + external_defined_count) as u32,
1955 nundefsym: undefined_count as u32,
1956 ..DysymtabCmd::default()
1957 },
1958 },
1959 timings,
1960 ))
1961 }
1962
1963 fn sort_local_symbols(locals: &mut [OutputSymbolSpec]) {
1964 locals.sort_by(|lhs, rhs| {
1965 lhs.n_sect
1966 .cmp(&rhs.n_sect)
1967 .then_with(|| lhs.n_value.cmp(&rhs.n_value))
1968 .then_with(|| lhs.n_type.cmp(&rhs.n_type))
1969 .then_with(|| lhs.name.cmp(&rhs.name))
1970 });
1971 }
1972
1973 fn collect_synthetic_local_symbols(
1974 layout: &Layout,
1975 synthetic_plan: &SyntheticPlan,
1976 out: &mut Vec<OutputSymbolSpec>,
1977 ) -> Result<(), WriteError> {
1978 if !synthetic_plan.needs_dyld_private {
1979 return Ok(());
1980 }
1981
1982 let Some((section_index, section)) = layout
1983 .sections
1984 .iter()
1985 .enumerate()
1986 .find(|(_, section)| section.segment == "__DATA" && section.name == "__data")
1987 else {
1988 return Err(WriteError::MissingSegment("__DATA"));
1989 };
1990
1991 out.push(OutputSymbolSpec {
1992 symbol: None,
1993 name: "__dyld_private".to_string(),
1994 partition: OutputSymbolPartition::Local,
1995 n_type: N_SECT,
1996 n_sect: u8::try_from(section_index + 1).expect("section index should fit in n_sect"),
1997 n_desc: 0,
1998 n_value: section.addr + section.synthetic_offset,
1999 size: 8,
2000 file_index: 0,
2001 });
2002 Ok(())
2003 }
2004
2005 fn collect_local_symbols(
2006 ctx: &LocalSymbolContext<'_>,
2007 object: &ObjectFile,
2008 out: &mut Vec<OutputSymbolSpec>,
2009 ) -> Result<(), WriteError> {
2010 for input_sym in &object.symbols {
2011 if input_sym.stab_kind().is_some() {
2012 continue;
2013 }
2014 if input_sym.is_ext() && !input_sym.is_private_ext() {
2015 continue;
2016 }
2017 let name = object.symbol_name(input_sym).unwrap_or("").to_string();
2018 if is_assembler_temporary_symbol(&name) {
2019 continue;
2020 }
2021 match input_sym.kind() {
2022 SymKind::Sect => {
2023 let section = object
2024 .section_for_symbol(input_sym)
2025 .expect("section symbol without section");
2026 let offset = input_sym.value().saturating_sub(section.addr) as u32;
2027 let (atom_id, delta) = find_containing_atom(
2028 ctx.atom_ranges,
2029 ctx.input_id,
2030 input_sym.sect_idx(),
2031 offset,
2032 )
2033 .ok_or(WriteError::MissingSegment("__UNKNOWN"))?;
2034 let addr = ctx.atom_addrs.get(&atom_id).copied().ok_or(
2035 WriteError::DefinedSymbolAtomMissing(SymbolId(u32::MAX), atom_id),
2036 )? + delta as u64;
2037 let n_sect = *ctx.atom_sections.get(&atom_id).ok_or(
2038 WriteError::DefinedSymbolSectionMissing(SymbolId(u32::MAX), atom_id),
2039 )?;
2040 out.push(OutputSymbolSpec {
2041 symbol: None,
2042 name,
2043 partition: OutputSymbolPartition::Local,
2044 n_type: input_symbol_type(input_sym),
2045 n_sect,
2046 n_desc: input_sym.raw.n_desc,
2047 n_value: addr,
2048 size: ctx.atom_table.get(atom_id).size.saturating_sub(delta) as u64,
2049 file_index: ctx.file_index,
2050 });
2051 }
2052 SymKind::Abs => {
2053 out.push(OutputSymbolSpec {
2054 symbol: None,
2055 name,
2056 partition: OutputSymbolPartition::Local,
2057 n_type: input_symbol_type(input_sym),
2058 n_sect: NO_SECT,
2059 n_desc: input_sym.raw.n_desc,
2060 n_value: input_sym.value(),
2061 size: 0,
2062 file_index: ctx.file_index,
2063 });
2064 }
2065 SymKind::Undef | SymKind::Indirect => {}
2066 }
2067 }
2068 Ok(())
2069 }
2070
/// Lookup tables shared by `collect_local_symbols` while it processes one
/// input object.
struct LocalSymbolContext<'a> {
    /// All atoms of the link; used to read an atom's size.
    atom_table: &'a AtomTable,
    /// Atom ranges per `(input, input section)`, used to find the atom that
    /// contains a symbol.
    atom_ranges: &'a AtomRangeIndex,
    /// `n_sect` value to emit for symbols of each laid-out atom.
    atom_sections: &'a HashMap<crate::resolve::AtomId, u8>,
    /// Address of each laid-out atom.
    atom_addrs: &'a HashMap<crate::resolve::AtomId, u64>,
    /// Id of the input object being processed.
    input_id: InputId,
    /// Link-map file index recorded on every emitted symbol.
    file_index: usize,
}
2079
/// Half-open byte range `[start, end)` that one atom occupies inside its
/// input section.
#[derive(Debug, Clone, Copy)]
struct AtomRange {
    /// Atom owning the range, after following ICF redirects.
    atom: crate::resolve::AtomId,
    /// Offset of the atom's first byte within the input section.
    start: u32,
    /// `start` plus the atom's size, saturating at `u32::MAX`.
    end: u32,
}

/// Atom ranges keyed by `(input id, input section index)`; each list is
/// sorted by `start`, then `end`.
type AtomRangeIndex = HashMap<(InputId, u8), Vec<AtomRange>>;
2088
/// Returns `true` for names that start with `L` or with `ltmp`, which this
/// file treats as assembler temporaries and never emits.
fn is_assembler_temporary_symbol(name: &str) -> bool {
    const TEMPORARY_PREFIXES: [&str; 2] = ["L", "ltmp"];
    TEMPORARY_PREFIXES
        .iter()
        .any(|prefix| name.starts_with(*prefix))
}
2092
2093 fn build_atom_range_index(
2094 atom_table: &AtomTable,
2095 atoms_by_input_section: &HashMap<(InputId, u8), Vec<crate::resolve::AtomId>>,
2096 icf_redirects: Option<&HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>,
2097 ) -> AtomRangeIndex {
2098 let mut out = HashMap::with_capacity(atoms_by_input_section.len());
2099 for (&key, ids) in atoms_by_input_section {
2100 let mut ranges = Vec::with_capacity(ids.len());
2101 for atom_id in ids {
2102 let atom = atom_table.get(*atom_id);
2103 ranges.push(AtomRange {
2104 atom: canonical_atom(*atom_id, icf_redirects),
2105 start: atom.input_offset,
2106 end: atom.input_offset.saturating_add(atom.size),
2107 });
2108 }
2109 ranges.sort_by(|lhs, rhs| {
2110 lhs.start
2111 .cmp(&rhs.start)
2112 .then_with(|| lhs.end.cmp(&rhs.end))
2113 });
2114 out.insert(key, ranges);
2115 }
2116 out
2117 }
2118
2119 fn find_containing_atom(
2120 atom_ranges: &AtomRangeIndex,
2121 input_id: InputId,
2122 input_section: u8,
2123 offset: u32,
2124 ) -> Option<(crate::resolve::AtomId, u32)> {
2125 find_containing_atom_range(atom_ranges, input_id, input_section, offset, 1)
2126 }
2127
2128 fn find_containing_atom_range(
2129 atom_ranges: &AtomRangeIndex,
2130 input_id: InputId,
2131 input_section: u8,
2132 offset: u32,
2133 len: u32,
2134 ) -> Option<(crate::resolve::AtomId, u32)> {
2135 let ranges = atom_ranges.get(&(input_id, input_section))?;
2136 let range_end = offset.checked_add(len)?;
2137 let idx = ranges.partition_point(|range| range.start <= offset);
2138 let range = idx.checked_sub(1).and_then(|idx| ranges.get(idx))?;
2139 (range.start <= offset && range_end <= range.end).then_some((range.atom, offset - range.start))
2140 }
2141
2142 fn canonical_atom(
2143 atom_id: crate::resolve::AtomId,
2144 redirects: Option<&HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>,
2145 ) -> crate::resolve::AtomId {
2146 let Some(redirects) = redirects else {
2147 return atom_id;
2148 };
2149 let mut current = atom_id;
2150 while let Some(&next) = redirects.get(&current) {
2151 if next == current {
2152 break;
2153 }
2154 current = next;
2155 }
2156 current
2157 }
2158
2159 fn input_symbol_type(input_sym: &InputSymbol) -> u8 {
2160 let mut n_type = match input_sym.kind() {
2161 SymKind::Sect => N_SECT,
2162 SymKind::Abs => N_ABS,
2163 SymKind::Undef => N_UNDF,
2164 SymKind::Indirect => N_INDR,
2165 };
2166 if input_sym.is_private_ext() {
2167 n_type |= N_PEXT;
2168 } else if input_sym.is_ext() {
2169 n_type |= N_EXT;
2170 }
2171 n_type
2172 }
2173
2174 fn atom_section_ordinals(layout: &Layout) -> HashMap<crate::resolve::AtomId, u8> {
2175 let mut out = HashMap::new();
2176 for (idx, section) in layout.sections.iter().enumerate() {
2177 let ordinal = (idx + 1) as u8;
2178 for placed in &section.atoms {
2179 out.insert(placed.atom, ordinal);
2180 }
2181 }
2182 out
2183 }
2184
2185 fn atom_addresses(layout: &Layout) -> HashMap<AtomId, u64> {
2186 let mut out = HashMap::new();
2187 for section in &layout.sections {
2188 for placed in &section.atoms {
2189 out.insert(placed.atom, section.addr + placed.offset);
2190 }
2191 }
2192 out
2193 }
2194
2195 fn export_symbol_flags(layout: &Layout, n_desc: u16, n_type: u8, n_sect: u8) -> u64 {
2196 let mut flags = 0u64;
2197 if n_desc & N_WEAK_DEF != 0 {
2198 flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
2199 }
2200 match n_type & N_TYPE {
2201 N_ABS => flags | EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE,
2202 _ if section_is_thread_local(layout, n_sect) => {
2203 flags | EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
2204 }
2205 _ => flags,
2206 }
2207 }
2208
2209 fn export_symbol_kind(
2210 layout: &Layout,
2211 image_base: u64,
2212 n_type: u8,
2213 n_sect: u8,
2214 n_value: u64,
2215 ) -> ExportKind {
2216 match n_type & N_TYPE {
2217 N_ABS => ExportKind::Absolute { address: n_value },
2218 _ if section_is_thread_local(layout, n_sect) => ExportKind::ThreadLocal {
2219 address: n_value.saturating_sub(image_base),
2220 },
2221 _ => ExportKind::Regular {
2222 address: n_value.saturating_sub(image_base),
2223 },
2224 }
2225 }
2226
2227 fn section_is_thread_local(layout: &Layout, n_sect: u8) -> bool {
2228 if n_sect == 0 {
2229 return false;
2230 }
2231 layout
2232 .sections
2233 .get(n_sect as usize - 1)
2234 .map(|section| {
2235 matches!(
2236 section.kind,
2237 crate::section::SectionKind::ThreadLocalRegular
2238 | crate::section::SectionKind::ThreadLocalZeroFill
2239 )
2240 })
2241 .unwrap_or(false)
2242 }
2243
2244 fn defined_symbol_type(private_extern: bool) -> u8 {
2245 if private_extern {
2246 N_SECT | N_PEXT
2247 } else {
2248 N_SECT | N_EXT
2249 }
2250 }
2251
2252 fn absolute_symbol_type(private_extern: bool) -> u8 {
2253 if private_extern {
2254 N_ABS | N_PEXT
2255 } else {
2256 N_ABS | N_EXT
2257 }
2258 }
2259
2260 fn read_symbol_patterns(path: &PathBuf) -> Result<Vec<String>, WriteError> {
2261 let contents = fs::read_to_string(path)
2262 .map_err(|err| WriteError::SymbolListRead(path.clone(), err.to_string()))?;
2263 Ok(contents
2264 .lines()
2265 .map(str::trim)
2266 .filter(|line| !line.is_empty())
2267 .map(ToString::to_string)
2268 .collect())
2269 }
2270
/// Glob-style match of `value` against `pattern`.
///
/// `*` matches any run of bytes (including none) and `?` matches exactly one
/// byte; every other pattern byte must match literally. Matching is performed
/// on bytes, so a multi-byte UTF-8 character needs one `?` per byte.
///
/// A `*` in the pattern is always treated as a wildcard, even when the value
/// has a literal `*` at the same position. Testing the literal comparison
/// first would consume the star without recording a backtrack point and
/// reject inputs such as pattern `*a` against value `*ba`.
fn wildcard_matches(pattern: &str, value: &str) -> bool {
    let pattern = pattern.as_bytes();
    let value = value.as_bytes();
    let mut p = 0usize;
    let mut v = 0usize;
    // For the most recent `*`: (pattern index just after it, number of value
    // bytes consumed before it started absorbing + bytes absorbed so far).
    let mut resume: Option<(usize, usize)> = None;

    while v < value.len() {
        match pattern.get(p) {
            Some(b'*') => {
                // Start with the star absorbing nothing; widen on mismatch.
                p += 1;
                resume = Some((p, v));
            }
            Some(&expected) if expected == b'?' || expected == value[v] => {
                p += 1;
                v += 1;
            }
            _ => {
                // Mismatch: let the last star swallow one more byte and retry.
                let Some((after_star, absorbed_to)) = resume else {
                    return false;
                };
                p = after_star;
                v = absorbed_to + 1;
                resume = Some((after_star, v));
            }
        }
    }

    // The value is exhausted; only trailing stars may remain in the pattern.
    pattern[p..].iter().all(|&byte| byte == b'*')
}
2301
2302 fn place_optional_block(
2303 cursor: &mut u64,
2304 size: usize,
2305 context: &'static str,
2306 ) -> Result<u32, WriteError> {
2307 if size == 0 {
2308 return Ok(0);
2309 }
2310 place_required_block(cursor, size, context)
2311 }
2312
2313 fn place_required_block(
2314 cursor: &mut u64,
2315 size: usize,
2316 context: &'static str,
2317 ) -> Result<u32, WriteError> {
2318 *cursor = align_up(*cursor, 8);
2319 let offset = u32_fit(*cursor, context)?;
2320 *cursor += size as u64;
2321 Ok(offset)
2322 }
2323
2324 fn place_linkedit_data_block(
2325 cursor: &mut u64,
2326 size: usize,
2327 context: &'static str,
2328 ) -> Result<LinkEditDataCmd, WriteError> {
2329 *cursor = align_up(*cursor, 8);
2330 let dataoff = u32_fit(*cursor, context)?;
2331 *cursor += size as u64;
2332 Ok(LinkEditDataCmd {
2333 dataoff,
2334 datasize: size as u32,
2335 })
2336 }
2337
2338 fn place_optional_linkedit_data_block(
2339 cursor: &mut u64,
2340 size: usize,
2341 context: &'static str,
2342 ) -> Result<Option<LinkEditDataCmd>, WriteError> {
2343 if size == 0 {
2344 return Ok(None);
2345 }
2346 Ok(Some(place_linkedit_data_block(cursor, size, context)?))
2347 }
2348
/// Appends one section's indirect symbol indices to the shared table.
///
/// When the iterator yields at least one entry, the table index at which the
/// section's entries begin is recorded in `indirect_starts` under the
/// `(segment, section)` key. Sections contributing nothing get no entry.
fn push_indirect_section(
    indirect_symbols: &mut Vec<u32>,
    indirect_starts: &mut HashMap<(String, String), u32>,
    key: (&str, &str),
    symbols: impl Iterator<Item = u32>,
) {
    let first_index = indirect_symbols.len();
    indirect_symbols.extend(symbols);
    if indirect_symbols.len() > first_index {
        let (segment, section) = key;
        indirect_starts.insert((segment.to_string(), section.to_string()), first_index as u32);
    }
}
2365
2366 fn indirect_symbol_index(
2367 symbol: SymbolId,
2368 import_lookup: &HashMap<SymbolId, &ImportSymbolRecord>,
2369 symbol_indices: &HashMap<SymbolId, u32>,
2370 ) -> u32 {
2371 if import_lookup.contains_key(&symbol) {
2372 symbol_indices
2373 .get(&symbol)
2374 .copied()
2375 .unwrap_or(INDIRECT_SYMBOL_LOCAL)
2376 } else {
2377 INDIRECT_SYMBOL_LOCAL
2378 }
2379 }
2380
2381 fn build_bind_streams(
2382 layout: &Layout,
2383 synthetic_plan: &SyntheticPlan,
2384 imports: &HashMap<SymbolId, &ImportSymbolRecord>,
2385 ) -> Result<BindStreams, WriteError> {
2386 let mut bind_specs = Vec::new();
2387 let weak_bind = Vec::new();
2388 let mut lazy_bind = OpcodeStream::new();
2389 let mut lazy_offsets = HashMap::new();
2390 let layout_index = BindLayoutIndex::build(layout)?;
2391
2392 if let Some(tlv_bootstrap) = synthetic_plan.tlv_bootstrap_symbol {
2393 let segment_index = segment_index(layout, "__DATA")?;
2394 let segment = layout
2395 .segment("__DATA")
2396 .ok_or(WriteError::MissingSegment("__DATA"))?;
2397 if let Some(section) = layout
2398 .sections
2399 .iter()
2400 .find(|section| section.segment == "__DATA" && section.name == "__thread_vars")
2401 {
2402 let import = imports
2403 .get(&tlv_bootstrap)
2404 .copied()
2405 .ok_or(WriteError::ImportSymbolMissing(tlv_bootstrap))?;
2406 for placed in &section.atoms {
2407 for descriptor_offset in
2408 (0..placed.size).step_by(THREAD_VARIABLE_DESCRIPTOR_SIZE as usize)
2409 {
2410 let slot_addr = section.addr + placed.offset + descriptor_offset;
2411 bind_specs.push(BindRecordSpec {
2412 segment_index,
2413 segment_offset: slot_addr - segment.vm_addr,
2414 ordinal: import.ordinal,
2415 name: &import.name,
2416 weak_import: import.weak_import,
2417 addend: 0,
2418 terminate: false,
2419 });
2420 }
2421 }
2422 }
2423 }
2424
2425 if !synthetic_plan.got.entries.is_empty() {
2426 let segment_index = segment_index(layout, "__DATA_CONST")?;
2427 let segment = layout
2428 .segment("__DATA_CONST")
2429 .ok_or(WriteError::MissingSegment("__DATA_CONST"))?;
2430 let section = layout
2431 .sections
2432 .iter()
2433 .find(|section| section.segment == "__DATA_CONST" && section.name == "__got")
2434 .ok_or(WriteError::MissingSegment("__DATA_CONST"))?;
2435 for (idx, entry) in synthetic_plan.got.entries.iter().enumerate() {
2436 let Some(import) = imports.get(&entry.symbol).copied() else {
2437 continue;
2438 };
2439 let slot_addr = section.addr + (idx as u64) * 8;
2440 bind_specs.push(BindRecordSpec {
2441 segment_index,
2442 segment_offset: slot_addr - segment.vm_addr,
2443 ordinal: import.ordinal,
2444 name: &import.name,
2445 weak_import: import.weak_import,
2446 addend: 0,
2447 terminate: false,
2448 });
2449 }
2450 }
2451
2452 for entry in &synthetic_plan.direct_binds {
2453 let import = imports
2454 .get(&entry.symbol)
2455 .copied()
2456 .ok_or(WriteError::ImportSymbolMissing(entry.symbol))?;
2457 let placement = layout_index
2458 .atoms
2459 .get(&entry.atom)
2460 .ok_or(WriteError::DirectBindAtomMissing(entry.atom))?;
2461 if placement.is_thread_vars {
2462 // `__thread_vars` starts are emitted through the dedicated
2463 // `__tlv_bootstrap` pass above. Descriptor tails are rewritten to
2464 // template offsets before write, so any generic direct bind landing
2465 // back in this section is stale and would override the TLV bind.
2466 continue;
2467 }
2468 let slot_addr = placement.addr + entry.atom_offset as u64;
2469 bind_specs.push(BindRecordSpec {
2470 segment_index: placement.segment_index,
2471 segment_offset: slot_addr - placement.segment_vm_addr,
2472 ordinal: import.ordinal,
2473 name: &import.name,
2474 weak_import: import.weak_import,
2475 addend: entry.addend,
2476 terminate: false,
2477 });
2478 }
2479
2480 if let Some(last) = bind_specs.last_mut() {
2481 last.terminate = true;
2482 }
2483
2484 if !synthetic_plan.lazy_pointers.entries.is_empty() {
2485 let segment_index = segment_index(layout, "__DATA")?;
2486 let segment = layout
2487 .segment("__DATA")
2488 .ok_or(WriteError::MissingSegment("__DATA"))?;
2489 let section = layout
2490 .sections
2491 .iter()
2492 .find(|section| section.segment == "__DATA" && section.name == "__la_symbol_ptr")
2493 .ok_or(WriteError::MissingSegment("__DATA"))?;
2494 for (idx, entry) in synthetic_plan.lazy_pointers.entries.iter().enumerate() {
2495 let import = imports
2496 .get(&entry.symbol)
2497 .copied()
2498 .ok_or(WriteError::ImportSymbolMissing(entry.symbol))?;
2499 let slot_addr = section.addr + (idx as u64) * 8;
2500 lazy_offsets.insert(entry.symbol, lazy_bind.len() as u32);
2501 emit_lazy_bind_record(
2502 &mut lazy_bind,
2503 segment_index,
2504 slot_addr - segment.vm_addr,
2505 import.ordinal,
2506 &import.name,
2507 import.weak_import,
2508 );
2509 }
2510 }
2511
2512 Ok(BindStreams {
2513 bind: emit_bind_records(&bind_specs),
2514 weak_bind,
2515 lazy_bind: lazy_bind.into_vec(),
2516 lazy_offsets,
2517 })
2518 }
2519
2520 struct BindLayoutIndex {
2521 atoms: HashMap<AtomId, BindAtomPlacement>,
2522 }
2523
2524 #[derive(Clone, Copy)]
2525 struct BindAtomPlacement {
2526 addr: u64,
2527 segment_index: u8,
2528 segment_vm_addr: u64,
2529 is_thread_vars: bool,
2530 }
2531
2532 impl BindLayoutIndex {
2533 fn build(layout: &Layout) -> Result<Self, WriteError> {
2534 let mut segment_meta = HashMap::with_capacity(layout.segments.len());
2535 for (idx, segment) in layout.segments.iter().enumerate() {
2536 segment_meta.insert(
2537 segment.name.as_str(),
2538 (
2539 u8::try_from(idx).map_err(|_| WriteError::OffsetTooLarge("segment index"))?,
2540 segment.vm_addr,
2541 ),
2542 );
2543 }
2544 let atom_count: usize = layout
2545 .sections
2546 .iter()
2547 .map(|section| section.atoms.len())
2548 .sum();
2549 let mut atoms = HashMap::with_capacity(atom_count);
2550 for section in &layout.sections {
2551 let Some((segment_index, segment_vm_addr)) =
2552 segment_meta.get(section.segment.as_str()).copied()
2553 else {
2554 continue;
2555 };
2556 let is_thread_vars = section.segment == "__DATA" && section.name == "__thread_vars";
2557 for placed in &section.atoms {
2558 atoms.insert(
2559 placed.atom,
2560 BindAtomPlacement {
2561 addr: section.addr + placed.offset,
2562 segment_index,
2563 segment_vm_addr,
2564 is_thread_vars,
2565 },
2566 );
2567 }
2568 }
2569 Ok(Self { atoms })
2570 }
2571 }
2572
2573 fn segment_index(layout: &Layout, name: &str) -> Result<u8, WriteError> {
2574 let idx = layout
2575 .segments
2576 .iter()
2577 .position(|segment| segment.name == name)
2578 .ok_or(WriteError::MissingSegment(match name {
2579 "__DATA_CONST" => "__DATA_CONST",
2580 "__DATA" => "__DATA",
2581 "__TEXT" => "__TEXT",
2582 "__LINKEDIT" => "__LINKEDIT",
2583 _ => "__UNKNOWN",
2584 }))?;
2585 u8::try_from(idx).map_err(|_| WriteError::OffsetTooLarge("segment index"))
2586 }
2587
2588 fn apply_indirect_starts(layout: &mut Layout, linkedit: &LinkEditPlan) {
2589 for section in &mut layout.sections {
2590 if let Some(&start) = linkedit
2591 .indirect_starts
2592 .get(&(section.segment.clone(), section.name.clone()))
2593 {
2594 section.reserved1 = start;
2595 }
2596 }
2597 }
2598
2599 fn entryoff(layout: &Layout) -> Result<u64, WriteError> {
2600 if let Some(text) = layout
2601 .sections
2602 .iter()
2603 .find(|section| section.segment == "__TEXT" && section.name == "__text")
2604 {
2605 return Ok(text.file_off);
2606 }
2607 let text = layout
2608 .segment("__TEXT")
2609 .ok_or(WriteError::MissingSegment("__TEXT"))?;
2610 Ok(text.file_off)
2611 }
2612
2613 fn resolve_entryoff(layout: &Layout, entry_point: Option<EntryPoint>) -> Result<u64, WriteError> {
2614 if let Some(entry_point) = entry_point {
2615 let atom_file_off = layout
2616 .atom_file_offset(entry_point.atom)
2617 .ok_or(WriteError::EntryAtomMissing(entry_point.atom))?;
2618 return Ok(atom_file_off + entry_point.atom_value);
2619 }
2620 entryoff(layout)
2621 }
2622
2623 fn final_file_size(layout: &Layout) -> u64 {
2624 let mut max_size = 0u64;
2625 for segment in &layout.segments {
2626 max_size = max_size.max(segment.file_off + segment.file_size);
2627 }
2628 align_up(max_size.max(1), 1)
2629 }
2630
/// Packs `major.minor.patch` into the Mach-O `xxxx.yy.zz` version word:
/// 16 bits of major, then 8 bits of minor, then 8 bits of patch.
///
/// Components too large for their field are saturated to the field maximum
/// (`65535` / `255` / `255`). Unclamped, an oversized minor or patch would
/// spill into the neighbouring field, e.g. `1.256.0` would encode as `1.0.0`.
fn pack_version(major: u32, minor: u32, patch: u32) -> u32 {
    const MAJOR_MAX: u32 = 0xFFFF;
    const FIELD_MAX: u32 = 0xFF;
    (major.min(MAJOR_MAX) << 16) | (minor.min(FIELD_MAX) << 8) | patch.min(FIELD_MAX)
}
2634
/// Copies up to the first 16 bytes of `s` into a zero-padded 16-byte array.
/// Longer input is truncated.
fn name16(s: &str) -> [u8; 16] {
    let mut padded = [0u8; 16];
    for (slot, byte) in padded.iter_mut().zip(s.bytes()) {
        *slot = byte;
    }
    padded
}
2642
/// Rounds `value` up to the next multiple of `align` using a bit mask, so
/// `align` is expected to be a power of two. Alignments of `0` and `1` return
/// `value` unchanged.
fn align_up(value: u64, align: u64) -> u64 {
    match align.checked_sub(1) {
        None | Some(0) => value,
        Some(mask) => (value + mask) & !mask,
    }
}
2650
2651 fn u32_fit(value: u64, what: &'static str) -> Result<u32, WriteError> {
2652 u32::try_from(value).map_err(|_| WriteError::OffsetTooLarge(what))
2653 }
2654
2655 #[cfg(test)]
2656 mod tests {
2657 use crate::atom::{AltEntry, Atom, AtomFlags, AtomSection, AtomTable};
2658 use crate::input::ObjectFile;
2659 use crate::layout::{Layout, PAGE_SIZE};
2660 use crate::leb::read_uleb;
2661 use crate::macho::reader::MachHeader64;
2662 use crate::resolve::{AtomId, InputId, SymbolId};
2663 use crate::section::{
2664 InputSection, OutputAtom, OutputSection, OutputSectionId, OutputSegment, Prot, SectionKind,
2665 };
2666 use crate::string_table::StringTable;
2667
2668 use super::*;
2669
2670 fn decode_function_starts_blob(blob: &[u8]) -> Vec<u64> {
2671 let mut out = Vec::new();
2672 let mut cursor = 0usize;
2673 let mut current = 0u64;
2674 while cursor < blob.len() {
2675 let (delta, used) = read_uleb(&blob[cursor..]).unwrap();
2676 cursor += used;
2677 if delta == 0 {
2678 break;
2679 }
2680 current += delta;
2681 out.push(current);
2682 }
2683 out
2684 }
2685
/// An empty executable layout must produce an image whose header parses as
/// `MH_EXECUTE` and whose load commands include `LC_MAIN`.
#[test]
fn minimal_executable_writes_parseable_header() {
    let layout = Layout::empty(OutputKind::Executable, 0);
    let options = LinkOptions::default();
    let mut image = Vec::new();
    write(&layout, OutputKind::Executable, &options, &mut image).unwrap();

    let header = crate::macho::reader::parse_header(&image).unwrap();
    let commands = crate::macho::reader::parse_commands(&header, &image).unwrap();
    assert_eq!(header.filetype, MH_EXECUTE);

    let has_lc_main = commands
        .iter()
        .any(|cmd| matches!(cmd, LoadCommand::Raw { cmd, .. } if *cmd == LC_MAIN));
    assert!(has_lc_main, "expected LC_MAIN in {commands:?}");
    assert!(image.len() >= HEADER_SIZE);
}
2709
/// An empty dylib layout must produce an image whose header parses as
/// `MH_DYLIB` and whose load commands include `LC_ID_DYLIB`.
#[test]
fn minimal_dylib_writes_parseable_header() {
    let layout = Layout::empty(OutputKind::Dylib, 0);
    let options = LinkOptions {
        output: Some("libtiny.dylib".into()),
        ..LinkOptions::default()
    };
    let mut image = Vec::new();
    write(&layout, OutputKind::Dylib, &options, &mut image).unwrap();

    let header = crate::macho::reader::parse_header(&image).unwrap();
    let commands = crate::macho::reader::parse_commands(&header, &image).unwrap();
    assert_eq!(header.filetype, MH_DYLIB);

    let has_id_dylib = commands
        .iter()
        .any(|cmd| matches!(cmd, LoadCommand::Dylib(d) if d.cmd == LC_ID_DYLIB));
    assert!(has_id_dylib, "expected LC_ID_DYLIB in {commands:?}");
}
2730
/// `__LINKEDIT` must begin on a page boundary at or after the end of the
/// `__TEXT` file contents.
#[test]
fn linkedit_starts_on_page_boundary_after_text() {
    let layout = Layout::empty(OutputKind::Executable, 0);
    let options = LinkOptions::default();
    let mut image = Vec::new();
    write(&layout, OutputKind::Executable, &options, &mut image).unwrap();

    let header = crate::macho::reader::parse_header(&image).unwrap();
    let commands = crate::macho::reader::parse_commands(&header, &image).unwrap();

    // Keep the last segment command seen for each of the two names.
    let (mut text, mut linkedit) = (None, None);
    for cmd in commands {
        let LoadCommand::Segment64(seg) = cmd else {
            continue;
        };
        let name = seg.segname_str();
        if name == "__TEXT" {
            text = Some(seg);
        } else if name == "__LINKEDIT" {
            linkedit = Some(seg);
        }
    }

    let text = text.unwrap();
    let linkedit = linkedit.unwrap();
    assert_eq!(linkedit.fileoff % PAGE_SIZE, 0);
    assert!(linkedit.fileoff >= text.filesize);
}
2763
/// An empty executable carries `__TEXT` and `__LINKEDIT` segments but neither
/// `__DATA_CONST` nor `__DATA`.
#[test]
fn text_only_executable_omits_empty_data_segments() {
    let layout = Layout::empty(OutputKind::Executable, 0);
    let options = LinkOptions::default();
    let mut image = Vec::new();
    write(&layout, OutputKind::Executable, &options, &mut image).unwrap();

    let header = crate::macho::reader::parse_header(&image).unwrap();
    let commands = crate::macho::reader::parse_commands(&header, &image).unwrap();

    let mut segment_names: Vec<String> = Vec::new();
    for cmd in commands {
        if let LoadCommand::Segment64(seg) = cmd {
            segment_names.push(seg.segname_str());
        }
    }
    let has_segment = |wanted: &str| segment_names.iter().any(|name| name == wanted);

    assert!(has_segment("__TEXT"));
    assert!(has_segment("__LINKEDIT"));
    assert!(!has_segment("__DATA_CONST"));
    assert!(!has_segment("__DATA"));
}
2791
/// Function starts cover the `__text` atom, its alt entry and the coalesced
/// text atom, while the synthetic `__stubs` / `__stub_helper` sections
/// contribute nothing.
#[test]
fn function_starts_use_all_executable_text_atoms_and_alt_entries_only() {
    // Both atoms share everything except id, input section, size, alt entries
    // and payload.
    let text_atom = |id, input_section, size, alt_entries, data| Atom {
        id: AtomId(id),
        origin: InputId(1),
        input_section,
        section: AtomSection::Text,
        input_offset: 0,
        size,
        align_pow2: 2,
        owner: None,
        alt_entries,
        data,
        flags: AtomFlags::NONE,
        parent_of: None,
    };

    let mut atoms = AtomTable::new();
    let atom_id = atoms.push(text_atom(
        0,
        1,
        16,
        vec![AltEntry {
            symbol: SymbolId(1),
            offset_within_atom: 8,
        }],
        vec![0; 16],
    ));
    let coalesced_atom_id = atoms.push(text_atom(1, 2, 4, Vec::new(), vec![0; 4]));

    // Every placed atom sits at offset 0 of its section.
    let placed = |atom, size, data| OutputAtom {
        atom,
        offset: 0,
        size,
        data,
    };
    // All four sections live in `__TEXT` with identical alignment and flags.
    let text_section =
        |name: &str, kind, placed_atoms, synthetic_data, addr, size, file_off| OutputSection {
            segment: "__TEXT".into(),
            name: name.into(),
            kind,
            align_pow2: 2,
            flags: 0,
            reserved1: 0,
            reserved2: 0,
            reserved3: 0,
            atoms: placed_atoms,
            synthetic_offset: 0,
            synthetic_data,
            addr,
            size,
            file_off,
        };

    let layout = Layout {
        kind: OutputKind::Executable,
        segments: vec![OutputSegment {
            name: "__TEXT".into(),
            sections: vec![
                OutputSectionId(0),
                OutputSectionId(1),
                OutputSectionId(2),
                OutputSectionId(3),
            ],
            vm_addr: 0x1_0000_0000,
            vm_size: 0x4000,
            file_off: 0,
            file_size: 0x4000,
            init_prot: Prot::READ_EXECUTE,
            max_prot: Prot::READ_EXECUTE,
            flags: 0,
        }],
        sections: vec![
            text_section(
                "__text",
                SectionKind::Text,
                vec![placed(atom_id, 16, vec![0; 16])],
                Vec::new(),
                0x1_0000_1000,
                16,
                0x1000,
            ),
            text_section(
                "__stubs",
                SectionKind::SymbolStubs,
                Vec::new(),
                vec![0; 12],
                0x1_0000_1010,
                12,
                0x1010,
            ),
            text_section(
                "__stub_helper",
                SectionKind::Text,
                Vec::new(),
                vec![0; 36],
                0x1_0000_101c,
                36,
                0x101c,
            ),
            text_section(
                "__textcoal_nt",
                SectionKind::Coalesced,
                vec![placed(coalesced_atom_id, 4, vec![0; 4])],
                Vec::new(),
                0x1_0000_1040,
                4,
                0x1040,
            ),
        ],
    };

    let blob = build_function_starts(&layout, &[], &atoms).unwrap();
    let decoded = decode_function_starts_blob(&blob);
    assert_eq!(decoded, vec![0x1000, 0x1008, 0x1040]);
}
2928
/// With input symbols available, the expected function starts are the atom
/// start, the `_middle` symbol and the trailing zero-sized atom; the `Ltmp0`
/// temporary and the `N_ALT_ENTRY`-flagged `_alt` do not appear.
#[test]
fn function_starts_index_uses_only_interior_named_entries() {
    // Both atoms come from input section 1 and differ only in offset, size
    // and payload.
    let text_atom = |input_offset, size, data| Atom {
        id: AtomId(0),
        origin: InputId(1),
        input_section: 1,
        section: AtomSection::Text,
        input_offset,
        size,
        align_pow2: 2,
        owner: None,
        alt_entries: Vec::new(),
        data,
        flags: AtomFlags::NONE,
        parent_of: None,
    };

    let mut atoms = AtomTable::new();
    let atom_id = atoms.push(text_atom(0, 16, vec![0; 16]));
    let zero_atom_id = atoms.push(text_atom(16, 0, Vec::new()));

    let object = object_with_text_symbols(&[
        ("_start", 0x1000, 0),
        ("Ltmp0", 0x1004, 0),
        ("_middle", 0x1008, 0),
        ("_alt", 0x100c, N_ALT_ENTRY),
        ("_end", 0x1010, 0),
    ]);
    let inputs = [LayoutInput {
        id: InputId(1),
        object: &object,
        load_order: 0,
        archive_member_offset: None,
    }];

    let text_segment = OutputSegment {
        name: "__TEXT".into(),
        sections: vec![OutputSectionId(0)],
        vm_addr: 0x1_0000_0000,
        vm_size: 0x4000,
        file_off: 0,
        file_size: 0x4000,
        init_prot: Prot::READ_EXECUTE,
        max_prot: Prot::READ_EXECUTE,
        flags: 0,
    };
    let text_section = OutputSection {
        segment: "__TEXT".into(),
        name: "__text".into(),
        kind: SectionKind::Text,
        align_pow2: 2,
        flags: 0,
        reserved1: 0,
        reserved2: 0,
        reserved3: 0,
        atoms: vec![
            OutputAtom {
                atom: atom_id,
                offset: 0,
                size: 16,
                data: vec![0; 16],
            },
            OutputAtom {
                atom: zero_atom_id,
                offset: 16,
                size: 0,
                data: Vec::new(),
            },
        ],
        synthetic_offset: 0,
        synthetic_data: Vec::new(),
        addr: 0x1_0000_1000,
        size: 16,
        file_off: 0x1000,
    };
    let layout = Layout {
        kind: OutputKind::Executable,
        segments: vec![text_segment],
        sections: vec![text_section],
    };

    let blob = build_function_starts(&layout, &inputs, &atoms).unwrap();
    let decoded = decode_function_starts_blob(&blob);
    assert_eq!(decoded, vec![0x1000, 0x1008, 0x1010]);
}
3023
3024 fn object_with_text_symbols(symbols: &[(&str, u64, u16)]) -> ObjectFile {
3025 let mut strings = vec![0];
3026 let mut strx = Vec::new();
3027 for (name, _, _) in symbols {
3028 strx.push(strings.len() as u32);
3029 strings.extend_from_slice(name.as_bytes());
3030 strings.push(0);
3031 }
3032 ObjectFile {
3033 path: "function-starts-index.o".into(),
3034 header: MachHeader64 {
3035 magic: MH_MAGIC_64,
3036 cputype: CPU_TYPE_ARM64,
3037 cpusubtype: CPU_SUBTYPE_ARM64_ALL,
3038 filetype: MH_OBJECT,
3039 ncmds: 0,
3040 sizeofcmds: 0,
3041 flags: 0,
3042 reserved: 0,
3043 },
3044 commands: Vec::new(),
3045 sections: vec![InputSection {
3046 segname: "__TEXT".into(),
3047 sectname: "__text".into(),
3048 kind: SectionKind::Text,
3049 addr: 0x1000,
3050 size: 16,
3051 align_pow2: 2,
3052 flags: 0,
3053 offset: 0,
3054 reloff: 0,
3055 nreloc: 0,
3056 reserved1: 0,
3057 reserved2: 0,
3058 reserved3: 0,
3059 data: vec![0; 16],
3060 raw_relocs: Vec::new(),
3061 }],
3062 symbols: symbols
3063 .iter()
3064 .zip(strx)
3065 .map(|((_, value, desc), strx)| {
3066 InputSymbol::from_raw(RawNlist {
3067 strx,
3068 n_type: N_SECT,
3069 n_sect: 1,
3070 n_desc: *desc,
3071 n_value: *value,
3072 })
3073 })
3074 .collect(),
3075 strings: StringTable::from_bytes(strings),
3076 symtab: None,
3077 dysymtab: None,
3078 loh: Vec::new(),
3079 data_in_code: Vec::new(),
3080 }
3081 }
3082
/// Two adjacent atoms in one input section: a single-byte lookup inside the
/// first and a two-byte lookup inside the second must each resolve to the
/// right atom and atom-relative offset.
#[test]
fn containing_atom_lookup_reuses_precomputed_section_index() {
    // Both atoms belong to input 7, section 3.
    let text_atom = |input_offset, size, data| Atom {
        id: AtomId(0),
        origin: InputId(7),
        input_section: 3,
        section: AtomSection::Text,
        input_offset,
        size,
        align_pow2: 2,
        owner: None,
        alt_entries: Vec::new(),
        data,
        flags: AtomFlags::NONE,
        parent_of: None,
    };

    let mut atoms = AtomTable::new();
    let first = atoms.push(text_atom(0, 8, vec![0; 8]));
    let second = atoms.push(text_atom(8, 12, vec![0; 12]));

    let by_input_section = atoms.by_input_section();
    let atom_ranges = build_atom_range_index(&atoms, &by_input_section, None);

    let single_byte = find_containing_atom(&atom_ranges, InputId(7), 3, 4);
    assert_eq!(single_byte, Some((first, 4)));

    let two_bytes = find_containing_atom_range(&atom_ranges, InputId(7), 3, 10, 2);
    assert_eq!(two_bytes, Some((second, 2)));
}
3126 }
3127