// (GitHub file-view header: "Rust · 38725 bytes Raw Blame History")
1 //! Atomization model.
2 //!
3 //! An **atom** is the linker's fundamental unit of output layout,
4 //! dead-stripping, and ICF. Each input section is split into one or more
5 //! atoms; output sections are concatenations of atoms. Every
6 //! `Symbol::Defined` owns exactly one atom (except `.alt_entry` chain
7 //! symbols which fold into a predecessor's atom).
8 //!
9 //! afs-as always sets `MH_SUBSECTIONS_VIA_SYMBOLS`, so in practice text and
10 //! data sections split at symbol boundaries; literal sections
11 //! (`__cstring`, `__literal*`) split at content boundaries; zerofill and
12 //! TLS sections split per-symbol. The full ruleset lives in
13 //! [`atomize_input_section`].
14 //!
15 //! Later passes reference atoms via `AtomId` (Sprint 7's opaque handle).
16 //! This module hands out ids via `AtomTable::push`; `AtomId(0)` is a
17 //! pre-existing sentinel meaning "no atom bound yet" (used by
18 //! `Symbol::Defined { atom }` before atomization back-patches it).
19
20 use std::collections::HashMap;
21
22 use crate::input::ObjectFile;
23 use crate::macho::constants::MH_SUBSECTIONS_VIA_SYMBOLS;
24 use crate::reloc::{parse_raw_relocs, parse_relocs, Referent};
25 use crate::resolve::{AtomId, InputId, SymbolId, SymbolTable};
26 use crate::section::{InputSection, SectionKind};
27 use crate::symbol::{InputSymbol, SymKind};
28
/// Which conceptual output section family this atom belongs to. Sprint 10
/// turns these into real `__TEXT,__text` / `__DATA,__data` etc. placements.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AtomSection {
    Text,
    Data,
    ConstData,
    /// Null-terminated string literals (`__cstring`); split per string.
    CStringLiterals,
    /// Fixed-width literal pools; split into 4/8/16-byte chunks.
    Literal4,
    Literal8,
    Literal16,
    /// Virtual (no file bytes) data; covers both regular and GB zerofill.
    ZeroFill,
    ThreadLocalData,
    ThreadLocalBss,
    /// Covers both `ThreadLocalVariables` and `ThreadLocalVariablePointers`
    /// input kinds (see `from_section_kind`).
    ThreadLocalVariables,
    ThreadLocalInitPointers,
    Coalesced,
    /// Per-record unwind metadata; each record's lifetime is tied to a
    /// function atom via `Atom::parent_of`.
    CompactUnwind,
    EhFrame,
    SymbolStubs,
    NonLazySymbolPointers,
    LazySymbolPointers,
    /// Section kind we don't have specialized layout for yet. Layout still
    /// works (output section keyed by segname/sectname) but downstream
    /// passes treat it opaquely.
    Other,
}
56
57 impl AtomSection {
58 pub fn from_section_kind(kind: SectionKind) -> Self {
59 match kind {
60 SectionKind::Text => AtomSection::Text,
61 SectionKind::Data => AtomSection::Data,
62 SectionKind::ConstData => AtomSection::ConstData,
63 SectionKind::CStringLiterals => AtomSection::CStringLiterals,
64 SectionKind::Literal4 => AtomSection::Literal4,
65 SectionKind::Literal8 => AtomSection::Literal8,
66 SectionKind::Literal16 => AtomSection::Literal16,
67 SectionKind::ZeroFill | SectionKind::GbZeroFill => AtomSection::ZeroFill,
68 SectionKind::ThreadLocalRegular => AtomSection::ThreadLocalData,
69 SectionKind::ThreadLocalZeroFill => AtomSection::ThreadLocalBss,
70 SectionKind::ThreadLocalVariables => AtomSection::ThreadLocalVariables,
71 SectionKind::ThreadLocalVariablePointers => AtomSection::ThreadLocalVariables,
72 SectionKind::ThreadLocalInitPointers => AtomSection::ThreadLocalInitPointers,
73 SectionKind::Coalesced => AtomSection::Coalesced,
74 SectionKind::CompactUnwind => AtomSection::CompactUnwind,
75 SectionKind::EhFrame => AtomSection::EhFrame,
76 SectionKind::SymbolStubs => AtomSection::SymbolStubs,
77 SectionKind::NonLazySymbolPointers => AtomSection::NonLazySymbolPointers,
78 SectionKind::LazySymbolPointers => AtomSection::LazySymbolPointers,
79 SectionKind::Regular | SectionKind::Unknown(_) => AtomSection::Other,
80 }
81 }
82
83 pub fn is_zerofill(self) -> bool {
84 matches!(self, AtomSection::ZeroFill | AtomSection::ThreadLocalBss)
85 }
86
87 pub fn is_literal(self) -> bool {
88 matches!(
89 self,
90 AtomSection::CStringLiterals
91 | AtomSection::Literal4
92 | AtomSection::Literal8
93 | AtomSection::Literal16
94 )
95 }
96 }
97
/// Bit-packed boolean attributes. Fields intentionally narrow — each bit
/// carries clear linker-visible meaning.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AtomFlags {
    // Union of the `AtomFlags::*` bit constants declared in the impl below.
    bits: u32,
}
104
105 impl AtomFlags {
106 pub const NONE: AtomFlags = AtomFlags { bits: 0 };
107 pub const NO_DEAD_STRIP: u32 = 1 << 0;
108 pub const WEAK_DEF: u32 = 1 << 1;
109 pub const THREAD_LOCAL: u32 = 1 << 2;
110 pub const LITERAL: u32 = 1 << 3;
111 pub const PURE_INSTRUCTIONS: u32 = 1 << 4;
112 pub const ADDRESS_TAKEN: u32 = 1 << 5; // set during reloc scan (Sprint 24's ICF gate)
113
114 pub fn has(self, bit: u32) -> bool {
115 self.bits & bit != 0
116 }
117
118 pub fn with(mut self, bit: u32) -> Self {
119 self.bits |= bit;
120 self
121 }
122
123 pub fn set(&mut self, bit: u32) {
124 self.bits |= bit;
125 }
126
127 pub fn bits(self) -> u32 {
128 self.bits
129 }
130 }
131
/// A symbol that resolves to a point inside another atom via `.alt_entry`.
/// Used for the `_start` / `_main` pattern where a secondary entry point
/// aliases into the middle of a function.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AltEntry {
    /// Resolver-side handle. During atomization this is seeded with the
    /// object-local symbol index (wrapped in a placeholder `SymbolId`);
    /// `backpatch_symbol_atoms` rewrites it to the real handle.
    pub symbol: SymbolId,
    /// Byte offset into the containing atom where this alt entry points.
    pub offset_within_atom: u32,
}
141
/// One atom. Dead-stripping, ICF, and layout all work in terms of atoms.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Atom {
    /// Assigned by `AtomTable::push`; constructed as the `AtomId(0)`
    /// placeholder until then.
    pub id: AtomId,
    /// Input object this atom was carved from.
    pub origin: InputId,
    /// 1-based section index within `origin`'s Mach-O section list.
    pub input_section: u8,
    /// Output section family (see [`AtomSection`]).
    pub section: AtomSection,
    /// Offset within the input section where this atom's content starts.
    pub input_offset: u32,
    /// Byte size. For zerofill atoms, this is virtual; `data` is empty.
    pub size: u32,
    /// log2 of required alignment. Inherited from the containing section.
    pub align_pow2: u8,
    /// Primary defining symbol, if any. Locals that split a section at
    /// `MH_SUBSECTIONS_VIA_SYMBOLS` boundaries but have no matching
    /// `Symbol::Defined` (rare; happens for unnamed atoms inside literal
    /// sections) leave this `None`.
    pub owner: Option<SymbolId>,
    /// `.alt_entry` chain — symbols aliased into this atom.
    pub alt_entries: Vec<AltEntry>,
    /// File-backed content, empty for zerofill.
    pub data: Vec<u8>,
    /// Bit-packed attributes (see the `AtomFlags` constants).
    pub flags: AtomFlags,
    /// For compact-unwind and eh_frame atoms: the function atom whose
    /// lifetime this metadata atom shares. Sprint 23 (dead-strip) uses
    /// this to keep unwind metadata live iff the function is live.
    pub parent_of: Option<AtomId>,
}
171
/// Registry of all atoms in the link. `push` hands out stable `AtomId`s;
/// `get` / `get_mut` index into the table.
#[derive(Debug, Default)]
pub struct AtomTable {
    // Dense storage; ids are 1-based, so `AtomId(n)` lives at index n - 1.
    atoms: Vec<Atom>,
}
178
179 impl AtomTable {
180 pub fn new() -> Self {
181 Self::default()
182 }
183
184 /// Assign an id to `atom` (overwriting any prior `id` field) and
185 /// store it. Returns the new handle.
186 pub fn push(&mut self, mut atom: Atom) -> AtomId {
187 // Skip id 0 — `AtomId(0)` is the pre-atomization placeholder for
188 // `Symbol::Defined { atom }` slots seeded before atomization runs.
189 let id = AtomId((self.atoms.len() as u32) + 1);
190 atom.id = id;
191 self.atoms.push(atom);
192 id
193 }
194
195 pub fn get(&self, id: AtomId) -> &Atom {
196 &self.atoms[(id.0 - 1) as usize]
197 }
198
199 pub fn get_mut(&mut self, id: AtomId) -> &mut Atom {
200 &mut self.atoms[(id.0 - 1) as usize]
201 }
202
203 pub fn len(&self) -> usize {
204 self.atoms.len()
205 }
206
207 pub fn is_empty(&self) -> bool {
208 self.atoms.is_empty()
209 }
210
211 pub fn iter(&self) -> impl Iterator<Item = (AtomId, &Atom)> {
212 self.atoms
213 .iter()
214 .enumerate()
215 .map(|(i, a)| (AtomId((i + 1) as u32), a))
216 }
217
218 /// Group atoms by `(origin, input_section)`, preserving insertion
219 /// order within each group. Sprint 10's layout pass walks this
220 /// grouping to preserve input ordering within output sections.
221 pub fn by_input_section(&self) -> HashMap<(InputId, u8), Vec<AtomId>> {
222 let mut out: HashMap<(InputId, u8), Vec<AtomId>> = HashMap::new();
223 for (id, atom) in self.iter() {
224 out.entry((atom.origin, atom.input_section))
225 .or_default()
226 .push(id);
227 }
228 out
229 }
230 }
231
232 // ---------------------------------------------------------------------------
233 // Atomization pass.
234 // ---------------------------------------------------------------------------
235
236 /// Per-object atomization output. Back-patching `Symbol::Defined.atom`
237 /// walks `owner_by_sym`; Sprint 23's dead-strip reads `alt_entries_by_sym`
238 /// when computing the live graph.
239 #[derive(Debug, Default)]
240 pub struct ObjectAtomization {
241 pub atoms: Vec<AtomId>,
242 /// `(symbol_index_in_object → atom that owns it)`. Populated for every
243 /// external/private-extern SECT symbol that started a new atom.
244 pub owner_by_sym: Vec<(usize, AtomId)>,
245 /// `(symbol_index_in_object → (containing_atom, offset_within_atom))`.
246 /// Populated for `.alt_entry` symbols that folded into an existing atom.
247 pub alt_entries_by_sym: Vec<(usize, AtomId, u32)>,
248 }
249
/// Atomize every section in `obj`, pushing into `table`. The caller
/// typically walks every input in sequence and merges results.
pub fn atomize_object(
    input_id: InputId,
    obj: &ObjectFile,
    table: &mut AtomTable,
) -> ObjectAtomization {
    let subsections_via_symbols = obj.header.flags & MH_SUBSECTIONS_VIA_SYMBOLS != 0;
    let mut out = ObjectAtomization::default();

    for (sect_idx_zero, sect) in obj.sections.iter().enumerate() {
        // Section numbering is 1-based throughout this module.
        // NOTE(review): the u8 cast assumes < 256 sections per object —
        // confirm an earlier validation pass enforces this.
        let sect_idx_one = (sect_idx_zero + 1) as u8;
        // Gather symbols targeting this section and translate their
        // `n_value` (absolute address in the object's layout) into
        // in-section offsets by subtracting the section's `addr`.
        //
        // Only external / private-extern / alt-entry symbols count as
        // subsection boundaries. Locals like `ltmp0` often sit at the
        // same offset as an adjacent external (they're compiler-generated
        // anchors for PC-relative addressing); splitting at them would
        // produce zero-size atoms. This matches ld64's pragmatic reading
        // of MH_SUBSECTIONS_VIA_SYMBOLS.
        let mut syms: Vec<(usize, &InputSymbol, u32)> = obj
            .symbols
            .iter()
            .enumerate()
            .filter(|(_, s)| {
                s.stab_kind().is_none()
                    && s.kind() == SymKind::Sect
                    && s.sect_idx() == sect_idx_one
                    && (s.is_ext() || s.is_private_ext() || s.alt_entry())
            })
            .map(|(i, s)| {
                let offset = s.value().saturating_sub(sect.addr) as u32;
                (i, s, offset)
            })
            .collect();
        // `sort_by_key` is stable, so symbols sharing an offset keep their
        // symbol-table order.
        syms.sort_by_key(|(_, _, off)| *off);

        atomize_regular_section(
            input_id,
            sect_idx_one,
            sect,
            &syms,
            subsections_via_symbols,
            table,
            &mut out,
        );
    }

    // Post-pass: wire metadata atoms to the function atoms whose lifetime
    // they track, so dead-strip can prune unwind surfaces precisely.
    link_unwind_parents(input_id, obj, table, &out);
    link_eh_frame_parents(input_id, obj, table, &out);

    out
}
307
308 /// Walk `__compact_unwind` atoms; for each, find its `function_start`
309 /// reloc (at record offset 0), resolve the referent to a function atom
310 /// within this same input, and set `parent_of`. External-symbol relocs
311 /// (e.g. `__compact_unwind` referencing a function in another object)
312 /// are left with `parent_of = None` and wired by Sprint 17's unwind
313 /// synthesis pass, which has the full atom table.
314 fn link_unwind_parents(
315 input_id: InputId,
316 obj: &ObjectFile,
317 table: &mut AtomTable,
318 out: &ObjectAtomization,
319 ) {
320 let Some((cu_idx_zero, cu_sect)) = obj
321 .sections
322 .iter()
323 .enumerate()
324 .find(|(_, s)| s.kind == SectionKind::CompactUnwind)
325 else {
326 return;
327 };
328 let cu_idx_one = (cu_idx_zero + 1) as u8;
329
330 let raws = match parse_raw_relocs(&cu_sect.raw_relocs, 0, cu_sect.nreloc) {
331 Ok(r) => r,
332 Err(_) => return,
333 };
334 let fused = match parse_relocs(&raws) {
335 Ok(f) => f,
336 Err(_) => return,
337 };
338
339 // Index atoms produced by this object for (section, offset) lookup.
340 let mut atom_index: HashMap<(u8, u32), AtomId> = HashMap::new();
341 for id in &out.atoms {
342 let a = table.get(*id);
343 atom_index.insert((a.input_section, a.input_offset), *id);
344 }
345
346 // For each compact_unwind atom, find its first reloc.
347 for id in &out.atoms {
348 let atom = table.get(*id);
349 if atom.input_section != cu_idx_one {
350 continue;
351 }
352 let record_start = atom.input_offset;
353 let Some(r) = fused.iter().find(|r| r.offset == record_start) else {
354 continue;
355 };
356 let parent = match r.referent {
357 Referent::Section(sect_idx) => {
358 // The 8-byte `function_start` field holds the target's
359 // in-section offset. For ARM64_RELOC_UNSIGNED, that byte
360 // window carries the addend directly.
361 if atom.data.len() >= 8 {
362 let mut buf = [0u8; 8];
363 buf.copy_from_slice(&atom.data[0..8]);
364 let target_offset = u64::from_le_bytes(buf) as u32;
365 atom_index.get(&(sect_idx, target_offset)).copied()
366 } else {
367 None
368 }
369 }
370 Referent::Symbol(_) => None,
371 };
372 if let Some(parent_id) = parent {
373 table.get_mut(*id).parent_of = Some(parent_id);
374 }
375 }
376 let _ = input_id; // reserved for cross-object lookup in Sprint 17
377 }
378
/// Replace every `Symbol::Defined { atom: AtomId(0), ... }` seeded before
/// atomization with the real atom handle and atom-relative offset.
/// Silently skips symbols that have no matching entry (e.g. those that
/// were replaced by a strong definition elsewhere before atomization ran).
pub fn backpatch_symbol_atoms(
    atomization: &ObjectAtomization,
    input_id: InputId,
    obj: &ObjectFile,
    sym_table: &mut SymbolTable,
    atom_table: &mut AtomTable,
) {
    use crate::resolve::Symbol;

    // Phase 1: primary owners. Map each object-local symbol index back to
    // the resolver's global SymbolId via its interned name.
    for (sym_idx, atom_id) in &atomization.owner_by_sym {
        let input_sym = &obj.symbols[*sym_idx];
        let Ok(name_str) = obj.symbol_name(input_sym) else {
            continue;
        };
        let istr = sym_table.intern(name_str);
        let Some(sid) = sym_table.lookup(istr) else {
            continue;
        };
        // Primary owner symbols sit at atom boundary → atom-relative 0.
        // The `origin` check skips symbols whose winning definition came
        // from a different input.
        if let Symbol::Defined { origin, .. } = sym_table.get(sid) {
            if *origin == input_id {
                sym_table.bind_atom(sid, *atom_id, 0);
                atom_table.get_mut(*atom_id).owner = Some(sid);
            }
        }
    }

    // Phase 2: `.alt_entry` symbols folded into an existing atom.
    for (sym_idx, atom_id, local_off) in &atomization.alt_entries_by_sym {
        let input_sym = &obj.symbols[*sym_idx];
        let Ok(name_str) = obj.symbol_name(input_sym) else {
            continue;
        };
        let istr = sym_table.intern(name_str);
        let Some(sid) = sym_table.lookup(istr) else {
            continue;
        };
        if let Symbol::Defined { origin, .. } = sym_table.get(sid) {
            if *origin == input_id {
                sym_table.bind_atom(sid, *atom_id, *local_off as u64);
                // Update the atom's alt_entries with the resolver-side
                // SymbolId (we stored the InputSymbol index during
                // atomization; now we know the real handle). The match is
                // against the placeholder `SymbolId(input index)` seeded by
                // atomize_regular_section, disambiguated by offset.
                let atom = atom_table.get_mut(*atom_id);
                for alt in &mut atom.alt_entries {
                    if alt.symbol == SymbolId(*sym_idx as u32)
                        && alt.offset_within_atom == *local_off
                    {
                        alt.symbol = sid;
                    }
                }
            }
        }
    }
}
437
/// Split one section into atoms according to the `MH_SUBSECTIONS_VIA_SYMBOLS`
/// invariant plus `.alt_entry` folding. Literal and unwind specialization
/// lands in follow-up commits; this function's fallback is "one atom per
/// section" for sections the subsections flag doesn't split.
#[allow(clippy::too_many_arguments)]
fn atomize_regular_section(
    input_id: InputId,
    section_idx: u8,
    sect: &InputSection,
    syms: &[(usize, &InputSymbol, u32)],
    subsections_via_symbols: bool,
    table: &mut AtomTable,
    out: &mut ObjectAtomization,
) {
    let kind = sect.kind;
    let atom_section = AtomSection::from_section_kind(kind);

    // Without the subsections flag, every section becomes one atom — the
    // linker-side equivalent of Apple-style monolithic sections.
    if !subsections_via_symbols {
        let atom = build_section_atom(input_id, section_idx, sect, atom_section);
        let id = table.push(atom);
        out.atoms.push(id);
        // Every symbol folds into the single atom; its in-section offset
        // doubles as the offset-within-atom because the atom starts at 0.
        for (sym_idx, _sym, off) in syms {
            out.alt_entries_by_sym.push((*sym_idx, id, *off));
        }
        return;
    }

    // Zerofill: splitting happens per symbol (each tentative common-style
    // slot gets its own atom). If no symbols defined, emit a single atom.
    if atom_section.is_zerofill() {
        atomize_zerofill(input_id, section_idx, sect, syms, atom_section, table, out);
        return;
    }

    // Literal sections split on content boundaries (null for `__cstring`,
    // fixed-size chunks for `__literal4/8/16`) independent of symbol
    // labels. Sprint 24's ICF uses the per-atom content for dedup.
    if atom_section.is_literal() {
        atomize_literal_section(input_id, section_idx, sect, syms, atom_section, table, out);
        return;
    }

    // `__compact_unwind` is a fixed-layout array of 32-byte records; each
    // record becomes its own atom with `parent_of` wired to the function
    // atom it describes (linked post-hoc in `link_unwind_parents`).
    if atom_section == AtomSection::CompactUnwind {
        atomize_compact_unwind(input_id, section_idx, sect, syms, atom_section, table, out);
        return;
    }

    if atom_section == AtomSection::EhFrame {
        atomize_eh_frame(input_id, section_idx, sect, atom_section, table, out);
        return;
    }

    // With subsections_via_symbols and at least one split point, walk the
    // sorted symbols and emit one atom per non-alt_entry boundary.
    if syms.is_empty() {
        let atom = build_section_atom(input_id, section_idx, sect, atom_section);
        let id = table.push(atom);
        out.atoms.push(id);
        return;
    }

    // If there's content before the first symbol, carve a head atom
    // (unowned). afs-as emits a leading symbol in practice so this is
    // typically zero bytes, but the fallback keeps the byte-flow intact.
    let first_offset = syms[0].2;
    if first_offset > 0 {
        let head = build_slice_atom(
            input_id,
            section_idx,
            sect,
            atom_section,
            0,
            first_offset,
            None,
            &[],
        );
        let head_id = table.push(head);
        out.atoms.push(head_id);
    }

    // Walk symbol boundaries. `syms` is sorted by offset, so each atom runs
    // from its primary symbol to the next non-alt_entry symbol (or the
    // section end).
    let section_size = sect.size as u32;
    let mut i = 0;
    while i < syms.len() {
        let (primary_idx, primary, atom_offset) = syms[i];
        let next_real_boundary = find_next_non_alt_entry(syms, i + 1)
            .map(|j| syms[j].2)
            .unwrap_or(section_size);
        let size = next_real_boundary.saturating_sub(atom_offset);

        // Collect alt_entries that fall into [atom_offset, atom_offset+size).
        // The scan stops at the first symbol past the boundary or the first
        // non-alt_entry symbol, whichever comes first.
        let mut alts: Vec<AltEntry> = Vec::new();
        let mut alt_folded: Vec<(usize, u32)> = Vec::new();
        for (alt_idx, alt_sym, alt_off) in syms.iter().skip(i + 1) {
            if *alt_off >= atom_offset + size {
                break;
            }
            if !alt_sym.alt_entry() {
                break;
            }
            let local = *alt_off - atom_offset;
            // `SymbolId(alt_idx)` is a placeholder (object-local index);
            // rewritten to the real handle in backpatch_symbol_atoms.
            alts.push(AltEntry {
                symbol: SymbolId(*alt_idx as u32),
                offset_within_atom: local,
            });
            alt_folded.push((*alt_idx, local));
        }

        let atom = build_slice_atom(
            input_id,
            section_idx,
            sect,
            atom_section,
            atom_offset,
            size,
            Some(primary),
            &alts,
        );
        let id = table.push(atom);
        out.atoms.push(id);
        out.owner_by_sym.push((primary_idx, id));
        for (alt_idx, local_off) in alt_folded {
            out.alt_entries_by_sym.push((alt_idx, id, local_off));
        }

        // Advance past the primary and its folded alt_entries.
        i = find_next_non_alt_entry(syms, i + 1).unwrap_or(syms.len());
    }
}
572
573 /// Split a literal section into atoms. `__cstring` splits at null-byte
574 /// terminators (variable-length); `__literal4/8/16` split at fixed-width
575 /// boundaries. Owner symbols attach at exact offsets where a symbol
576 /// points.
577 fn atomize_literal_section(
578 input_id: InputId,
579 section_idx: u8,
580 sect: &InputSection,
581 syms: &[(usize, &InputSymbol, u32)],
582 atom_section: AtomSection,
583 table: &mut AtomTable,
584 out: &mut ObjectAtomization,
585 ) {
586 match atom_section {
587 AtomSection::CStringLiterals => {
588 atomize_cstring(input_id, section_idx, sect, syms, atom_section, table, out)
589 }
590 AtomSection::Literal4 => atomize_fixed_literal(
591 input_id,
592 section_idx,
593 sect,
594 syms,
595 4,
596 atom_section,
597 table,
598 out,
599 ),
600 AtomSection::Literal8 => atomize_fixed_literal(
601 input_id,
602 section_idx,
603 sect,
604 syms,
605 8,
606 atom_section,
607 table,
608 out,
609 ),
610 AtomSection::Literal16 => atomize_fixed_literal(
611 input_id,
612 section_idx,
613 sect,
614 syms,
615 16,
616 atom_section,
617 table,
618 out,
619 ),
620 _ => unreachable!("atomize_literal_section called with non-literal kind"),
621 }
622 }
623
624 fn atomize_cstring(
625 input_id: InputId,
626 section_idx: u8,
627 sect: &InputSection,
628 syms: &[(usize, &InputSymbol, u32)],
629 atom_section: AtomSection,
630 table: &mut AtomTable,
631 out: &mut ObjectAtomization,
632 ) {
633 let mut offset = 0usize;
634 while offset < sect.data.len() {
635 let relative_nul = sect.data[offset..]
636 .iter()
637 .position(|&b| b == 0)
638 .unwrap_or(sect.data.len() - offset);
639 let end = offset + relative_nul + 1;
640 let end = end.min(sect.data.len());
641 let data = sect.data[offset..end].to_vec();
642 let size = (end - offset) as u32;
643
644 let owner_entry = syms.iter().find(|(_, _, off)| *off as usize == offset);
645 let owner_idx = owner_entry.map(|(i, _, _)| *i);
646
647 let mut flags = AtomFlags::default().with(AtomFlags::LITERAL);
648 if let Some((_, sym, _)) = owner_entry {
649 flags.set(symbol_flags(sym).bits());
650 }
651
652 let atom = Atom {
653 id: AtomId(0),
654 origin: input_id,
655 input_section: section_idx,
656 section: atom_section,
657 input_offset: offset as u32,
658 size,
659 align_pow2: sect.align_pow2 as u8,
660 owner: None,
661 alt_entries: Vec::new(),
662 data,
663 flags,
664 parent_of: None,
665 };
666 let id = table.push(atom);
667 out.atoms.push(id);
668 if let Some(idx) = owner_idx {
669 out.owner_by_sym.push((idx, id));
670 }
671 offset = end;
672 }
673 }
674
675 #[allow(clippy::too_many_arguments)]
676 fn atomize_fixed_literal(
677 input_id: InputId,
678 section_idx: u8,
679 sect: &InputSection,
680 syms: &[(usize, &InputSymbol, u32)],
681 chunk_size: usize,
682 atom_section: AtomSection,
683 table: &mut AtomTable,
684 out: &mut ObjectAtomization,
685 ) {
686 let section_size = sect.size as usize;
687 let mut offset = 0usize;
688 while offset < section_size {
689 let end = (offset + chunk_size).min(section_size);
690 let data_end = end.min(sect.data.len());
691 let data = if offset < data_end {
692 sect.data[offset..data_end].to_vec()
693 } else {
694 Vec::new()
695 };
696 let size = (end - offset) as u32;
697
698 let owner_entry = syms.iter().find(|(_, _, off)| *off as usize == offset);
699 let owner_idx = owner_entry.map(|(i, _, _)| *i);
700
701 let mut flags = AtomFlags::default().with(AtomFlags::LITERAL);
702 if let Some((_, sym, _)) = owner_entry {
703 flags.set(symbol_flags(sym).bits());
704 }
705
706 let atom = Atom {
707 id: AtomId(0),
708 origin: input_id,
709 input_section: section_idx,
710 section: atom_section,
711 input_offset: offset as u32,
712 size,
713 align_pow2: sect.align_pow2 as u8,
714 owner: None,
715 alt_entries: Vec::new(),
716 data,
717 flags,
718 parent_of: None,
719 };
720 let id = table.push(atom);
721 out.atoms.push(id);
722 if let Some(idx) = owner_idx {
723 out.owner_by_sym.push((idx, id));
724 }
725 offset = end;
726 }
727 }
728
729 /// Split `__compact_unwind` into 32-byte atoms (one per record).
730 /// `parent_of` is filled in post-hoc by `link_unwind_parents` once all
731 /// sections of this object have been atomized.
732 fn atomize_compact_unwind(
733 input_id: InputId,
734 section_idx: u8,
735 sect: &InputSection,
736 syms: &[(usize, &InputSymbol, u32)],
737 atom_section: AtomSection,
738 table: &mut AtomTable,
739 out: &mut ObjectAtomization,
740 ) {
741 const RECORD: usize = 32;
742 let section_size = sect.size as usize;
743 let mut offset = 0usize;
744 while offset < section_size {
745 let end = (offset + RECORD).min(section_size);
746 let data = sect.data[offset..end.min(sect.data.len())].to_vec();
747 let size = (end - offset) as u32;
748
749 let owner_idx = syms
750 .iter()
751 .find(|(_, _, off)| *off as usize == offset)
752 .map(|(i, _, _)| *i);
753
754 let atom = Atom {
755 id: AtomId(0),
756 origin: input_id,
757 input_section: section_idx,
758 section: atom_section,
759 input_offset: offset as u32,
760 size,
761 align_pow2: sect.align_pow2 as u8,
762 owner: None,
763 alt_entries: Vec::new(),
764 data,
765 flags: AtomFlags::default(),
766 parent_of: None, // filled by link_unwind_parents
767 };
768 let id = table.push(atom);
769 out.atoms.push(id);
770 if let Some(idx) = owner_idx {
771 out.owner_by_sym.push((idx, id));
772 }
773 offset = end;
774 }
775 }
776
777 /// Split `__eh_frame` into DWARF CFI records so dead-strip can retain only
778 /// the live FDEs and their shared CIEs.
779 fn atomize_eh_frame(
780 input_id: InputId,
781 section_idx: u8,
782 sect: &InputSection,
783 atom_section: AtomSection,
784 table: &mut AtomTable,
785 out: &mut ObjectAtomization,
786 ) {
787 let mut offset = 0usize;
788 while offset < sect.data.len() {
789 let Some(size) = eh_frame_record_size(&sect.data, offset) else {
790 let atom = build_section_atom(input_id, section_idx, sect, atom_section);
791 let id = table.push(atom);
792 out.atoms.push(id);
793 return;
794 };
795
796 let end = (offset + size).min(sect.data.len());
797 let atom = Atom {
798 id: AtomId(0),
799 origin: input_id,
800 input_section: section_idx,
801 section: atom_section,
802 input_offset: offset as u32,
803 size: (end - offset) as u32,
804 align_pow2: (sect.align_pow2 as u8).min(2),
805 owner: None,
806 alt_entries: Vec::new(),
807 data: sect.data[offset..end].to_vec(),
808 flags: AtomFlags::default(),
809 parent_of: None,
810 };
811 let id = table.push(atom);
812 out.atoms.push(id);
813 offset = end;
814 }
815 }
816
/// Size (in bytes, including the 4-byte length word) of the DWARF CFI
/// record starting at `offset` in `data`.
///
/// Returns `None` when the length word is truncated, when the record body
/// would run past `data`, or for the 64-bit extended-length encoding
/// (`0xffff_ffff`), which this splitter doesn't support. A zero length is
/// the section terminator: a bare length word (`Some(4)`).
///
/// All offset arithmetic is overflow-checked so adversarial lengths or
/// offsets can never wrap around.
fn eh_frame_record_size(data: &[u8], offset: usize) -> Option<usize> {
    let length_end = offset.checked_add(4)?;
    let length_bytes: [u8; 4] = data.get(offset..length_end)?.try_into().ok()?;
    let length = u32::from_le_bytes(length_bytes);
    if length == 0 {
        return Some(4);
    }
    if length == u32::MAX {
        return None;
    }
    let size = 4usize.checked_add(length as usize)?;
    // Checked add: `offset + size` must not wrap before the bounds test.
    (offset.checked_add(size)? <= data.len()).then_some(size)
}
830
831 fn eh_frame_cie_pointer(atom: &Atom) -> Option<u32> {
832 (atom.section == AtomSection::EhFrame && atom.data.len() >= 8).then(|| {
833 let mut buf = [0u8; 4];
834 buf.copy_from_slice(&atom.data[4..8]);
835 u32::from_le_bytes(buf)
836 })
837 }
838
/// Resolve the function atom that a metadata record's function-address
/// field points at, given the reloc attached to that field.
///
/// * `Referent::Section` — the 8 bytes at `field_offset` in `atom.data`
///   hold the target's in-section offset (little-endian); look it up in
///   `atom_index`, which is keyed by `(input_section, input_offset)`.
/// * `Referent::Symbol` — only SECT symbols resolve; the target offset is
///   the symbol's value minus its section's base address. Other symbol
///   kinds (e.g. undefined/external) yield `None`.
///
/// Returns `None` whenever the referent can't be mapped to an atom of
/// this same object (truncated data, unknown symbol, no atom at the
/// computed offset).
///
/// NOTE(review): the symbol branch ignores any addend stored in the field
/// bytes — assumed zero for these records; confirm for assemblers that
/// emit `sym + addend` function references.
fn resolve_function_parent(
    obj: &ObjectFile,
    atom: &Atom,
    reloc: crate::reloc::Reloc,
    atom_index: &HashMap<(u8, u32), AtomId>,
    field_offset: usize,
) -> Option<AtomId> {
    match reloc.referent {
        Referent::Section(sect_idx) => {
            // Truncated records simply fail the `get` and resolve to None.
            let end = field_offset.checked_add(8)?;
            let mut buf = [0u8; 8];
            buf.copy_from_slice(atom.data.get(field_offset..end)?);
            let target_offset = u64::from_le_bytes(buf) as u32;
            atom_index.get(&(sect_idx, target_offset)).copied()
        }
        Referent::Symbol(sym_idx) => {
            let input_sym = obj.symbols.get(sym_idx as usize)?;
            (input_sym.kind() == SymKind::Sect)
                .then(|| {
                    // n_value is absolute in the object's layout; subtract
                    // the owning section's base to get an in-section offset.
                    let target_offset = input_sym.value().saturating_sub(
                        obj.sections
                            .get(input_sym.sect_idx().saturating_sub(1) as usize)
                            .map(|section| section.addr)
                            .unwrap_or(0),
                    ) as u32;
                    atom_index
                        .get(&(input_sym.sect_idx(), target_offset))
                        .copied()
                })
                .flatten()
        }
    }
}
872
/// Wire each `__eh_frame` FDE atom to the function atom it describes, so
/// dead-strip can keep unwind info live iff its function is live.
///
/// A record whose CIE-pointer field (bytes 4..8) is zero is a CIE, not an
/// FDE, and is skipped. For FDEs, the reloc on the `pc_begin` field —
/// located at record offset 8, i.e. section offset `input_offset + 8` —
/// is resolved via [`resolve_function_parent`].
fn link_eh_frame_parents(
    input_id: InputId,
    obj: &ObjectFile,
    table: &mut AtomTable,
    out: &ObjectAtomization,
) {
    let Some((eh_idx_zero, eh_sect)) = obj
        .sections
        .iter()
        .enumerate()
        .find(|(_, s)| s.kind == SectionKind::EhFrame)
    else {
        return;
    };
    let eh_idx_one = (eh_idx_zero + 1) as u8;

    // Best-effort: unparseable relocs simply leave `parent_of = None`.
    let raws = match parse_raw_relocs(&eh_sect.raw_relocs, 0, eh_sect.nreloc) {
        Ok(r) => r,
        Err(_) => return,
    };
    let fused = match parse_relocs(&raws) {
        Ok(f) => f,
        Err(_) => return,
    };

    // Index this object's atoms by (section, offset) for referent lookup.
    let mut atom_index: HashMap<(u8, u32), AtomId> = HashMap::new();
    for id in &out.atoms {
        let a = table.get(*id);
        atom_index.insert((a.input_section, a.input_offset), *id);
    }

    for id in &out.atoms {
        let atom = table.get(*id);
        if atom.input_section != eh_idx_one {
            continue;
        }
        let Some(cie_pointer) = eh_frame_cie_pointer(atom) else {
            continue;
        };
        // CIEs (pointer field == 0) describe no function; skip them.
        if cie_pointer == 0 {
            continue;
        }
        let Some(reloc) = fused.iter().find(|r| r.offset == atom.input_offset + 8) else {
            continue;
        };
        if let Some(parent_id) = resolve_function_parent(obj, atom, *reloc, &atom_index, 8) {
            table.get_mut(*id).parent_of = Some(parent_id);
        }
    }
    let _ = input_id;
}
924
925 fn atomize_zerofill(
926 input_id: InputId,
927 section_idx: u8,
928 sect: &InputSection,
929 syms: &[(usize, &InputSymbol, u32)],
930 atom_section: AtomSection,
931 table: &mut AtomTable,
932 out: &mut ObjectAtomization,
933 ) {
934 if syms.is_empty() {
935 let atom = build_section_atom(input_id, section_idx, sect, atom_section);
936 let id = table.push(atom);
937 out.atoms.push(id);
938 return;
939 }
940 let section_size = sect.size as u32;
941 for (i, (sym_idx, sym, start)) in syms.iter().enumerate() {
942 let start = *start;
943 let end = syms
944 .get(i + 1)
945 .map(|(_, _, off)| *off)
946 .unwrap_or(section_size);
947 let size = end.saturating_sub(start);
948 let atom = Atom {
949 id: AtomId(0),
950 origin: input_id,
951 input_section: section_idx,
952 section: atom_section,
953 input_offset: start,
954 size,
955 align_pow2: sect.align_pow2 as u8,
956 owner: Some(SymbolId(*sym_idx as u32)),
957 alt_entries: Vec::new(),
958 data: Vec::new(), // zerofill
959 flags: symbol_flags(sym),
960 parent_of: None,
961 };
962 let id = table.push(atom);
963 out.atoms.push(id);
964 out.owner_by_sym.push((*sym_idx, id));
965 }
966 }
967
968 fn build_section_atom(
969 input_id: InputId,
970 section_idx: u8,
971 sect: &InputSection,
972 atom_section: AtomSection,
973 ) -> Atom {
974 let data = if atom_section.is_zerofill() {
975 Vec::new()
976 } else {
977 sect.data.clone()
978 };
979 let mut flags = AtomFlags::default();
980 if sect.kind == SectionKind::Text {
981 flags.set(AtomFlags::PURE_INSTRUCTIONS);
982 }
983 Atom {
984 id: AtomId(0),
985 origin: input_id,
986 input_section: section_idx,
987 section: atom_section,
988 input_offset: 0,
989 size: sect.size as u32,
990 align_pow2: sect.align_pow2 as u8,
991 owner: None,
992 alt_entries: Vec::new(),
993 data,
994 flags,
995 parent_of: None,
996 }
997 }
998
999 #[allow(clippy::too_many_arguments)]
1000 fn build_slice_atom(
1001 input_id: InputId,
1002 section_idx: u8,
1003 sect: &InputSection,
1004 atom_section: AtomSection,
1005 offset: u32,
1006 size: u32,
1007 owner: Option<&InputSymbol>,
1008 alt_entries: &[AltEntry],
1009 ) -> Atom {
1010 let data = if atom_section.is_zerofill() {
1011 Vec::new()
1012 } else {
1013 let start = offset as usize;
1014 let end = (offset + size) as usize;
1015 sect.data[start..end.min(sect.data.len())].to_vec()
1016 };
1017 let mut flags = AtomFlags::default();
1018 if sect.kind == SectionKind::Text {
1019 flags.set(AtomFlags::PURE_INSTRUCTIONS);
1020 }
1021 if let Some(sym) = owner {
1022 flags.set(symbol_flags(sym).bits());
1023 }
1024 Atom {
1025 id: AtomId(0),
1026 origin: input_id,
1027 input_section: section_idx,
1028 section: atom_section,
1029 input_offset: offset,
1030 size,
1031 align_pow2: sect.align_pow2 as u8,
1032 // owner is wired at back-patch time via `backpatch_symbol_atoms`;
1033 // atomization doesn't know the resolver-side SymbolId yet.
1034 owner: None,
1035 alt_entries: alt_entries.to_vec(),
1036 data,
1037 flags,
1038 parent_of: None,
1039 }
1040 }
1041
1042 fn symbol_flags(sym: &InputSymbol) -> AtomFlags {
1043 let mut f = AtomFlags::default();
1044 if sym.no_dead_strip() {
1045 f.set(AtomFlags::NO_DEAD_STRIP);
1046 }
1047 if sym.weak_def() {
1048 f.set(AtomFlags::WEAK_DEF);
1049 }
1050 f
1051 }
1052
1053 /// Find the next non-alt_entry symbol starting from index `i`. Returns the
1054 /// index (into `syms`), or `None` if every remaining symbol is an alt
1055 /// entry.
1056 fn find_next_non_alt_entry(syms: &[(usize, &InputSymbol, u32)], from: usize) -> Option<usize> {
1057 syms.iter()
1058 .enumerate()
1059 .skip(from)
1060 .find(|(_, (_, s, _))| !s.alt_entry())
1061 .map(|(i, _)| i)
1062 }
1063
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal text-section atom fixture; `id` is a placeholder that
    // `AtomTable::push` overwrites.
    fn make_text_atom(origin: InputId, sect: u8, off: u32, size: u32) -> Atom {
        Atom {
            id: AtomId(0), // will be overwritten by push
            origin,
            input_section: sect,
            section: AtomSection::Text,
            input_offset: off,
            size,
            align_pow2: 2,
            owner: None,
            alt_entries: Vec::new(),
            data: vec![0u8; size as usize],
            flags: AtomFlags::default().with(AtomFlags::PURE_INSTRUCTIONS),
            parent_of: None,
        }
    }

    #[test]
    fn push_assigns_stable_one_based_ids_and_roundtrips_via_get() {
        let mut t = AtomTable::new();
        let a = t.push(make_text_atom(InputId(0), 1, 0, 16));
        let b = t.push(make_text_atom(InputId(0), 1, 16, 8));
        assert_eq!(a.0, 1);
        assert_eq!(b.0, 2);
        assert_eq!(t.len(), 2);
        assert_eq!(t.get(a).input_offset, 0);
        assert_eq!(t.get(b).input_offset, 16);
    }

    #[test]
    fn id_zero_is_reserved_as_placeholder() {
        // `Symbol::Defined { atom: AtomId(0) }` is the pre-atomization
        // sentinel; any real atom must have id >= 1.
        let mut t = AtomTable::new();
        let id = t.push(make_text_atom(InputId(0), 1, 0, 1));
        assert_ne!(id, AtomId(0));
        assert_eq!(id, AtomId(1));
    }

    #[test]
    fn atom_section_from_section_kind_covers_all_variants() {
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::Text),
            AtomSection::Text
        );
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::CStringLiterals),
            AtomSection::CStringLiterals
        );
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::CompactUnwind),
            AtomSection::CompactUnwind
        );
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::ZeroFill),
            AtomSection::ZeroFill
        );
        assert!(AtomSection::from_section_kind(SectionKind::ZeroFill).is_zerofill());
        assert!(AtomSection::from_section_kind(SectionKind::CStringLiterals).is_literal());
        assert!(!AtomSection::from_section_kind(SectionKind::Text).is_literal());
    }

    #[test]
    fn atom_flags_bitwise() {
        let f = AtomFlags::default()
            .with(AtomFlags::NO_DEAD_STRIP)
            .with(AtomFlags::WEAK_DEF);
        assert!(f.has(AtomFlags::NO_DEAD_STRIP));
        assert!(f.has(AtomFlags::WEAK_DEF));
        assert!(!f.has(AtomFlags::THREAD_LOCAL));
    }

    #[test]
    fn by_input_section_groups_by_origin_and_section_index() {
        let mut t = AtomTable::new();
        let a = t.push(make_text_atom(InputId(0), 1, 0, 4));
        let b = t.push(make_text_atom(InputId(0), 1, 4, 4));
        let c = t.push(make_text_atom(InputId(1), 1, 0, 4));
        let grouped = t.by_input_section();
        // Insertion order within each group must be preserved.
        assert_eq!(grouped.get(&(InputId(0), 1)).unwrap(), &vec![a, b]);
        assert_eq!(grouped.get(&(InputId(1), 1)).unwrap(), &vec![c]);
    }
}
1151