Rust · 33865 bytes Raw Blame History
1 //! Atomization model.
2 //!
3 //! An **atom** is the linker's fundamental unit of output layout,
4 //! dead-stripping, and ICF. Each input section is split into one or more
5 //! atoms; output sections are concatenations of atoms. Every
6 //! `Symbol::Defined` owns exactly one atom (except `.alt_entry` chain
7 //! symbols which fold into a predecessor's atom).
8 //!
9 //! afs-as always sets `MH_SUBSECTIONS_VIA_SYMBOLS`, so in practice text and
10 //! data sections split at symbol boundaries; literal sections
11 //! (`__cstring`, `__literal*`) split at content boundaries; zerofill and
12 //! TLS sections split per-symbol. The full ruleset lives in
13 //! [`atomize_input_section`].
14 //!
15 //! Later passes reference atoms via `AtomId` (Sprint 7's opaque handle).
16 //! This module hands out ids via `AtomTable::push`; `AtomId(0)` is a
17 //! pre-existing sentinel meaning "no atom bound yet" (used by
18 //! `Symbol::Defined { atom }` before atomization back-patches it).
19
20 use std::collections::HashMap;
21
22 use crate::input::ObjectFile;
23 use crate::macho::constants::MH_SUBSECTIONS_VIA_SYMBOLS;
24 use crate::reloc::{parse_raw_relocs, parse_relocs, Referent};
25 use crate::resolve::{AtomId, InputId, SymbolId, SymbolTable};
26 use crate::section::{InputSection, SectionKind};
27 use crate::symbol::{InputSymbol, SymKind};
28
/// Which conceptual output section family this atom belongs to. Sprint 10
/// turns these into real `__TEXT,__text` / `__DATA,__data` etc. placements.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AtomSection {
    Text,
    Data,
    ConstData,
    CStringLiterals,
    Literal4,
    Literal8,
    Literal16,
    /// Covers both `ZeroFill` and `GbZeroFill` input kinds; atoms here
    /// carry no file-backed `data` (see [`AtomSection::is_zerofill`]).
    ZeroFill,
    ThreadLocalData,
    ThreadLocalBss,
    /// Also the target for `ThreadLocalVariablePointers` input sections —
    /// both map here in [`AtomSection::from_section_kind`].
    ThreadLocalVariables,
    ThreadLocalInitPointers,
    Coalesced,
    CompactUnwind,
    EhFrame,
    SymbolStubs,
    NonLazySymbolPointers,
    LazySymbolPointers,
    /// Section kind we don't have specialized layout for yet. Layout still
    /// works (output section keyed by segname/sectname) but downstream
    /// passes treat it opaquely.
    Other,
}
56
57 impl AtomSection {
58 pub fn from_section_kind(kind: SectionKind) -> Self {
59 match kind {
60 SectionKind::Text => AtomSection::Text,
61 SectionKind::Data => AtomSection::Data,
62 SectionKind::ConstData => AtomSection::ConstData,
63 SectionKind::CStringLiterals => AtomSection::CStringLiterals,
64 SectionKind::Literal4 => AtomSection::Literal4,
65 SectionKind::Literal8 => AtomSection::Literal8,
66 SectionKind::Literal16 => AtomSection::Literal16,
67 SectionKind::ZeroFill | SectionKind::GbZeroFill => AtomSection::ZeroFill,
68 SectionKind::ThreadLocalRegular => AtomSection::ThreadLocalData,
69 SectionKind::ThreadLocalZeroFill => AtomSection::ThreadLocalBss,
70 SectionKind::ThreadLocalVariables => AtomSection::ThreadLocalVariables,
71 SectionKind::ThreadLocalVariablePointers => AtomSection::ThreadLocalVariables,
72 SectionKind::ThreadLocalInitPointers => AtomSection::ThreadLocalInitPointers,
73 SectionKind::Coalesced => AtomSection::Coalesced,
74 SectionKind::CompactUnwind => AtomSection::CompactUnwind,
75 SectionKind::EhFrame => AtomSection::EhFrame,
76 SectionKind::SymbolStubs => AtomSection::SymbolStubs,
77 SectionKind::NonLazySymbolPointers => AtomSection::NonLazySymbolPointers,
78 SectionKind::LazySymbolPointers => AtomSection::LazySymbolPointers,
79 SectionKind::Regular | SectionKind::Unknown(_) => AtomSection::Other,
80 }
81 }
82
83 pub fn is_zerofill(self) -> bool {
84 matches!(self, AtomSection::ZeroFill | AtomSection::ThreadLocalBss)
85 }
86
87 pub fn is_literal(self) -> bool {
88 matches!(
89 self,
90 AtomSection::CStringLiterals
91 | AtomSection::Literal4
92 | AtomSection::Literal8
93 | AtomSection::Literal16
94 )
95 }
96 }
97
/// Bit-packed boolean attributes. Fields intentionally narrow — each bit
/// carries clear linker-visible meaning.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AtomFlags {
    bits: u32,
}

impl AtomFlags {
    /// The empty flag set (identical to `AtomFlags::default()`).
    pub const NONE: AtomFlags = AtomFlags { bits: 0 };
    /// Atom survives dead-stripping even when unreferenced.
    pub const NO_DEAD_STRIP: u32 = 1 << 0;
    /// Atom comes from a weak definition; coalescable.
    pub const WEAK_DEF: u32 = 1 << 1;
    pub const THREAD_LOCAL: u32 = 1 << 2;
    /// Atom is a content-keyed literal (cstring / __literal*).
    pub const LITERAL: u32 = 1 << 3;
    pub const PURE_INSTRUCTIONS: u32 = 1 << 4;
    pub const ADDRESS_TAKEN: u32 = 1 << 5; // set during reloc scan (Sprint 24's ICF gate)

    /// True when `bit` (or any of the given bits) is set.
    pub fn has(self, bit: u32) -> bool {
        (self.bits & bit) != 0
    }

    /// Builder-style copy with `bit` added.
    pub fn with(self, bit: u32) -> Self {
        AtomFlags {
            bits: self.bits | bit,
        }
    }

    /// In-place variant of [`AtomFlags::with`].
    pub fn set(&mut self, bit: u32) {
        *self = self.with(bit);
    }

    /// Raw bit pattern, e.g. for merging one flag set into another.
    pub fn bits(self) -> u32 {
        self.bits
    }
}
131
/// A symbol that resolves to a point inside another atom via `.alt_entry`.
/// Used for the `_start` / `_main` pattern where a secondary entry point
/// aliases into the middle of a function.
///
/// NOTE: during atomization `symbol` temporarily holds the *input symbol
/// index* wrapped in `SymbolId` (see `atomize_regular_section`);
/// `backpatch_symbol_atoms` rewrites it to the real resolver-side handle.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AltEntry {
    pub symbol: SymbolId,
    /// Byte offset into the containing atom where this alt entry points.
    pub offset_within_atom: u32,
}
141
/// One atom. Dead-stripping, ICF, and layout all work in terms of atoms.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Atom {
    /// Table handle; assigned by `AtomTable::push` (never `AtomId(0)`).
    pub id: AtomId,
    /// Input file this atom was carved out of.
    pub origin: InputId,
    /// 1-based section index within `origin`'s Mach-O section list.
    pub input_section: u8,
    pub section: AtomSection,
    /// Offset within the input section where this atom's content starts.
    pub input_offset: u32,
    /// Byte size. For zerofill atoms, this is virtual; `data` is empty.
    pub size: u32,
    /// log2 of required alignment. Inherited from the containing section.
    pub align_pow2: u8,
    /// Primary defining symbol, if any. Locals that split a section at
    /// `MH_SUBSECTIONS_VIA_SYMBOLS` boundaries but have no matching
    /// `Symbol::Defined` (rare; happens for unnamed atoms inside literal
    /// sections) leave this `None`. Wired by `backpatch_symbol_atoms`.
    pub owner: Option<SymbolId>,
    /// `.alt_entry` chain — symbols aliased into this atom.
    pub alt_entries: Vec<AltEntry>,
    /// File-backed content, empty for zerofill.
    pub data: Vec<u8>,
    pub flags: AtomFlags,
    /// For compact-unwind and eh_frame atoms: the function atom whose
    /// lifetime this metadata atom shares. Sprint 23 (dead-strip) uses
    /// this to keep unwind metadata live iff the function is live.
    pub parent_of: Option<AtomId>,
}
171
/// Registry of all atoms in the link. `push` hands out stable `AtomId`s;
/// `get` / `get_mut` index into the table.
#[derive(Debug, Default)]
pub struct AtomTable {
    // Dense storage; `AtomId(n)` lives at `atoms[n - 1]` (ids are 1-based,
    // id 0 being the pre-atomization sentinel).
    atoms: Vec<Atom>,
}
178
179 impl AtomTable {
180 pub fn new() -> Self {
181 Self::default()
182 }
183
184 /// Assign an id to `atom` (overwriting any prior `id` field) and
185 /// store it. Returns the new handle.
186 pub fn push(&mut self, mut atom: Atom) -> AtomId {
187 // Skip id 0 — `AtomId(0)` is the pre-atomization placeholder for
188 // `Symbol::Defined { atom }` slots seeded before atomization runs.
189 let id = AtomId((self.atoms.len() as u32) + 1);
190 atom.id = id;
191 self.atoms.push(atom);
192 id
193 }
194
195 pub fn get(&self, id: AtomId) -> &Atom {
196 &self.atoms[(id.0 - 1) as usize]
197 }
198
199 pub fn get_mut(&mut self, id: AtomId) -> &mut Atom {
200 &mut self.atoms[(id.0 - 1) as usize]
201 }
202
203 pub fn len(&self) -> usize {
204 self.atoms.len()
205 }
206
207 pub fn is_empty(&self) -> bool {
208 self.atoms.is_empty()
209 }
210
211 pub fn iter(&self) -> impl Iterator<Item = (AtomId, &Atom)> {
212 self.atoms
213 .iter()
214 .enumerate()
215 .map(|(i, a)| (AtomId((i + 1) as u32), a))
216 }
217
218 /// Group atoms by `(origin, input_section)`, preserving insertion
219 /// order within each group. Sprint 10's layout pass walks this
220 /// grouping to preserve input ordering within output sections.
221 pub fn by_input_section(&self) -> HashMap<(InputId, u8), Vec<AtomId>> {
222 let mut out: HashMap<(InputId, u8), Vec<AtomId>> = HashMap::new();
223 for (id, atom) in self.iter() {
224 out.entry((atom.origin, atom.input_section))
225 .or_default()
226 .push(id);
227 }
228 out
229 }
230 }
231
232 // ---------------------------------------------------------------------------
233 // Atomization pass.
234 // ---------------------------------------------------------------------------
235
236 /// Per-object atomization output. Back-patching `Symbol::Defined.atom`
237 /// walks `owner_by_sym`; Sprint 23's dead-strip reads `alt_entries_by_sym`
238 /// when computing the live graph.
239 #[derive(Debug, Default)]
240 pub struct ObjectAtomization {
241 pub atoms: Vec<AtomId>,
242 /// `(symbol_index_in_object → atom that owns it)`. Populated for every
243 /// external/private-extern SECT symbol that started a new atom.
244 pub owner_by_sym: Vec<(usize, AtomId)>,
245 /// `(symbol_index_in_object → (containing_atom, offset_within_atom))`.
246 /// Populated for `.alt_entry` symbols that folded into an existing atom.
247 pub alt_entries_by_sym: Vec<(usize, AtomId, u32)>,
248 }
249
250 /// Atomize every section in `obj`, pushing into `table`. The caller
251 /// typically walks every input in sequence and merges results.
252 pub fn atomize_object(
253 input_id: InputId,
254 obj: &ObjectFile,
255 table: &mut AtomTable,
256 ) -> ObjectAtomization {
257 let subsections_via_symbols = obj.header.flags & MH_SUBSECTIONS_VIA_SYMBOLS != 0;
258 let mut out = ObjectAtomization::default();
259
260 for (sect_idx_zero, sect) in obj.sections.iter().enumerate() {
261 let sect_idx_one = (sect_idx_zero + 1) as u8;
262 // Gather symbols targeting this section and translate their
263 // `n_value` (absolute address in the object's layout) into
264 // in-section offsets by subtracting the section's `addr`.
265 //
266 // Only external / private-extern / alt-entry symbols count as
267 // subsection boundaries. Locals like `ltmp0` often sit at the
268 // same offset as an adjacent external (they're compiler-generated
269 // anchors for PC-relative addressing); splitting at them would
270 // produce zero-size atoms. This matches ld64's pragmatic reading
271 // of MH_SUBSECTIONS_VIA_SYMBOLS.
272 let mut syms: Vec<(usize, &InputSymbol, u32)> = obj
273 .symbols
274 .iter()
275 .enumerate()
276 .filter(|(_, s)| {
277 s.stab_kind().is_none()
278 && s.kind() == SymKind::Sect
279 && s.sect_idx() == sect_idx_one
280 && (s.is_ext() || s.is_private_ext() || s.alt_entry())
281 })
282 .map(|(i, s)| {
283 let offset = s.value().saturating_sub(sect.addr) as u32;
284 (i, s, offset)
285 })
286 .collect();
287 syms.sort_by_key(|(_, _, off)| *off);
288
289 atomize_regular_section(
290 input_id,
291 sect_idx_one,
292 sect,
293 &syms,
294 subsections_via_symbols,
295 table,
296 &mut out,
297 );
298 }
299
300 // Post-pass: wire `parent_of` for every `__compact_unwind` atom to the
301 // function atom that its `function_start` reloc references.
302 link_unwind_parents(input_id, obj, table, &out);
303
304 out
305 }
306
/// Walk `__compact_unwind` atoms; for each, find its `function_start`
/// reloc (at record offset 0), resolve the referent to a function atom
/// within this same input, and set `parent_of`. External-symbol relocs
/// (e.g. `__compact_unwind` referencing a function in another object)
/// are left with `parent_of = None` and wired by Sprint 17's unwind
/// synthesis pass, which has the full atom table.
fn link_unwind_parents(
    input_id: InputId,
    obj: &ObjectFile,
    table: &mut AtomTable,
    out: &ObjectAtomization,
) {
    // Locate the __compact_unwind section (only the first match is
    // considered); nothing to do if the object has none.
    let Some((cu_idx_zero, cu_sect)) = obj
        .sections
        .iter()
        .enumerate()
        .find(|(_, s)| s.kind == SectionKind::CompactUnwind)
    else {
        return;
    };
    let cu_idx_one = (cu_idx_zero + 1) as u8;

    // Best-effort: malformed reloc data silently disables parent wiring
    // here rather than failing the whole pass.
    let raws = match parse_raw_relocs(&cu_sect.raw_relocs, 0, cu_sect.nreloc) {
        Ok(r) => r,
        Err(_) => return,
    };
    let fused = match parse_relocs(&raws) {
        Ok(f) => f,
        Err(_) => return,
    };

    // Index atoms produced by this object for (section, offset) lookup.
    let mut atom_index: HashMap<(u8, u32), AtomId> = HashMap::new();
    for id in &out.atoms {
        let a = table.get(*id);
        atom_index.insert((a.input_section, a.input_offset), *id);
    }

    // For each compact_unwind atom, find its first reloc.
    for id in &out.atoms {
        let atom = table.get(*id);
        if atom.input_section != cu_idx_one {
            continue;
        }
        let record_start = atom.input_offset;
        // Reloc offsets are section-relative, so the record's first field
        // (`function_start`, at record offset 0) matches `record_start`.
        let Some(r) = fused.iter().find(|r| r.offset == record_start) else {
            continue;
        };
        let parent = match r.referent {
            Referent::Section(sect_idx) => {
                // The 8-byte `function_start` field holds the target's
                // in-section offset. For ARM64_RELOC_UNSIGNED, that byte
                // window carries the addend directly.
                if atom.data.len() >= 8 {
                    let mut buf = [0u8; 8];
                    buf.copy_from_slice(&atom.data[0..8]);
                    let target_offset = u64::from_le_bytes(buf) as u32;
                    atom_index.get(&(sect_idx, target_offset)).copied()
                } else {
                    None
                }
            }
            // External-symbol referents are resolved later (Sprint 17),
            // once the cross-object atom table exists.
            Referent::Symbol(_) => None,
        };
        if let Some(parent_id) = parent {
            table.get_mut(*id).parent_of = Some(parent_id);
        }
    }
    let _ = input_id; // reserved for cross-object lookup in Sprint 17
}
377
/// Replace every `Symbol::Defined { atom: AtomId(0), ... }` seeded before
/// atomization with the real atom handle and atom-relative offset.
/// Silently skips symbols that have no matching entry (e.g. those that
/// were replaced by a strong definition elsewhere before atomization ran).
pub fn backpatch_symbol_atoms(
    atomization: &ObjectAtomization,
    input_id: InputId,
    obj: &ObjectFile,
    sym_table: &mut SymbolTable,
    atom_table: &mut AtomTable,
) {
    use crate::resolve::Symbol;

    // Pass 1: primary owners — bind symbol → atom and atom → symbol.
    for (sym_idx, atom_id) in &atomization.owner_by_sym {
        let input_sym = &obj.symbols[*sym_idx];
        let Ok(name_str) = obj.symbol_name(input_sym) else {
            continue;
        };
        let istr = sym_table.intern(name_str);
        let Some(sid) = sym_table.lookup(istr) else {
            continue;
        };
        // Primary owner symbols sit at atom boundary → atom-relative 0.
        // The origin check skips symbols whose winning definition came
        // from a different input than the one being back-patched.
        if let Symbol::Defined { origin, .. } = sym_table.get(sid) {
            if *origin == input_id {
                sym_table.bind_atom(sid, *atom_id, 0);
                atom_table.get_mut(*atom_id).owner = Some(sid);
            }
        }
    }

    // Pass 2: `.alt_entry` symbols that folded into an existing atom.
    for (sym_idx, atom_id, local_off) in &atomization.alt_entries_by_sym {
        let input_sym = &obj.symbols[*sym_idx];
        let Ok(name_str) = obj.symbol_name(input_sym) else {
            continue;
        };
        let istr = sym_table.intern(name_str);
        let Some(sid) = sym_table.lookup(istr) else {
            continue;
        };
        if let Symbol::Defined { origin, .. } = sym_table.get(sid) {
            if *origin == input_id {
                sym_table.bind_atom(sid, *atom_id, *local_off as u64);
                // Update the atom's alt_entries with the resolver-side
                // SymbolId (we stored the InputSymbol index during
                // atomization; now we know the real handle). Matching on
                // (index, offset) pairs disambiguates entries.
                let atom = atom_table.get_mut(*atom_id);
                for alt in &mut atom.alt_entries {
                    if alt.symbol == SymbolId(*sym_idx as u32)
                        && alt.offset_within_atom == *local_off
                    {
                        alt.symbol = sid;
                    }
                }
            }
        }
    }
}
436
/// Split one section into atoms according to the `MH_SUBSECTIONS_VIA_SYMBOLS`
/// invariant plus `.alt_entry` folding. Literal and unwind specialization
/// lands in follow-up commits; this function's fallback is "one atom per
/// section" for sections the subsections flag doesn't split.
///
/// `syms` must be sorted by in-section offset (done in `atomize_object`).
#[allow(clippy::too_many_arguments)]
fn atomize_regular_section(
    input_id: InputId,
    section_idx: u8,
    sect: &InputSection,
    syms: &[(usize, &InputSymbol, u32)],
    subsections_via_symbols: bool,
    table: &mut AtomTable,
    out: &mut ObjectAtomization,
) {
    let kind = sect.kind;
    let atom_section = AtomSection::from_section_kind(kind);

    // Without the subsections flag, every section becomes one atom — the
    // linker-side equivalent of Apple-style monolithic sections.
    if !subsections_via_symbols {
        let atom = build_section_atom(input_id, section_idx, sect, atom_section);
        let id = table.push(atom);
        out.atoms.push(id);
        // All symbols fold into the single atom at their section offsets
        // so back-patching can still bind them.
        for (sym_idx, _sym, off) in syms {
            out.alt_entries_by_sym.push((*sym_idx, id, *off));
        }
        return;
    }

    // Zerofill: splitting happens per symbol (each tentative common-style
    // slot gets its own atom). If no symbols defined, emit a single atom.
    if atom_section.is_zerofill() {
        atomize_zerofill(input_id, section_idx, sect, syms, atom_section, table, out);
        return;
    }

    // Literal sections split on content boundaries (null for `__cstring`,
    // fixed-size chunks for `__literal4/8/16`) independent of symbol
    // labels. Sprint 24's ICF uses the per-atom content for dedup.
    if atom_section.is_literal() {
        atomize_literal_section(input_id, section_idx, sect, syms, atom_section, table, out);
        return;
    }

    // `__compact_unwind` is a fixed-layout array of 32-byte records; each
    // record becomes its own atom with `parent_of` wired to the function
    // atom it describes (linked post-hoc in `link_unwind_parents`).
    if atom_section == AtomSection::CompactUnwind {
        atomize_compact_unwind(input_id, section_idx, sect, syms, atom_section, table, out);
        return;
    }

    // With subsections_via_symbols and at least one split point, walk the
    // sorted symbols and emit one atom per non-alt_entry boundary.
    if syms.is_empty() {
        let atom = build_section_atom(input_id, section_idx, sect, atom_section);
        let id = table.push(atom);
        out.atoms.push(id);
        return;
    }

    // If there's content before the first symbol, carve a head atom
    // (unowned). afs-as emits a leading symbol in practice so this is
    // typically zero bytes, but the fallback keeps the byte-flow intact.
    let first_offset = syms[0].2;
    if first_offset > 0 {
        let head = build_slice_atom(
            input_id,
            section_idx,
            sect,
            atom_section,
            0,
            first_offset,
            None,
            &[],
        );
        let head_id = table.push(head);
        out.atoms.push(head_id);
    }

    // Walk symbol boundaries.
    let section_size = sect.size as u32;
    let mut i = 0;
    while i < syms.len() {
        let (primary_idx, primary, atom_offset) = syms[i];
        // The atom runs up to the next non-alt_entry symbol; trailing
        // alt_entry symbols fold into this atom rather than splitting it.
        let next_real_boundary = find_next_non_alt_entry(syms, i + 1)
            .map(|j| syms[j].2)
            .unwrap_or(section_size);
        let size = next_real_boundary.saturating_sub(atom_offset);

        // Collect alt_entries that fall into [atom_offset, atom_offset+size).
        // `alts` carries input-symbol indices disguised as SymbolId until
        // `backpatch_symbol_atoms` rewrites them to resolver handles.
        let mut alts: Vec<AltEntry> = Vec::new();
        let mut alt_folded: Vec<(usize, u32)> = Vec::new();
        for (alt_idx, alt_sym, alt_off) in syms.iter().skip(i + 1) {
            if *alt_off >= atom_offset + size {
                break;
            }
            if !alt_sym.alt_entry() {
                break;
            }
            let local = *alt_off - atom_offset;
            alts.push(AltEntry {
                symbol: SymbolId(*alt_idx as u32),
                offset_within_atom: local,
            });
            alt_folded.push((*alt_idx, local));
        }

        let atom = build_slice_atom(
            input_id,
            section_idx,
            sect,
            atom_section,
            atom_offset,
            size,
            Some(primary),
            &alts,
        );
        let id = table.push(atom);
        out.atoms.push(id);
        out.owner_by_sym.push((primary_idx, id));
        for (alt_idx, local_off) in alt_folded {
            out.alt_entries_by_sym.push((alt_idx, id, local_off));
        }

        // Advance past the primary and its folded alt_entries.
        i = find_next_non_alt_entry(syms, i + 1).unwrap_or(syms.len());
    }
}
566
567 /// Split a literal section into atoms. `__cstring` splits at null-byte
568 /// terminators (variable-length); `__literal4/8/16` split at fixed-width
569 /// boundaries. Owner symbols attach at exact offsets where a symbol
570 /// points.
571 fn atomize_literal_section(
572 input_id: InputId,
573 section_idx: u8,
574 sect: &InputSection,
575 syms: &[(usize, &InputSymbol, u32)],
576 atom_section: AtomSection,
577 table: &mut AtomTable,
578 out: &mut ObjectAtomization,
579 ) {
580 match atom_section {
581 AtomSection::CStringLiterals => {
582 atomize_cstring(input_id, section_idx, sect, syms, atom_section, table, out)
583 }
584 AtomSection::Literal4 => atomize_fixed_literal(
585 input_id,
586 section_idx,
587 sect,
588 syms,
589 4,
590 atom_section,
591 table,
592 out,
593 ),
594 AtomSection::Literal8 => atomize_fixed_literal(
595 input_id,
596 section_idx,
597 sect,
598 syms,
599 8,
600 atom_section,
601 table,
602 out,
603 ),
604 AtomSection::Literal16 => atomize_fixed_literal(
605 input_id,
606 section_idx,
607 sect,
608 syms,
609 16,
610 atom_section,
611 table,
612 out,
613 ),
614 _ => unreachable!("atomize_literal_section called with non-literal kind"),
615 }
616 }
617
618 fn atomize_cstring(
619 input_id: InputId,
620 section_idx: u8,
621 sect: &InputSection,
622 syms: &[(usize, &InputSymbol, u32)],
623 atom_section: AtomSection,
624 table: &mut AtomTable,
625 out: &mut ObjectAtomization,
626 ) {
627 let mut offset = 0usize;
628 while offset < sect.data.len() {
629 let relative_nul = sect.data[offset..]
630 .iter()
631 .position(|&b| b == 0)
632 .unwrap_or(sect.data.len() - offset);
633 let end = offset + relative_nul + 1;
634 let end = end.min(sect.data.len());
635 let data = sect.data[offset..end].to_vec();
636 let size = (end - offset) as u32;
637
638 let owner_entry = syms.iter().find(|(_, _, off)| *off as usize == offset);
639 let owner_idx = owner_entry.map(|(i, _, _)| *i);
640
641 let mut flags = AtomFlags::default().with(AtomFlags::LITERAL);
642 if let Some((_, sym, _)) = owner_entry {
643 flags.set(symbol_flags(sym).bits());
644 }
645
646 let atom = Atom {
647 id: AtomId(0),
648 origin: input_id,
649 input_section: section_idx,
650 section: atom_section,
651 input_offset: offset as u32,
652 size,
653 align_pow2: sect.align_pow2 as u8,
654 owner: None,
655 alt_entries: Vec::new(),
656 data,
657 flags,
658 parent_of: None,
659 };
660 let id = table.push(atom);
661 out.atoms.push(id);
662 if let Some(idx) = owner_idx {
663 out.owner_by_sym.push((idx, id));
664 }
665 offset = end;
666 }
667 }
668
669 #[allow(clippy::too_many_arguments)]
670 fn atomize_fixed_literal(
671 input_id: InputId,
672 section_idx: u8,
673 sect: &InputSection,
674 syms: &[(usize, &InputSymbol, u32)],
675 chunk_size: usize,
676 atom_section: AtomSection,
677 table: &mut AtomTable,
678 out: &mut ObjectAtomization,
679 ) {
680 let section_size = sect.size as usize;
681 let mut offset = 0usize;
682 while offset < section_size {
683 let end = (offset + chunk_size).min(section_size);
684 let data_end = end.min(sect.data.len());
685 let data = if offset < data_end {
686 sect.data[offset..data_end].to_vec()
687 } else {
688 Vec::new()
689 };
690 let size = (end - offset) as u32;
691
692 let owner_entry = syms.iter().find(|(_, _, off)| *off as usize == offset);
693 let owner_idx = owner_entry.map(|(i, _, _)| *i);
694
695 let mut flags = AtomFlags::default().with(AtomFlags::LITERAL);
696 if let Some((_, sym, _)) = owner_entry {
697 flags.set(symbol_flags(sym).bits());
698 }
699
700 let atom = Atom {
701 id: AtomId(0),
702 origin: input_id,
703 input_section: section_idx,
704 section: atom_section,
705 input_offset: offset as u32,
706 size,
707 align_pow2: sect.align_pow2 as u8,
708 owner: None,
709 alt_entries: Vec::new(),
710 data,
711 flags,
712 parent_of: None,
713 };
714 let id = table.push(atom);
715 out.atoms.push(id);
716 if let Some(idx) = owner_idx {
717 out.owner_by_sym.push((idx, id));
718 }
719 offset = end;
720 }
721 }
722
723 /// Split `__compact_unwind` into 32-byte atoms (one per record).
724 /// `parent_of` is filled in post-hoc by `link_unwind_parents` once all
725 /// sections of this object have been atomized.
726 fn atomize_compact_unwind(
727 input_id: InputId,
728 section_idx: u8,
729 sect: &InputSection,
730 syms: &[(usize, &InputSymbol, u32)],
731 atom_section: AtomSection,
732 table: &mut AtomTable,
733 out: &mut ObjectAtomization,
734 ) {
735 const RECORD: usize = 32;
736 let section_size = sect.size as usize;
737 let mut offset = 0usize;
738 while offset < section_size {
739 let end = (offset + RECORD).min(section_size);
740 let data = sect.data[offset..end.min(sect.data.len())].to_vec();
741 let size = (end - offset) as u32;
742
743 let owner_idx = syms
744 .iter()
745 .find(|(_, _, off)| *off as usize == offset)
746 .map(|(i, _, _)| *i);
747
748 let atom = Atom {
749 id: AtomId(0),
750 origin: input_id,
751 input_section: section_idx,
752 section: atom_section,
753 input_offset: offset as u32,
754 size,
755 align_pow2: sect.align_pow2 as u8,
756 owner: None,
757 alt_entries: Vec::new(),
758 data,
759 flags: AtomFlags::default(),
760 parent_of: None, // filled by link_unwind_parents
761 };
762 let id = table.push(atom);
763 out.atoms.push(id);
764 if let Some(idx) = owner_idx {
765 out.owner_by_sym.push((idx, id));
766 }
767 offset = end;
768 }
769 }
770
771 fn atomize_zerofill(
772 input_id: InputId,
773 section_idx: u8,
774 sect: &InputSection,
775 syms: &[(usize, &InputSymbol, u32)],
776 atom_section: AtomSection,
777 table: &mut AtomTable,
778 out: &mut ObjectAtomization,
779 ) {
780 if syms.is_empty() {
781 let atom = build_section_atom(input_id, section_idx, sect, atom_section);
782 let id = table.push(atom);
783 out.atoms.push(id);
784 return;
785 }
786 let section_size = sect.size as u32;
787 for (i, (sym_idx, sym, start)) in syms.iter().enumerate() {
788 let start = *start;
789 let end = syms
790 .get(i + 1)
791 .map(|(_, _, off)| *off)
792 .unwrap_or(section_size);
793 let size = end.saturating_sub(start);
794 let atom = Atom {
795 id: AtomId(0),
796 origin: input_id,
797 input_section: section_idx,
798 section: atom_section,
799 input_offset: start,
800 size,
801 align_pow2: sect.align_pow2 as u8,
802 owner: Some(SymbolId(*sym_idx as u32)),
803 alt_entries: Vec::new(),
804 data: Vec::new(), // zerofill
805 flags: symbol_flags(sym),
806 parent_of: None,
807 };
808 let id = table.push(atom);
809 out.atoms.push(id);
810 out.owner_by_sym.push((*sym_idx, id));
811 }
812 }
813
814 fn build_section_atom(
815 input_id: InputId,
816 section_idx: u8,
817 sect: &InputSection,
818 atom_section: AtomSection,
819 ) -> Atom {
820 let data = if atom_section.is_zerofill() {
821 Vec::new()
822 } else {
823 sect.data.clone()
824 };
825 let mut flags = AtomFlags::default();
826 if sect.kind == SectionKind::Text {
827 flags.set(AtomFlags::PURE_INSTRUCTIONS);
828 }
829 Atom {
830 id: AtomId(0),
831 origin: input_id,
832 input_section: section_idx,
833 section: atom_section,
834 input_offset: 0,
835 size: sect.size as u32,
836 align_pow2: sect.align_pow2 as u8,
837 owner: None,
838 alt_entries: Vec::new(),
839 data,
840 flags,
841 parent_of: None,
842 }
843 }
844
845 #[allow(clippy::too_many_arguments)]
846 fn build_slice_atom(
847 input_id: InputId,
848 section_idx: u8,
849 sect: &InputSection,
850 atom_section: AtomSection,
851 offset: u32,
852 size: u32,
853 owner: Option<&InputSymbol>,
854 alt_entries: &[AltEntry],
855 ) -> Atom {
856 let data = if atom_section.is_zerofill() {
857 Vec::new()
858 } else {
859 let start = offset as usize;
860 let end = (offset + size) as usize;
861 sect.data[start..end.min(sect.data.len())].to_vec()
862 };
863 let mut flags = AtomFlags::default();
864 if sect.kind == SectionKind::Text {
865 flags.set(AtomFlags::PURE_INSTRUCTIONS);
866 }
867 if let Some(sym) = owner {
868 flags.set(symbol_flags(sym).bits());
869 }
870 Atom {
871 id: AtomId(0),
872 origin: input_id,
873 input_section: section_idx,
874 section: atom_section,
875 input_offset: offset,
876 size,
877 align_pow2: sect.align_pow2 as u8,
878 // owner is wired at back-patch time via `backpatch_symbol_atoms`;
879 // atomization doesn't know the resolver-side SymbolId yet.
880 owner: None,
881 alt_entries: alt_entries.to_vec(),
882 data,
883 flags,
884 parent_of: None,
885 }
886 }
887
888 fn symbol_flags(sym: &InputSymbol) -> AtomFlags {
889 let mut f = AtomFlags::default();
890 if sym.no_dead_strip() {
891 f.set(AtomFlags::NO_DEAD_STRIP);
892 }
893 if sym.weak_def() {
894 f.set(AtomFlags::WEAK_DEF);
895 }
896 f
897 }
898
899 /// Find the next non-alt_entry symbol starting from index `i`. Returns the
900 /// index (into `syms`), or `None` if every remaining symbol is an alt
901 /// entry.
902 fn find_next_non_alt_entry(syms: &[(usize, &InputSymbol, u32)], from: usize) -> Option<usize> {
903 syms.iter()
904 .enumerate()
905 .skip(from)
906 .find(|(_, (_, s, _))| !s.alt_entry())
907 .map(|(i, _)| i)
908 }
909
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal text-atom fixture; `id` is a placeholder that
    /// `AtomTable::push` overwrites.
    fn make_text_atom(origin: InputId, sect: u8, off: u32, size: u32) -> Atom {
        Atom {
            id: AtomId(0), // will be overwritten by push
            origin,
            input_section: sect,
            section: AtomSection::Text,
            input_offset: off,
            size,
            align_pow2: 2,
            owner: None,
            alt_entries: Vec::new(),
            data: vec![0u8; size as usize],
            flags: AtomFlags::default().with(AtomFlags::PURE_INSTRUCTIONS),
            parent_of: None,
        }
    }

    #[test]
    fn push_assigns_stable_one_based_ids_and_roundtrips_via_get() {
        let mut t = AtomTable::new();
        let a = t.push(make_text_atom(InputId(0), 1, 0, 16));
        let b = t.push(make_text_atom(InputId(0), 1, 16, 8));
        assert_eq!(a.0, 1);
        assert_eq!(b.0, 2);
        assert_eq!(t.len(), 2);
        assert_eq!(t.get(a).input_offset, 0);
        assert_eq!(t.get(b).input_offset, 16);
    }

    #[test]
    fn id_zero_is_reserved_as_placeholder() {
        // `Symbol::Defined { atom: AtomId(0) }` is the pre-atomization
        // sentinel; any real atom must have id >= 1.
        let mut t = AtomTable::new();
        let id = t.push(make_text_atom(InputId(0), 1, 0, 1));
        assert_ne!(id, AtomId(0));
        assert_eq!(id, AtomId(1));
    }

    #[test]
    fn atom_section_from_section_kind_covers_all_variants() {
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::Text),
            AtomSection::Text
        );
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::CStringLiterals),
            AtomSection::CStringLiterals
        );
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::CompactUnwind),
            AtomSection::CompactUnwind
        );
        assert_eq!(
            AtomSection::from_section_kind(SectionKind::ZeroFill),
            AtomSection::ZeroFill
        );
        assert!(AtomSection::from_section_kind(SectionKind::ZeroFill).is_zerofill());
        assert!(AtomSection::from_section_kind(SectionKind::CStringLiterals).is_literal());
        assert!(!AtomSection::from_section_kind(SectionKind::Text).is_literal());
    }

    #[test]
    fn atom_flags_bitwise() {
        let f = AtomFlags::default()
            .with(AtomFlags::NO_DEAD_STRIP)
            .with(AtomFlags::WEAK_DEF);
        assert!(f.has(AtomFlags::NO_DEAD_STRIP));
        assert!(f.has(AtomFlags::WEAK_DEF));
        assert!(!f.has(AtomFlags::THREAD_LOCAL));
    }

    #[test]
    fn by_input_section_groups_by_origin_and_section_index() {
        // Insertion order must be preserved inside each group.
        let mut t = AtomTable::new();
        let a = t.push(make_text_atom(InputId(0), 1, 0, 4));
        let b = t.push(make_text_atom(InputId(0), 1, 4, 4));
        let c = t.push(make_text_atom(InputId(1), 1, 0, 4));
        let grouped = t.by_input_section();
        assert_eq!(grouped.get(&(InputId(0), 1)).unwrap(), &vec![a, b]);
        assert_eq!(grouped.get(&(InputId(1), 1)).unwrap(), &vec![c]);
    }
}
997