| 1 | //! Atomization model. |
| 2 | //! |
| 3 | //! An **atom** is the linker's fundamental unit of output layout, |
| 4 | //! dead-stripping, and ICF. Each input section is split into one or more |
| 5 | //! atoms; output sections are concatenations of atoms. Every |
| 6 | //! `Symbol::Defined` owns exactly one atom (except `.alt_entry` chain |
| 7 | //! symbols which fold into a predecessor's atom). |
| 8 | //! |
| 9 | //! afs-as always sets `MH_SUBSECTIONS_VIA_SYMBOLS`, so in practice text and |
| 10 | //! data sections split at symbol boundaries; literal sections |
| 11 | //! (`__cstring`, `__literal*`) split at content boundaries; zerofill and |
//! TLS sections split per-symbol. The full ruleset lives in
//! `atomize_regular_section`.
| 14 | //! |
| 15 | //! Later passes reference atoms via `AtomId` (Sprint 7's opaque handle). |
| 16 | //! This module hands out ids via `AtomTable::push`; `AtomId(0)` is a |
| 17 | //! pre-existing sentinel meaning "no atom bound yet" (used by |
| 18 | //! `Symbol::Defined { atom }` before atomization back-patches it). |
| 19 | |
| 20 | use std::collections::HashMap; |
| 21 | |
| 22 | use crate::input::ObjectFile; |
| 23 | use crate::macho::constants::MH_SUBSECTIONS_VIA_SYMBOLS; |
| 24 | use crate::reloc::{parse_raw_relocs, parse_relocs, Referent}; |
| 25 | use crate::resolve::{AtomId, InputId, SymbolId, SymbolTable}; |
| 26 | use crate::section::{InputSection, SectionKind}; |
| 27 | use crate::symbol::{InputSymbol, SymKind}; |
| 28 | |
| 29 | /// Which conceptual output section family this atom belongs to. Sprint 10 |
| 30 | /// turns these into real `__TEXT,__text` / `__DATA,__data` etc. placements. |
| 31 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] |
| 32 | pub enum AtomSection { |
| 33 | Text, |
| 34 | Data, |
| 35 | ConstData, |
| 36 | CStringLiterals, |
| 37 | Literal4, |
| 38 | Literal8, |
| 39 | Literal16, |
| 40 | ZeroFill, |
| 41 | ThreadLocalData, |
| 42 | ThreadLocalBss, |
| 43 | ThreadLocalVariables, |
| 44 | ThreadLocalInitPointers, |
| 45 | Coalesced, |
| 46 | CompactUnwind, |
| 47 | EhFrame, |
| 48 | SymbolStubs, |
| 49 | NonLazySymbolPointers, |
| 50 | LazySymbolPointers, |
| 51 | /// Section kind we don't have specialized layout for yet. Layout still |
| 52 | /// works (output section keyed by segname/sectname) but downstream |
| 53 | /// passes treat it opaquely. |
| 54 | Other, |
| 55 | } |
| 56 | |
| 57 | impl AtomSection { |
| 58 | pub fn from_section_kind(kind: SectionKind) -> Self { |
| 59 | match kind { |
| 60 | SectionKind::Text => AtomSection::Text, |
| 61 | SectionKind::Data => AtomSection::Data, |
| 62 | SectionKind::ConstData => AtomSection::ConstData, |
| 63 | SectionKind::CStringLiterals => AtomSection::CStringLiterals, |
| 64 | SectionKind::Literal4 => AtomSection::Literal4, |
| 65 | SectionKind::Literal8 => AtomSection::Literal8, |
| 66 | SectionKind::Literal16 => AtomSection::Literal16, |
| 67 | SectionKind::ZeroFill | SectionKind::GbZeroFill => AtomSection::ZeroFill, |
| 68 | SectionKind::ThreadLocalRegular => AtomSection::ThreadLocalData, |
| 69 | SectionKind::ThreadLocalZeroFill => AtomSection::ThreadLocalBss, |
| 70 | SectionKind::ThreadLocalVariables => AtomSection::ThreadLocalVariables, |
| 71 | SectionKind::ThreadLocalVariablePointers => AtomSection::ThreadLocalVariables, |
| 72 | SectionKind::ThreadLocalInitPointers => AtomSection::ThreadLocalInitPointers, |
| 73 | SectionKind::Coalesced => AtomSection::Coalesced, |
| 74 | SectionKind::CompactUnwind => AtomSection::CompactUnwind, |
| 75 | SectionKind::EhFrame => AtomSection::EhFrame, |
| 76 | SectionKind::SymbolStubs => AtomSection::SymbolStubs, |
| 77 | SectionKind::NonLazySymbolPointers => AtomSection::NonLazySymbolPointers, |
| 78 | SectionKind::LazySymbolPointers => AtomSection::LazySymbolPointers, |
| 79 | SectionKind::Regular | SectionKind::Unknown(_) => AtomSection::Other, |
| 80 | } |
| 81 | } |
| 82 | |
| 83 | pub fn is_zerofill(self) -> bool { |
| 84 | matches!(self, AtomSection::ZeroFill | AtomSection::ThreadLocalBss) |
| 85 | } |
| 86 | |
| 87 | pub fn is_literal(self) -> bool { |
| 88 | matches!( |
| 89 | self, |
| 90 | AtomSection::CStringLiterals |
| 91 | | AtomSection::Literal4 |
| 92 | | AtomSection::Literal8 |
| 93 | | AtomSection::Literal16 |
| 94 | ) |
| 95 | } |
| 96 | } |
| 97 | |
/// Boolean atom attributes packed into a single `u32` mask. The set of
/// bits is deliberately small; each one has a distinct linker-visible
/// meaning.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AtomFlags {
    bits: u32,
}

impl AtomFlags {
    /// The empty flag set (identical to `AtomFlags::default()`).
    pub const NONE: AtomFlags = AtomFlags { bits: 0 };
    pub const NO_DEAD_STRIP: u32 = 0x01;
    pub const WEAK_DEF: u32 = 0x02;
    pub const THREAD_LOCAL: u32 = 0x04;
    pub const LITERAL: u32 = 0x08;
    pub const PURE_INSTRUCTIONS: u32 = 0x10;
    /// Set during reloc scan (Sprint 24's ICF gate).
    pub const ADDRESS_TAKEN: u32 = 0x20;

    /// `true` when at least one bit of the mask `bit` is set.
    pub fn has(self, bit: u32) -> bool {
        (self.bits & bit) != 0
    }

    /// Builder-style variant of [`AtomFlags::set`]: returns a copy with
    /// every bit of `bit` turned on.
    pub fn with(self, bit: u32) -> Self {
        AtomFlags {
            bits: self.bits | bit,
        }
    }

    /// Turn on every bit of `bit` in place. Bits are never cleared.
    pub fn set(&mut self, bit: u32) {
        *self = self.with(bit);
    }

    /// The raw mask, suitable for merging into another set via `set`.
    pub fn bits(self) -> u32 {
        let AtomFlags { bits } = self;
        bits
    }
}
| 131 | |
/// A symbol that resolves to a point inside another atom via `.alt_entry`.
/// Used for the `_start` / `_main` pattern where a secondary entry point
/// aliases into the middle of a function.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AltEntry {
    /// Handle of the aliasing symbol. `atomize_regular_section` first
    /// stores the object-local symbol index here (wrapped in `SymbolId`);
    /// `backpatch_symbol_atoms` later overwrites it with the id returned
    /// by the symbol table lookup.
    pub symbol: SymbolId,
    /// Byte offset into the containing atom where this alt entry points.
    pub offset_within_atom: u32,
}
| 141 | |
/// One atom. Dead-stripping, ICF, and layout all work in terms of atoms.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Atom {
    /// Handle assigned by `AtomTable::push`, which overwrites whatever the
    /// constructor put here (the atomizers in this module pass `AtomId(0)`).
    pub id: AtomId,
    /// Input object this atom was carved from.
    pub origin: InputId,
    /// 1-based section index within `origin`'s Mach-O section list.
    pub input_section: u8,
    /// Output-section family, derived from the input section's kind.
    pub section: AtomSection,
    /// Offset within the input section where this atom's content starts.
    pub input_offset: u32,
    /// Byte size. For zerofill atoms, this is virtual; `data` is empty.
    pub size: u32,
    /// log2 of required alignment. Inherited from the containing section.
    pub align_pow2: u8,
    /// Primary defining symbol, if any. Locals that split a section at
    /// `MH_SUBSECTIONS_VIA_SYMBOLS` boundaries but have no matching
    /// `Symbol::Defined` (rare; happens for unnamed atoms inside literal
    /// sections) leave this `None`. `backpatch_symbol_atoms` writes this
    /// field once the owning symbol has been looked up in the symbol table.
    pub owner: Option<SymbolId>,
    /// `.alt_entry` chain — symbols aliased into this atom.
    pub alt_entries: Vec<AltEntry>,
    /// File-backed content, empty for zerofill.
    pub data: Vec<u8>,
    /// Attribute bits; see the constants on `AtomFlags`.
    pub flags: AtomFlags,
    /// For compact-unwind and eh_frame atoms: the function atom whose
    /// lifetime this metadata atom shares. Sprint 23 (dead-strip) uses
    /// this to keep unwind metadata live iff the function is live.
    /// Stays `None` until a post-pass such as `link_unwind_parents`
    /// resolves the function atom.
    pub parent_of: Option<AtomId>,
}
| 171 | |
| 172 | /// Registry of all atoms in the link. `push` hands out stable `AtomId`s; |
| 173 | /// `get` / `get_mut` index into the table. |
| 174 | #[derive(Debug, Default)] |
| 175 | pub struct AtomTable { |
| 176 | atoms: Vec<Atom>, |
| 177 | } |
| 178 | |
| 179 | impl AtomTable { |
| 180 | pub fn new() -> Self { |
| 181 | Self::default() |
| 182 | } |
| 183 | |
| 184 | /// Assign an id to `atom` (overwriting any prior `id` field) and |
| 185 | /// store it. Returns the new handle. |
| 186 | pub fn push(&mut self, mut atom: Atom) -> AtomId { |
| 187 | // Skip id 0 — `AtomId(0)` is the pre-atomization placeholder for |
| 188 | // `Symbol::Defined { atom }` slots seeded before atomization runs. |
| 189 | let id = AtomId((self.atoms.len() as u32) + 1); |
| 190 | atom.id = id; |
| 191 | self.atoms.push(atom); |
| 192 | id |
| 193 | } |
| 194 | |
| 195 | pub fn get(&self, id: AtomId) -> &Atom { |
| 196 | &self.atoms[(id.0 - 1) as usize] |
| 197 | } |
| 198 | |
| 199 | pub fn get_mut(&mut self, id: AtomId) -> &mut Atom { |
| 200 | &mut self.atoms[(id.0 - 1) as usize] |
| 201 | } |
| 202 | |
| 203 | pub fn len(&self) -> usize { |
| 204 | self.atoms.len() |
| 205 | } |
| 206 | |
| 207 | pub fn is_empty(&self) -> bool { |
| 208 | self.atoms.is_empty() |
| 209 | } |
| 210 | |
| 211 | pub fn iter(&self) -> impl Iterator<Item = (AtomId, &Atom)> { |
| 212 | self.atoms |
| 213 | .iter() |
| 214 | .enumerate() |
| 215 | .map(|(i, a)| (AtomId((i + 1) as u32), a)) |
| 216 | } |
| 217 | |
| 218 | /// Group atoms by `(origin, input_section)`, preserving insertion |
| 219 | /// order within each group. Sprint 10's layout pass walks this |
| 220 | /// grouping to preserve input ordering within output sections. |
| 221 | pub fn by_input_section(&self) -> HashMap<(InputId, u8), Vec<AtomId>> { |
| 222 | let mut out: HashMap<(InputId, u8), Vec<AtomId>> = HashMap::new(); |
| 223 | for (id, atom) in self.iter() { |
| 224 | out.entry((atom.origin, atom.input_section)) |
| 225 | .or_default() |
| 226 | .push(id); |
| 227 | } |
| 228 | out |
| 229 | } |
| 230 | } |
| 231 | |
| 232 | // --------------------------------------------------------------------------- |
| 233 | // Atomization pass. |
| 234 | // --------------------------------------------------------------------------- |
| 235 | |
/// Per-object atomization output. Back-patching `Symbol::Defined.atom`
/// walks `owner_by_sym`; Sprint 23's dead-strip reads `alt_entries_by_sym`
/// when computing the live graph.
#[derive(Debug, Default)]
pub struct ObjectAtomization {
    /// Every atom created for this object, in creation order.
    pub atoms: Vec<AtomId>,
    /// `(symbol_index_in_object → atom that owns it)`. Populated for every
    /// external/private-extern SECT symbol that started a new atom. The
    /// literal and compact-unwind splitters also add an entry when a
    /// symbol sits exactly at the start of one of their atoms.
    pub owner_by_sym: Vec<(usize, AtomId)>,
    /// `(symbol_index_in_object → (containing_atom, offset_within_atom))`.
    /// Populated for `.alt_entry` symbols that folded into an existing atom.
    /// When the object lacks `MH_SUBSECTIONS_VIA_SYMBOLS`, every boundary
    /// symbol of a section is recorded here against that section's single
    /// atom instead of in `owner_by_sym`.
    pub alt_entries_by_sym: Vec<(usize, AtomId, u32)>,
}
| 249 | |
| 250 | /// Atomize every section in `obj`, pushing into `table`. The caller |
| 251 | /// typically walks every input in sequence and merges results. |
| 252 | pub fn atomize_object( |
| 253 | input_id: InputId, |
| 254 | obj: &ObjectFile, |
| 255 | table: &mut AtomTable, |
| 256 | ) -> ObjectAtomization { |
| 257 | let subsections_via_symbols = obj.header.flags & MH_SUBSECTIONS_VIA_SYMBOLS != 0; |
| 258 | let mut out = ObjectAtomization::default(); |
| 259 | |
| 260 | for (sect_idx_zero, sect) in obj.sections.iter().enumerate() { |
| 261 | let sect_idx_one = (sect_idx_zero + 1) as u8; |
| 262 | // Gather symbols targeting this section and translate their |
| 263 | // `n_value` (absolute address in the object's layout) into |
| 264 | // in-section offsets by subtracting the section's `addr`. |
| 265 | // |
| 266 | // Only external / private-extern / alt-entry symbols count as |
| 267 | // subsection boundaries. Locals like `ltmp0` often sit at the |
| 268 | // same offset as an adjacent external (they're compiler-generated |
| 269 | // anchors for PC-relative addressing); splitting at them would |
| 270 | // produce zero-size atoms. This matches ld64's pragmatic reading |
| 271 | // of MH_SUBSECTIONS_VIA_SYMBOLS. |
| 272 | let mut syms: Vec<(usize, &InputSymbol, u32)> = obj |
| 273 | .symbols |
| 274 | .iter() |
| 275 | .enumerate() |
| 276 | .filter(|(_, s)| { |
| 277 | s.stab_kind().is_none() |
| 278 | && s.kind() == SymKind::Sect |
| 279 | && s.sect_idx() == sect_idx_one |
| 280 | && (s.is_ext() || s.is_private_ext() || s.alt_entry()) |
| 281 | }) |
| 282 | .map(|(i, s)| { |
| 283 | let offset = s.value().saturating_sub(sect.addr) as u32; |
| 284 | (i, s, offset) |
| 285 | }) |
| 286 | .collect(); |
| 287 | syms.sort_by_key(|(_, _, off)| *off); |
| 288 | |
| 289 | atomize_regular_section( |
| 290 | input_id, |
| 291 | sect_idx_one, |
| 292 | sect, |
| 293 | &syms, |
| 294 | subsections_via_symbols, |
| 295 | table, |
| 296 | &mut out, |
| 297 | ); |
| 298 | } |
| 299 | |
| 300 | // Post-pass: wire metadata atoms to the function atoms whose lifetime |
| 301 | // they track, so dead-strip can prune unwind surfaces precisely. |
| 302 | link_unwind_parents(input_id, obj, table, &out); |
| 303 | link_eh_frame_parents(input_id, obj, table, &out); |
| 304 | |
| 305 | out |
| 306 | } |
| 307 | |
| 308 | /// Walk `__compact_unwind` atoms; for each, find its `function_start` |
| 309 | /// reloc (at record offset 0), resolve the referent to a function atom |
| 310 | /// within this same input, and set `parent_of`. External-symbol relocs |
| 311 | /// (e.g. `__compact_unwind` referencing a function in another object) |
| 312 | /// are left with `parent_of = None` and wired by Sprint 17's unwind |
| 313 | /// synthesis pass, which has the full atom table. |
| 314 | fn link_unwind_parents( |
| 315 | input_id: InputId, |
| 316 | obj: &ObjectFile, |
| 317 | table: &mut AtomTable, |
| 318 | out: &ObjectAtomization, |
| 319 | ) { |
| 320 | let Some((cu_idx_zero, cu_sect)) = obj |
| 321 | .sections |
| 322 | .iter() |
| 323 | .enumerate() |
| 324 | .find(|(_, s)| s.kind == SectionKind::CompactUnwind) |
| 325 | else { |
| 326 | return; |
| 327 | }; |
| 328 | let cu_idx_one = (cu_idx_zero + 1) as u8; |
| 329 | |
| 330 | let raws = match parse_raw_relocs(&cu_sect.raw_relocs, 0, cu_sect.nreloc) { |
| 331 | Ok(r) => r, |
| 332 | Err(_) => return, |
| 333 | }; |
| 334 | let fused = match parse_relocs(&raws) { |
| 335 | Ok(f) => f, |
| 336 | Err(_) => return, |
| 337 | }; |
| 338 | |
| 339 | // Index atoms produced by this object for (section, offset) lookup. |
| 340 | let mut atom_index: HashMap<(u8, u32), AtomId> = HashMap::new(); |
| 341 | for id in &out.atoms { |
| 342 | let a = table.get(*id); |
| 343 | atom_index.insert((a.input_section, a.input_offset), *id); |
| 344 | } |
| 345 | |
| 346 | // For each compact_unwind atom, find its first reloc. |
| 347 | for id in &out.atoms { |
| 348 | let atom = table.get(*id); |
| 349 | if atom.input_section != cu_idx_one { |
| 350 | continue; |
| 351 | } |
| 352 | let record_start = atom.input_offset; |
| 353 | let Some(r) = fused.iter().find(|r| r.offset == record_start) else { |
| 354 | continue; |
| 355 | }; |
| 356 | let parent = match r.referent { |
| 357 | Referent::Section(sect_idx) => { |
| 358 | // The 8-byte `function_start` field holds the target's |
| 359 | // in-section offset. For ARM64_RELOC_UNSIGNED, that byte |
| 360 | // window carries the addend directly. |
| 361 | if atom.data.len() >= 8 { |
| 362 | let mut buf = [0u8; 8]; |
| 363 | buf.copy_from_slice(&atom.data[0..8]); |
| 364 | let target_offset = u64::from_le_bytes(buf) as u32; |
| 365 | atom_index.get(&(sect_idx, target_offset)).copied() |
| 366 | } else { |
| 367 | None |
| 368 | } |
| 369 | } |
| 370 | Referent::Symbol(_) => None, |
| 371 | }; |
| 372 | if let Some(parent_id) = parent { |
| 373 | table.get_mut(*id).parent_of = Some(parent_id); |
| 374 | } |
| 375 | } |
| 376 | let _ = input_id; // reserved for cross-object lookup in Sprint 17 |
| 377 | } |
| 378 | |
| 379 | /// Replace every `Symbol::Defined { atom: AtomId(0), ... }` seeded before |
| 380 | /// atomization with the real atom handle and atom-relative offset. |
| 381 | /// Silently skips symbols that have no matching entry (e.g. those that |
| 382 | /// were replaced by a strong definition elsewhere before atomization ran). |
| 383 | pub fn backpatch_symbol_atoms( |
| 384 | atomization: &ObjectAtomization, |
| 385 | input_id: InputId, |
| 386 | obj: &ObjectFile, |
| 387 | sym_table: &mut SymbolTable, |
| 388 | atom_table: &mut AtomTable, |
| 389 | ) { |
| 390 | use crate::resolve::Symbol; |
| 391 | |
| 392 | for (sym_idx, atom_id) in &atomization.owner_by_sym { |
| 393 | let input_sym = &obj.symbols[*sym_idx]; |
| 394 | let Ok(name_str) = obj.symbol_name(input_sym) else { |
| 395 | continue; |
| 396 | }; |
| 397 | let istr = sym_table.intern(name_str); |
| 398 | let Some(sid) = sym_table.lookup(istr) else { |
| 399 | continue; |
| 400 | }; |
| 401 | // Primary owner symbols sit at atom boundary → atom-relative 0. |
| 402 | if let Symbol::Defined { origin, .. } = sym_table.get(sid) { |
| 403 | if *origin == input_id { |
| 404 | sym_table.bind_atom(sid, *atom_id, 0); |
| 405 | atom_table.get_mut(*atom_id).owner = Some(sid); |
| 406 | } |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | for (sym_idx, atom_id, local_off) in &atomization.alt_entries_by_sym { |
| 411 | let input_sym = &obj.symbols[*sym_idx]; |
| 412 | let Ok(name_str) = obj.symbol_name(input_sym) else { |
| 413 | continue; |
| 414 | }; |
| 415 | let istr = sym_table.intern(name_str); |
| 416 | let Some(sid) = sym_table.lookup(istr) else { |
| 417 | continue; |
| 418 | }; |
| 419 | if let Symbol::Defined { origin, .. } = sym_table.get(sid) { |
| 420 | if *origin == input_id { |
| 421 | sym_table.bind_atom(sid, *atom_id, *local_off as u64); |
| 422 | // Update the atom's alt_entries with the resolver-side |
| 423 | // SymbolId (we stored the InputSymbol index during |
| 424 | // atomization; now we know the real handle). |
| 425 | let atom = atom_table.get_mut(*atom_id); |
| 426 | for alt in &mut atom.alt_entries { |
| 427 | if alt.symbol == SymbolId(*sym_idx as u32) |
| 428 | && alt.offset_within_atom == *local_off |
| 429 | { |
| 430 | alt.symbol = sid; |
| 431 | } |
| 432 | } |
| 433 | } |
| 434 | } |
| 435 | } |
| 436 | } |
| 437 | |
| 438 | /// Split one section into atoms according to the `MH_SUBSECTIONS_VIA_SYMBOLS` |
| 439 | /// invariant plus `.alt_entry` folding. Literal and unwind specialization |
| 440 | /// lands in follow-up commits; this function's fallback is "one atom per |
| 441 | /// section" for sections the subsections flag doesn't split. |
| 442 | #[allow(clippy::too_many_arguments)] |
| 443 | fn atomize_regular_section( |
| 444 | input_id: InputId, |
| 445 | section_idx: u8, |
| 446 | sect: &InputSection, |
| 447 | syms: &[(usize, &InputSymbol, u32)], |
| 448 | subsections_via_symbols: bool, |
| 449 | table: &mut AtomTable, |
| 450 | out: &mut ObjectAtomization, |
| 451 | ) { |
| 452 | let kind = sect.kind; |
| 453 | let atom_section = AtomSection::from_section_kind(kind); |
| 454 | |
| 455 | // Without the subsections flag, every section becomes one atom — the |
| 456 | // linker-side equivalent of Apple-style monolithic sections. |
| 457 | if !subsections_via_symbols { |
| 458 | let atom = build_section_atom(input_id, section_idx, sect, atom_section); |
| 459 | let id = table.push(atom); |
| 460 | out.atoms.push(id); |
| 461 | for (sym_idx, _sym, off) in syms { |
| 462 | out.alt_entries_by_sym.push((*sym_idx, id, *off)); |
| 463 | } |
| 464 | return; |
| 465 | } |
| 466 | |
| 467 | // Zerofill: splitting happens per symbol (each tentative common-style |
| 468 | // slot gets its own atom). If no symbols defined, emit a single atom. |
| 469 | if atom_section.is_zerofill() { |
| 470 | atomize_zerofill(input_id, section_idx, sect, syms, atom_section, table, out); |
| 471 | return; |
| 472 | } |
| 473 | |
| 474 | // Literal sections split on content boundaries (null for `__cstring`, |
| 475 | // fixed-size chunks for `__literal4/8/16`) independent of symbol |
| 476 | // labels. Sprint 24's ICF uses the per-atom content for dedup. |
| 477 | if atom_section.is_literal() { |
| 478 | atomize_literal_section(input_id, section_idx, sect, syms, atom_section, table, out); |
| 479 | return; |
| 480 | } |
| 481 | |
| 482 | // `__compact_unwind` is a fixed-layout array of 32-byte records; each |
| 483 | // record becomes its own atom with `parent_of` wired to the function |
| 484 | // atom it describes (linked post-hoc in `link_unwind_parents`). |
| 485 | if atom_section == AtomSection::CompactUnwind { |
| 486 | atomize_compact_unwind(input_id, section_idx, sect, syms, atom_section, table, out); |
| 487 | return; |
| 488 | } |
| 489 | |
| 490 | if atom_section == AtomSection::EhFrame { |
| 491 | atomize_eh_frame(input_id, section_idx, sect, atom_section, table, out); |
| 492 | return; |
| 493 | } |
| 494 | |
| 495 | // With subsections_via_symbols and at least one split point, walk the |
| 496 | // sorted symbols and emit one atom per non-alt_entry boundary. |
| 497 | if syms.is_empty() { |
| 498 | let atom = build_section_atom(input_id, section_idx, sect, atom_section); |
| 499 | let id = table.push(atom); |
| 500 | out.atoms.push(id); |
| 501 | return; |
| 502 | } |
| 503 | |
| 504 | // If there's content before the first symbol, carve a head atom |
| 505 | // (unowned). afs-as emits a leading symbol in practice so this is |
| 506 | // typically zero bytes, but the fallback keeps the byte-flow intact. |
| 507 | let first_offset = syms[0].2; |
| 508 | if first_offset > 0 { |
| 509 | let head = build_slice_atom( |
| 510 | input_id, |
| 511 | section_idx, |
| 512 | sect, |
| 513 | atom_section, |
| 514 | 0, |
| 515 | first_offset, |
| 516 | None, |
| 517 | &[], |
| 518 | ); |
| 519 | let head_id = table.push(head); |
| 520 | out.atoms.push(head_id); |
| 521 | } |
| 522 | |
| 523 | // Walk symbol boundaries. |
| 524 | let section_size = sect.size as u32; |
| 525 | let mut i = 0; |
| 526 | while i < syms.len() { |
| 527 | let (primary_idx, primary, atom_offset) = syms[i]; |
| 528 | let next_real_boundary = find_next_non_alt_entry(syms, i + 1) |
| 529 | .map(|j| syms[j].2) |
| 530 | .unwrap_or(section_size); |
| 531 | let size = next_real_boundary.saturating_sub(atom_offset); |
| 532 | |
| 533 | // Collect alt_entries that fall into [atom_offset, atom_offset+size). |
| 534 | let mut alts: Vec<AltEntry> = Vec::new(); |
| 535 | let mut alt_folded: Vec<(usize, u32)> = Vec::new(); |
| 536 | for (alt_idx, alt_sym, alt_off) in syms.iter().skip(i + 1) { |
| 537 | if *alt_off >= atom_offset + size { |
| 538 | break; |
| 539 | } |
| 540 | if !alt_sym.alt_entry() { |
| 541 | break; |
| 542 | } |
| 543 | let local = *alt_off - atom_offset; |
| 544 | alts.push(AltEntry { |
| 545 | symbol: SymbolId(*alt_idx as u32), |
| 546 | offset_within_atom: local, |
| 547 | }); |
| 548 | alt_folded.push((*alt_idx, local)); |
| 549 | } |
| 550 | |
| 551 | let atom = build_slice_atom( |
| 552 | input_id, |
| 553 | section_idx, |
| 554 | sect, |
| 555 | atom_section, |
| 556 | atom_offset, |
| 557 | size, |
| 558 | Some(primary), |
| 559 | &alts, |
| 560 | ); |
| 561 | let id = table.push(atom); |
| 562 | out.atoms.push(id); |
| 563 | out.owner_by_sym.push((primary_idx, id)); |
| 564 | for (alt_idx, local_off) in alt_folded { |
| 565 | out.alt_entries_by_sym.push((alt_idx, id, local_off)); |
| 566 | } |
| 567 | |
| 568 | // Advance past the primary and its folded alt_entries. |
| 569 | i = find_next_non_alt_entry(syms, i + 1).unwrap_or(syms.len()); |
| 570 | } |
| 571 | } |
| 572 | |
| 573 | /// Split a literal section into atoms. `__cstring` splits at null-byte |
| 574 | /// terminators (variable-length); `__literal4/8/16` split at fixed-width |
| 575 | /// boundaries. Owner symbols attach at exact offsets where a symbol |
| 576 | /// points. |
| 577 | fn atomize_literal_section( |
| 578 | input_id: InputId, |
| 579 | section_idx: u8, |
| 580 | sect: &InputSection, |
| 581 | syms: &[(usize, &InputSymbol, u32)], |
| 582 | atom_section: AtomSection, |
| 583 | table: &mut AtomTable, |
| 584 | out: &mut ObjectAtomization, |
| 585 | ) { |
| 586 | match atom_section { |
| 587 | AtomSection::CStringLiterals => { |
| 588 | atomize_cstring(input_id, section_idx, sect, syms, atom_section, table, out) |
| 589 | } |
| 590 | AtomSection::Literal4 => atomize_fixed_literal( |
| 591 | input_id, |
| 592 | section_idx, |
| 593 | sect, |
| 594 | syms, |
| 595 | 4, |
| 596 | atom_section, |
| 597 | table, |
| 598 | out, |
| 599 | ), |
| 600 | AtomSection::Literal8 => atomize_fixed_literal( |
| 601 | input_id, |
| 602 | section_idx, |
| 603 | sect, |
| 604 | syms, |
| 605 | 8, |
| 606 | atom_section, |
| 607 | table, |
| 608 | out, |
| 609 | ), |
| 610 | AtomSection::Literal16 => atomize_fixed_literal( |
| 611 | input_id, |
| 612 | section_idx, |
| 613 | sect, |
| 614 | syms, |
| 615 | 16, |
| 616 | atom_section, |
| 617 | table, |
| 618 | out, |
| 619 | ), |
| 620 | _ => unreachable!("atomize_literal_section called with non-literal kind"), |
| 621 | } |
| 622 | } |
| 623 | |
| 624 | fn atomize_cstring( |
| 625 | input_id: InputId, |
| 626 | section_idx: u8, |
| 627 | sect: &InputSection, |
| 628 | syms: &[(usize, &InputSymbol, u32)], |
| 629 | atom_section: AtomSection, |
| 630 | table: &mut AtomTable, |
| 631 | out: &mut ObjectAtomization, |
| 632 | ) { |
| 633 | let mut offset = 0usize; |
| 634 | while offset < sect.data.len() { |
| 635 | let relative_nul = sect.data[offset..] |
| 636 | .iter() |
| 637 | .position(|&b| b == 0) |
| 638 | .unwrap_or(sect.data.len() - offset); |
| 639 | let end = offset + relative_nul + 1; |
| 640 | let end = end.min(sect.data.len()); |
| 641 | let data = sect.data[offset..end].to_vec(); |
| 642 | let size = (end - offset) as u32; |
| 643 | |
| 644 | let owner_entry = syms.iter().find(|(_, _, off)| *off as usize == offset); |
| 645 | let owner_idx = owner_entry.map(|(i, _, _)| *i); |
| 646 | |
| 647 | let mut flags = AtomFlags::default().with(AtomFlags::LITERAL); |
| 648 | if let Some((_, sym, _)) = owner_entry { |
| 649 | flags.set(symbol_flags(sym).bits()); |
| 650 | } |
| 651 | |
| 652 | let atom = Atom { |
| 653 | id: AtomId(0), |
| 654 | origin: input_id, |
| 655 | input_section: section_idx, |
| 656 | section: atom_section, |
| 657 | input_offset: offset as u32, |
| 658 | size, |
| 659 | align_pow2: sect.align_pow2 as u8, |
| 660 | owner: None, |
| 661 | alt_entries: Vec::new(), |
| 662 | data, |
| 663 | flags, |
| 664 | parent_of: None, |
| 665 | }; |
| 666 | let id = table.push(atom); |
| 667 | out.atoms.push(id); |
| 668 | if let Some(idx) = owner_idx { |
| 669 | out.owner_by_sym.push((idx, id)); |
| 670 | } |
| 671 | offset = end; |
| 672 | } |
| 673 | } |
| 674 | |
| 675 | #[allow(clippy::too_many_arguments)] |
| 676 | fn atomize_fixed_literal( |
| 677 | input_id: InputId, |
| 678 | section_idx: u8, |
| 679 | sect: &InputSection, |
| 680 | syms: &[(usize, &InputSymbol, u32)], |
| 681 | chunk_size: usize, |
| 682 | atom_section: AtomSection, |
| 683 | table: &mut AtomTable, |
| 684 | out: &mut ObjectAtomization, |
| 685 | ) { |
| 686 | let section_size = sect.size as usize; |
| 687 | let mut offset = 0usize; |
| 688 | while offset < section_size { |
| 689 | let end = (offset + chunk_size).min(section_size); |
| 690 | let data_end = end.min(sect.data.len()); |
| 691 | let data = if offset < data_end { |
| 692 | sect.data[offset..data_end].to_vec() |
| 693 | } else { |
| 694 | Vec::new() |
| 695 | }; |
| 696 | let size = (end - offset) as u32; |
| 697 | |
| 698 | let owner_entry = syms.iter().find(|(_, _, off)| *off as usize == offset); |
| 699 | let owner_idx = owner_entry.map(|(i, _, _)| *i); |
| 700 | |
| 701 | let mut flags = AtomFlags::default().with(AtomFlags::LITERAL); |
| 702 | if let Some((_, sym, _)) = owner_entry { |
| 703 | flags.set(symbol_flags(sym).bits()); |
| 704 | } |
| 705 | |
| 706 | let atom = Atom { |
| 707 | id: AtomId(0), |
| 708 | origin: input_id, |
| 709 | input_section: section_idx, |
| 710 | section: atom_section, |
| 711 | input_offset: offset as u32, |
| 712 | size, |
| 713 | align_pow2: sect.align_pow2 as u8, |
| 714 | owner: None, |
| 715 | alt_entries: Vec::new(), |
| 716 | data, |
| 717 | flags, |
| 718 | parent_of: None, |
| 719 | }; |
| 720 | let id = table.push(atom); |
| 721 | out.atoms.push(id); |
| 722 | if let Some(idx) = owner_idx { |
| 723 | out.owner_by_sym.push((idx, id)); |
| 724 | } |
| 725 | offset = end; |
| 726 | } |
| 727 | } |
| 728 | |
| 729 | /// Split `__compact_unwind` into 32-byte atoms (one per record). |
| 730 | /// `parent_of` is filled in post-hoc by `link_unwind_parents` once all |
| 731 | /// sections of this object have been atomized. |
| 732 | fn atomize_compact_unwind( |
| 733 | input_id: InputId, |
| 734 | section_idx: u8, |
| 735 | sect: &InputSection, |
| 736 | syms: &[(usize, &InputSymbol, u32)], |
| 737 | atom_section: AtomSection, |
| 738 | table: &mut AtomTable, |
| 739 | out: &mut ObjectAtomization, |
| 740 | ) { |
| 741 | const RECORD: usize = 32; |
| 742 | let section_size = sect.size as usize; |
| 743 | let mut offset = 0usize; |
| 744 | while offset < section_size { |
| 745 | let end = (offset + RECORD).min(section_size); |
| 746 | let data = sect.data[offset..end.min(sect.data.len())].to_vec(); |
| 747 | let size = (end - offset) as u32; |
| 748 | |
| 749 | let owner_idx = syms |
| 750 | .iter() |
| 751 | .find(|(_, _, off)| *off as usize == offset) |
| 752 | .map(|(i, _, _)| *i); |
| 753 | |
| 754 | let atom = Atom { |
| 755 | id: AtomId(0), |
| 756 | origin: input_id, |
| 757 | input_section: section_idx, |
| 758 | section: atom_section, |
| 759 | input_offset: offset as u32, |
| 760 | size, |
| 761 | align_pow2: sect.align_pow2 as u8, |
| 762 | owner: None, |
| 763 | alt_entries: Vec::new(), |
| 764 | data, |
| 765 | flags: AtomFlags::default(), |
| 766 | parent_of: None, // filled by link_unwind_parents |
| 767 | }; |
| 768 | let id = table.push(atom); |
| 769 | out.atoms.push(id); |
| 770 | if let Some(idx) = owner_idx { |
| 771 | out.owner_by_sym.push((idx, id)); |
| 772 | } |
| 773 | offset = end; |
| 774 | } |
| 775 | } |
| 776 | |
| 777 | /// Split `__eh_frame` into DWARF CFI records so dead-strip can retain only |
| 778 | /// the live FDEs and their shared CIEs. |
| 779 | fn atomize_eh_frame( |
| 780 | input_id: InputId, |
| 781 | section_idx: u8, |
| 782 | sect: &InputSection, |
| 783 | atom_section: AtomSection, |
| 784 | table: &mut AtomTable, |
| 785 | out: &mut ObjectAtomization, |
| 786 | ) { |
| 787 | let mut offset = 0usize; |
| 788 | while offset < sect.data.len() { |
| 789 | let Some(size) = eh_frame_record_size(§.data, offset) else { |
| 790 | let atom = build_section_atom(input_id, section_idx, sect, atom_section); |
| 791 | let id = table.push(atom); |
| 792 | out.atoms.push(id); |
| 793 | return; |
| 794 | }; |
| 795 | |
| 796 | let end = (offset + size).min(sect.data.len()); |
| 797 | let atom = Atom { |
| 798 | id: AtomId(0), |
| 799 | origin: input_id, |
| 800 | input_section: section_idx, |
| 801 | section: atom_section, |
| 802 | input_offset: offset as u32, |
| 803 | size: (end - offset) as u32, |
| 804 | align_pow2: (sect.align_pow2 as u8).min(2), |
| 805 | owner: None, |
| 806 | alt_entries: Vec::new(), |
| 807 | data: sect.data[offset..end].to_vec(), |
| 808 | flags: AtomFlags::default(), |
| 809 | parent_of: None, |
| 810 | }; |
| 811 | let id = table.push(atom); |
| 812 | out.atoms.push(id); |
| 813 | offset = end; |
| 814 | } |
| 815 | } |
| 816 | |
/// Total size in bytes (4-byte length word included) of the CFI record
/// that starts at `offset` in `data`.
///
/// Returns:
/// * `Some(4)` for a zero length word (a bare terminator);
/// * `None` when the length word is `0xffff_ffff`, since that form is not
///   handled here;
/// * `None` when the length word cannot be read or the record would run
///   past the end of `data`;
/// * `Some(4 + length)` otherwise.
///
/// All arithmetic is checked, so oversized lengths or offsets cannot wrap
/// around and be accepted, even on targets with a narrow `usize`.
fn eh_frame_record_size(data: &[u8], offset: usize) -> Option<usize> {
    let length_end = offset.checked_add(4)?;
    let length_bytes: [u8; 4] = data.get(offset..length_end)?.try_into().ok()?;
    match u32::from_le_bytes(length_bytes) {
        0 => Some(4),
        u32::MAX => None,
        length => {
            let size = usize::try_from(length).ok()?.checked_add(4)?;
            let record_end = offset.checked_add(size)?;
            (record_end <= data.len()).then_some(size)
        }
    }
}
| 830 | |
| 831 | fn eh_frame_cie_pointer(atom: &Atom) -> Option<u32> { |
| 832 | (atom.section == AtomSection::EhFrame && atom.data.len() >= 8).then(|| { |
| 833 | let mut buf = [0u8; 4]; |
| 834 | buf.copy_from_slice(&atom.data[4..8]); |
| 835 | u32::from_le_bytes(buf) |
| 836 | }) |
| 837 | } |
| 838 | |
| 839 | fn resolve_function_parent( |
| 840 | obj: &ObjectFile, |
| 841 | atom: &Atom, |
| 842 | reloc: crate::reloc::Reloc, |
| 843 | atom_index: &HashMap<(u8, u32), AtomId>, |
| 844 | field_offset: usize, |
| 845 | ) -> Option<AtomId> { |
| 846 | match reloc.referent { |
| 847 | Referent::Section(sect_idx) => { |
| 848 | let end = field_offset.checked_add(8)?; |
| 849 | let mut buf = [0u8; 8]; |
| 850 | buf.copy_from_slice(atom.data.get(field_offset..end)?); |
| 851 | let target_offset = u64::from_le_bytes(buf) as u32; |
| 852 | atom_index.get(&(sect_idx, target_offset)).copied() |
| 853 | } |
| 854 | Referent::Symbol(sym_idx) => { |
| 855 | let input_sym = obj.symbols.get(sym_idx as usize)?; |
| 856 | (input_sym.kind() == SymKind::Sect) |
| 857 | .then(|| { |
| 858 | let target_offset = input_sym.value().saturating_sub( |
| 859 | obj.sections |
| 860 | .get(input_sym.sect_idx().saturating_sub(1) as usize) |
| 861 | .map(|section| section.addr) |
| 862 | .unwrap_or(0), |
| 863 | ) as u32; |
| 864 | atom_index |
| 865 | .get(&(input_sym.sect_idx(), target_offset)) |
| 866 | .copied() |
| 867 | }) |
| 868 | .flatten() |
| 869 | } |
| 870 | } |
| 871 | } |
| 872 | |
| 873 | fn link_eh_frame_parents( |
| 874 | input_id: InputId, |
| 875 | obj: &ObjectFile, |
| 876 | table: &mut AtomTable, |
| 877 | out: &ObjectAtomization, |
| 878 | ) { |
| 879 | let Some((eh_idx_zero, eh_sect)) = obj |
| 880 | .sections |
| 881 | .iter() |
| 882 | .enumerate() |
| 883 | .find(|(_, s)| s.kind == SectionKind::EhFrame) |
| 884 | else { |
| 885 | return; |
| 886 | }; |
| 887 | let eh_idx_one = (eh_idx_zero + 1) as u8; |
| 888 | |
| 889 | let raws = match parse_raw_relocs(&eh_sect.raw_relocs, 0, eh_sect.nreloc) { |
| 890 | Ok(r) => r, |
| 891 | Err(_) => return, |
| 892 | }; |
| 893 | let fused = match parse_relocs(&raws) { |
| 894 | Ok(f) => f, |
| 895 | Err(_) => return, |
| 896 | }; |
| 897 | |
| 898 | let mut atom_index: HashMap<(u8, u32), AtomId> = HashMap::new(); |
| 899 | for id in &out.atoms { |
| 900 | let a = table.get(*id); |
| 901 | atom_index.insert((a.input_section, a.input_offset), *id); |
| 902 | } |
| 903 | |
| 904 | for id in &out.atoms { |
| 905 | let atom = table.get(*id); |
| 906 | if atom.input_section != eh_idx_one { |
| 907 | continue; |
| 908 | } |
| 909 | let Some(cie_pointer) = eh_frame_cie_pointer(atom) else { |
| 910 | continue; |
| 911 | }; |
| 912 | if cie_pointer == 0 { |
| 913 | continue; |
| 914 | } |
| 915 | let Some(reloc) = fused.iter().find(|r| r.offset == atom.input_offset + 8) else { |
| 916 | continue; |
| 917 | }; |
| 918 | if let Some(parent_id) = resolve_function_parent(obj, atom, *reloc, &atom_index, 8) { |
| 919 | table.get_mut(*id).parent_of = Some(parent_id); |
| 920 | } |
| 921 | } |
| 922 | let _ = input_id; |
| 923 | } |
| 924 | |
| 925 | fn atomize_zerofill( |
| 926 | input_id: InputId, |
| 927 | section_idx: u8, |
| 928 | sect: &InputSection, |
| 929 | syms: &[(usize, &InputSymbol, u32)], |
| 930 | atom_section: AtomSection, |
| 931 | table: &mut AtomTable, |
| 932 | out: &mut ObjectAtomization, |
| 933 | ) { |
| 934 | if syms.is_empty() { |
| 935 | let atom = build_section_atom(input_id, section_idx, sect, atom_section); |
| 936 | let id = table.push(atom); |
| 937 | out.atoms.push(id); |
| 938 | return; |
| 939 | } |
| 940 | let section_size = sect.size as u32; |
| 941 | for (i, (sym_idx, sym, start)) in syms.iter().enumerate() { |
| 942 | let start = *start; |
| 943 | let end = syms |
| 944 | .get(i + 1) |
| 945 | .map(|(_, _, off)| *off) |
| 946 | .unwrap_or(section_size); |
| 947 | let size = end.saturating_sub(start); |
| 948 | let atom = Atom { |
| 949 | id: AtomId(0), |
| 950 | origin: input_id, |
| 951 | input_section: section_idx, |
| 952 | section: atom_section, |
| 953 | input_offset: start, |
| 954 | size, |
| 955 | align_pow2: sect.align_pow2 as u8, |
| 956 | owner: Some(SymbolId(*sym_idx as u32)), |
| 957 | alt_entries: Vec::new(), |
| 958 | data: Vec::new(), // zerofill |
| 959 | flags: symbol_flags(sym), |
| 960 | parent_of: None, |
| 961 | }; |
| 962 | let id = table.push(atom); |
| 963 | out.atoms.push(id); |
| 964 | out.owner_by_sym.push((*sym_idx, id)); |
| 965 | } |
| 966 | } |
| 967 | |
| 968 | fn build_section_atom( |
| 969 | input_id: InputId, |
| 970 | section_idx: u8, |
| 971 | sect: &InputSection, |
| 972 | atom_section: AtomSection, |
| 973 | ) -> Atom { |
| 974 | let data = if atom_section.is_zerofill() { |
| 975 | Vec::new() |
| 976 | } else { |
| 977 | sect.data.clone() |
| 978 | }; |
| 979 | let mut flags = AtomFlags::default(); |
| 980 | if sect.kind == SectionKind::Text { |
| 981 | flags.set(AtomFlags::PURE_INSTRUCTIONS); |
| 982 | } |
| 983 | Atom { |
| 984 | id: AtomId(0), |
| 985 | origin: input_id, |
| 986 | input_section: section_idx, |
| 987 | section: atom_section, |
| 988 | input_offset: 0, |
| 989 | size: sect.size as u32, |
| 990 | align_pow2: sect.align_pow2 as u8, |
| 991 | owner: None, |
| 992 | alt_entries: Vec::new(), |
| 993 | data, |
| 994 | flags, |
| 995 | parent_of: None, |
| 996 | } |
| 997 | } |
| 998 | |
| 999 | #[allow(clippy::too_many_arguments)] |
| 1000 | fn build_slice_atom( |
| 1001 | input_id: InputId, |
| 1002 | section_idx: u8, |
| 1003 | sect: &InputSection, |
| 1004 | atom_section: AtomSection, |
| 1005 | offset: u32, |
| 1006 | size: u32, |
| 1007 | owner: Option<&InputSymbol>, |
| 1008 | alt_entries: &[AltEntry], |
| 1009 | ) -> Atom { |
| 1010 | let data = if atom_section.is_zerofill() { |
| 1011 | Vec::new() |
| 1012 | } else { |
| 1013 | let start = offset as usize; |
| 1014 | let end = (offset + size) as usize; |
| 1015 | sect.data[start..end.min(sect.data.len())].to_vec() |
| 1016 | }; |
| 1017 | let mut flags = AtomFlags::default(); |
| 1018 | if sect.kind == SectionKind::Text { |
| 1019 | flags.set(AtomFlags::PURE_INSTRUCTIONS); |
| 1020 | } |
| 1021 | if let Some(sym) = owner { |
| 1022 | flags.set(symbol_flags(sym).bits()); |
| 1023 | } |
| 1024 | Atom { |
| 1025 | id: AtomId(0), |
| 1026 | origin: input_id, |
| 1027 | input_section: section_idx, |
| 1028 | section: atom_section, |
| 1029 | input_offset: offset, |
| 1030 | size, |
| 1031 | align_pow2: sect.align_pow2 as u8, |
| 1032 | // owner is wired at back-patch time via `backpatch_symbol_atoms`; |
| 1033 | // atomization doesn't know the resolver-side SymbolId yet. |
| 1034 | owner: None, |
| 1035 | alt_entries: alt_entries.to_vec(), |
| 1036 | data, |
| 1037 | flags, |
| 1038 | parent_of: None, |
| 1039 | } |
| 1040 | } |
| 1041 | |
| 1042 | fn symbol_flags(sym: &InputSymbol) -> AtomFlags { |
| 1043 | let mut f = AtomFlags::default(); |
| 1044 | if sym.no_dead_strip() { |
| 1045 | f.set(AtomFlags::NO_DEAD_STRIP); |
| 1046 | } |
| 1047 | if sym.weak_def() { |
| 1048 | f.set(AtomFlags::WEAK_DEF); |
| 1049 | } |
| 1050 | f |
| 1051 | } |
| 1052 | |
| 1053 | /// Find the next non-alt_entry symbol starting from index `i`. Returns the |
| 1054 | /// index (into `syms`), or `None` if every remaining symbol is an alt |
| 1055 | /// entry. |
| 1056 | fn find_next_non_alt_entry(syms: &[(usize, &InputSymbol, u32)], from: usize) -> Option<usize> { |
| 1057 | syms.iter() |
| 1058 | .enumerate() |
| 1059 | .skip(from) |
| 1060 | .find(|(_, (_, s, _))| !s.alt_entry()) |
| 1061 | .map(|(i, _)| i) |
| 1062 | } |
| 1063 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a zero-filled `Text` atom with the given origin, section
    /// index, offset and size. The id is a placeholder that `push` replaces.
    fn make_text_atom(origin: InputId, sect: u8, off: u32, size: u32) -> Atom {
        let zeroed = vec![0u8; size as usize];
        let flags = AtomFlags::default().with(AtomFlags::PURE_INSTRUCTIONS);
        Atom {
            id: AtomId(0), // will be overwritten by push
            origin,
            input_section: sect,
            section: AtomSection::Text,
            input_offset: off,
            size,
            align_pow2: 2,
            owner: None,
            alt_entries: Vec::new(),
            data: zeroed,
            flags,
            parent_of: None,
        }
    }

    #[test]
    fn push_assigns_stable_one_based_ids_and_roundtrips_via_get() {
        let mut table = AtomTable::new();
        let first = table.push(make_text_atom(InputId(0), 1, 0, 16));
        let second = table.push(make_text_atom(InputId(0), 1, 16, 8));

        // Ids are handed out sequentially starting at 1.
        assert_eq!(first.0, 1);
        assert_eq!(second.0, 2);
        assert_eq!(table.len(), 2);

        // Each id resolves back to the atom it was issued for.
        assert_eq!(table.get(first).input_offset, 0);
        assert_eq!(table.get(second).input_offset, 16);
    }

    #[test]
    fn id_zero_is_reserved_as_placeholder() {
        // `Symbol::Defined { atom: AtomId(0) }` is the pre-atomization
        // sentinel; any real atom must have id >= 1.
        let mut table = AtomTable::new();
        let issued = table.push(make_text_atom(InputId(0), 1, 0, 1));
        assert_ne!(issued, AtomId(0));
        assert_eq!(issued, AtomId(1));
    }

    #[test]
    fn atom_section_from_section_kind_covers_all_variants() {
        let expectations = [
            (SectionKind::Text, AtomSection::Text),
            (SectionKind::CStringLiterals, AtomSection::CStringLiterals),
            (SectionKind::CompactUnwind, AtomSection::CompactUnwind),
            (SectionKind::ZeroFill, AtomSection::ZeroFill),
        ];
        for (kind, expected) in expectations {
            assert_eq!(AtomSection::from_section_kind(kind), expected);
        }

        // Classification helpers agree with the mapping.
        assert!(AtomSection::from_section_kind(SectionKind::ZeroFill).is_zerofill());
        assert!(AtomSection::from_section_kind(SectionKind::CStringLiterals).is_literal());
        assert!(!AtomSection::from_section_kind(SectionKind::Text).is_literal());
    }

    #[test]
    fn atom_flags_bitwise() {
        let flags = AtomFlags::default()
            .with(AtomFlags::NO_DEAD_STRIP)
            .with(AtomFlags::WEAK_DEF);
        for present in [AtomFlags::NO_DEAD_STRIP, AtomFlags::WEAK_DEF] {
            assert!(flags.has(present));
        }
        assert!(!flags.has(AtomFlags::THREAD_LOCAL));
    }

    #[test]
    fn by_input_section_groups_by_origin_and_section_index() {
        let mut table = AtomTable::new();
        let first = table.push(make_text_atom(InputId(0), 1, 0, 4));
        let second = table.push(make_text_atom(InputId(0), 1, 4, 4));
        let other_input = table.push(make_text_atom(InputId(1), 1, 0, 4));

        let grouped = table.by_input_section();
        let same_input = grouped.get(&(InputId(0), 1)).unwrap();
        let different_input = grouped.get(&(InputId(1), 1)).unwrap();
        assert_eq!(same_input, &vec![first, second]);
        assert_eq!(different_input, &vec![other_input]);
    }
}
| 1151 |