Rust · 77856 bytes Raw Blame History
1 //! Linker-side symbol table and name resolution.
2 //!
3 //! Sprint 7 lays the substrate:
4 //!
5 //! * `StringInterner` — the source of all symbol-name identity.
6 //! * Opaque ID newtypes (`InputId`, `AtomId`, `DylibId`, `ArchiveId`,
7 //! `MemberId`, `SymbolId`) that later sprints populate with real
8 //! registries.
9 //! * `Symbol` sum type with seven variants — every state a name can be in
10 //! during resolution.
11 //! * `SymbolTable` with the full insertion matrix and a transition log.
12 //!
13 //! Sprint 8 builds the fixed-point `resolve()` pass on top. This module's
14 //! API is deliberately thin: callers hand it symbols, it tells them what
15 //! happened (inserted / replaced / kept / pending archive fetch), and they
16 //! drive the outer loop.
17
18 use std::collections::{HashMap, HashSet, VecDeque};
19 use std::path::{Path, PathBuf};
20 use std::sync::{mpsc, Arc, Mutex};
21 use std::thread;
22
23 use crate::archive::{Archive, ArchiveError};
24 use crate::input::ObjectFile;
25 use crate::macho::dylib::DylibFile;
26 use crate::macho::reader::ReadError;
27
28 // ---------------------------------------------------------------------------
29 // Interned strings.
30 // ---------------------------------------------------------------------------
31
32 /// Opaque handle into a `StringInterner`. Cheap to copy; all comparisons
33 /// between names go through handle equality, not string compare.
34 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
35 pub struct Istr(u32);
36
37 impl Istr {
38 pub fn as_u32(self) -> u32 {
39 self.0
40 }
41 }
42
43 #[derive(Debug, Default)]
44 pub struct StringInterner {
45 strings: Vec<Arc<str>>,
46 index: HashMap<Arc<str>, u32>,
47 }
48
49 impl StringInterner {
50 pub fn new() -> Self {
51 Self::default()
52 }
53
54 /// Intern `s`, returning the existing handle when the string was already
55 /// seen. Allocates at most one `Arc<str>` per unique name.
56 pub fn intern(&mut self, s: &str) -> Istr {
57 if let Some(&i) = self.index.get(s) {
58 return Istr(i);
59 }
60 let rc: Arc<str> = Arc::from(s);
61 let id = self.strings.len() as u32;
62 self.strings.push(rc.clone());
63 self.index.insert(rc, id);
64 Istr(id)
65 }
66
67 pub fn get(&self, s: &str) -> Option<Istr> {
68 self.index.get(s).copied().map(Istr)
69 }
70
71 pub fn resolve(&self, i: Istr) -> &str {
72 &self.strings[i.0 as usize]
73 }
74
75 pub fn len(&self) -> usize {
76 self.strings.len()
77 }
78
79 pub fn is_empty(&self) -> bool {
80 self.strings.is_empty()
81 }
82 }
83
84 // ---------------------------------------------------------------------------
85 // Opaque IDs — populated by later sprints. Sprint 7 only defines the
86 // newtypes so `Symbol` can reference them without cyclic-module pain.
87 // ---------------------------------------------------------------------------
88
89 macro_rules! opaque_id {
90 ($(#[$meta:meta])* $name:ident) => {
91 $(#[$meta])*
92 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
93 pub struct $name(pub u32);
94 impl $name {
95 pub fn as_u32(self) -> u32 {
96 self.0
97 }
98 }
99 };
100 }
101
102 opaque_id!(
103 /// Handle into the driver's input registry (Sprint 8): one slot per
104 /// `.o`, archive member fetched on demand, `.a`, `.dylib`, or `.tbd`.
105 InputId
106 );
107 opaque_id!(
108 /// Handle into the atomization table populated in Sprint 9. Each
109 /// `Symbol::Defined` owns exactly one atom.
110 AtomId
111 );
112 opaque_id!(
113 /// Handle into the dylib registry: one slot per loaded `DylibFile`.
114 DylibId
115 );
116 opaque_id!(
117 /// Handle into the archive registry (Sprint 4's `Archive` wrapped
118 /// in a per-linker instance): one slot per `.a` on the link line.
119 ArchiveId
120 );
121 opaque_id!(
122 /// Per-archive handle identifying a member by its `ar_hdr` offset.
123 MemberId
124 );
125 opaque_id!(
126 /// Index into `SymbolTable::symbols`. Stable across the whole link.
127 SymbolId
128 );
129
130 // ---------------------------------------------------------------------------
131 // Inputs registry.
132 //
133 // Sprint 8 drives resolution against a triple of per-kind Vecs. Handles
134 // `InputId` / `ArchiveId` / `DylibId` are 1:1 with slot indices here, and
135 // the opaque newtypes (above) keep them from being confused across
136 // categories.
137 // ---------------------------------------------------------------------------
138
139 #[derive(Debug)]
140 pub struct ObjectInput {
141 pub path: PathBuf,
142 pub load_order: usize,
143 pub archive_member_offset: Option<u32>,
144 /// Raw bytes retained for diagnostics and future low-level readers.
145 pub bytes: Vec<u8>,
146 /// Parsed object view. It owns section/relocation/string-table buffers,
147 /// so it is safe to build off-thread and then borrow during the link.
148 pub parsed: ObjectFile,
149 }
150
151 #[derive(Debug)]
152 pub struct ArchiveInput {
153 pub path: PathBuf,
154 pub load_order: usize,
155 pub bytes: Vec<u8>,
156 /// Members we've already fetched (keyed by `ar_hdr` offset). Prevents
157 /// the fixed-point loop from re-ingesting the same object twice —
158 /// important both for correctness (no duplicate-strong errors from
159 /// our own symbols) and for keeping transitions deterministic.
160 pub fetched: HashSet<u32>,
161 }
162
163 #[derive(Debug)]
164 pub struct DylibInput {
165 pub path: PathBuf,
166 /// Parsed `DylibFile`. `DylibFile` owns its data (no borrow into
167 /// `bytes`), so we keep it pre-parsed for O(1) export-trie walks.
168 pub file: DylibFile,
169 /// Install name surfaced in the output's `LC_LOAD_DYLIB` list.
170 ///
171 /// Multi-document TBD inputs may seed exports from several sibling
172 /// documents while still canonicalizing them back to one umbrella load
173 /// command (e.g. `libSystem.tbd`).
174 pub load_install_name: String,
175 /// Current-version field surfaced in the output `LC_LOAD_DYLIB`.
176 pub load_current_version: u32,
177 /// Compatibility-version field surfaced in the output `LC_LOAD_DYLIB`.
178 pub load_compatibility_version: u32,
179 /// 1-based two-level-namespace ordinal encoded into undefined symbols and
180 /// bind opcodes. Matches the output's `LC_LOAD_DYLIB` ordering, so several
181 /// parsed TBD documents from one umbrella input may legitimately share it.
182 pub ordinal: u16,
183 }
184
185 #[derive(Debug, Clone)]
186 pub struct DylibLoadMeta {
187 pub install_name: String,
188 pub current_version: u32,
189 pub compatibility_version: u32,
190 pub ordinal: u16,
191 }
192
193 #[derive(Debug, Default)]
194 pub struct Inputs {
195 pub objects: Vec<ObjectInput>,
196 pub archives: Vec<ArchiveInput>,
197 pub dylibs: Vec<DylibInput>,
198 }
199
200 #[derive(Debug)]
201 pub enum InputAddError {
202 Read(ReadError),
203 Archive(ArchiveError),
204 }
205
206 impl std::fmt::Display for InputAddError {
207 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
208 match self {
209 InputAddError::Read(e) => write!(f, "{e}"),
210 InputAddError::Archive(e) => write!(f, "{e}"),
211 }
212 }
213 }
214
215 impl std::error::Error for InputAddError {}
216
217 impl From<ReadError> for InputAddError {
218 fn from(e: ReadError) -> Self {
219 InputAddError::Read(e)
220 }
221 }
222
223 impl From<ArchiveError> for InputAddError {
224 fn from(e: ArchiveError) -> Self {
225 InputAddError::Archive(e)
226 }
227 }
228
229 impl Inputs {
230 pub fn new() -> Self {
231 Self::default()
232 }
233
234 /// Register an `.o` file. Validates the Mach-O header by parsing once,
235 /// then keeps only the raw bytes (re-parsing on demand is cheap and
236 /// sidesteps borrow-lifetime headaches).
237 pub fn add_object(
238 &mut self,
239 path: PathBuf,
240 bytes: Vec<u8>,
241 load_order: usize,
242 ) -> Result<InputId, InputAddError> {
243 // Validate now — we'd rather catch a bad object at the add site.
244 let parsed = ObjectFile::parse(&path, &bytes)?;
245 Ok(self.add_parsed_object(path, bytes, parsed, load_order))
246 }
247
248 pub fn add_parsed_object(
249 &mut self,
250 path: PathBuf,
251 bytes: Vec<u8>,
252 parsed: ObjectFile,
253 load_order: usize,
254 ) -> InputId {
255 let id = InputId(self.objects.len() as u32);
256 self.objects.push(ObjectInput {
257 path,
258 load_order,
259 archive_member_offset: None,
260 bytes,
261 parsed,
262 });
263 id
264 }
265
266 /// Register an `.a` file.
267 pub fn add_archive(
268 &mut self,
269 path: PathBuf,
270 bytes: Vec<u8>,
271 load_order: usize,
272 ) -> Result<ArchiveId, InputAddError> {
273 Archive::open(&path, &bytes)?; // validate
274 Ok(self.add_validated_archive(path, bytes, load_order))
275 }
276
277 pub fn add_validated_archive(
278 &mut self,
279 path: PathBuf,
280 bytes: Vec<u8>,
281 load_order: usize,
282 ) -> ArchiveId {
283 let id = ArchiveId(self.archives.len() as u32);
284 self.archives.push(ArchiveInput {
285 path,
286 load_order,
287 bytes,
288 fetched: std::collections::HashSet::new(),
289 });
290 id
291 }
292
293 /// Register a `.dylib`. TBD-backed dylibs go through
294 /// [`Inputs::add_dylib_from_tbd`].
295 pub fn add_dylib(&mut self, path: PathBuf, bytes: Vec<u8>) -> Result<DylibId, InputAddError> {
296 let file = DylibFile::parse(&path, &bytes)?;
297 let ordinal = self.next_dylib_ordinal();
298 let id = DylibId(self.dylibs.len() as u32);
299 self.dylibs.push(DylibInput {
300 path,
301 load_install_name: file.install_name.clone(),
302 load_current_version: file.current_version,
303 load_compatibility_version: file.compatibility_version,
304 file,
305 ordinal,
306 });
307 Ok(id)
308 }
309
310 /// Register a TBD-backed dylib. The caller materializes the `DylibFile`
311 /// via `DylibFile::from_tbd(path, tbd, target)` so the target filter
312 /// is explicit.
313 pub fn add_dylib_from_file(&mut self, path: PathBuf, file: DylibFile) -> DylibId {
314 let ordinal = self.next_dylib_ordinal();
315 let load = DylibLoadMeta {
316 install_name: file.install_name.clone(),
317 current_version: file.current_version,
318 compatibility_version: file.compatibility_version,
319 ordinal,
320 };
321 self.add_dylib_from_file_with_meta(path, file, load)
322 }
323
324 pub fn add_dylib_from_file_with_meta(
325 &mut self,
326 path: PathBuf,
327 file: DylibFile,
328 load: DylibLoadMeta,
329 ) -> DylibId {
330 let id = DylibId(self.dylibs.len() as u32);
331 self.dylibs.push(DylibInput {
332 path,
333 load_install_name: load.install_name,
334 load_current_version: load.current_version,
335 load_compatibility_version: load.compatibility_version,
336 file,
337 ordinal: load.ordinal,
338 });
339 id
340 }
341
342 pub fn next_dylib_ordinal(&self) -> u16 {
343 self.dylibs
344 .iter()
345 .map(|dylib| dylib.ordinal)
346 .max()
347 .unwrap_or(0)
348 + 1
349 }
350
351 // ---- accessors ----
352
353 pub fn object(&self, id: InputId) -> &ObjectInput {
354 &self.objects[id.0 as usize]
355 }
356
357 pub fn archive(&self, id: ArchiveId) -> &ArchiveInput {
358 &self.archives[id.0 as usize]
359 }
360
361 pub fn archive_mut(&mut self, id: ArchiveId) -> &mut ArchiveInput {
362 &mut self.archives[id.0 as usize]
363 }
364
365 pub fn dylib(&self, id: DylibId) -> &DylibInput {
366 &self.dylibs[id.0 as usize]
367 }
368
369 /// Borrow a parsed `ObjectFile` view of a registered object.
370 pub fn object_file(&self, id: InputId) -> Result<&ObjectFile, ReadError> {
371 let o = &self.objects[id.0 as usize];
372 Ok(&o.parsed)
373 }
374
375 /// Open an `Archive` view, borrowing from the registry's bytes for the
376 /// returned lifetime.
377 pub fn archive_view(&self, id: ArchiveId) -> Result<Archive<'_>, ArchiveError> {
378 let a = &self.archives[id.0 as usize];
379 Archive::open(&a.path, &a.bytes)
380 }
381 }
382
383 // ---------------------------------------------------------------------------
384 // Symbol sum type.
385 //
386 // Every state a name can be in during resolution:
387 // * Undefined — referenced but not yet satisfied
388 // * Defined — an object file provides a concrete body at `atom + value`
389 // * Common — tentative definition (`N_UNDF + N_EXT + n_value>0`); picks
390 // a winner by size/alignment, then morphs into Defined during atomization
391 // * DylibImport — resolved from a dylib's export trie / TBD
392 // * LazyArchive — name covered by a not-yet-fetched archive member
393 // * LazyObject — name covered by a not-yet-loaded `--start-lib` object
394 // * Alias — `N_INDR` pointing at another name; Sprint 8's resolver
395 // flattens the chain when the target becomes available
396 // ---------------------------------------------------------------------------
397
398 #[derive(Debug, Clone, PartialEq, Eq)]
399 pub enum Symbol {
400 Undefined {
401 name: Istr,
402 origin: InputId,
403 weak_ref: bool,
404 },
405 Defined {
406 name: Istr,
407 origin: InputId,
408 atom: AtomId,
409 value: u64,
410 weak: bool,
411 private_extern: bool,
412 no_dead_strip: bool,
413 },
414 Common {
415 name: Istr,
416 origin: InputId,
417 size: u64,
418 align_pow2: u8,
419 },
420 DylibImport {
421 name: Istr,
422 dylib: DylibId,
423 ordinal: u16,
424 weak_import: bool,
425 },
426 LazyArchive {
427 name: Istr,
428 archive: ArchiveId,
429 member: MemberId,
430 },
431 LazyObject {
432 name: Istr,
433 origin: InputId,
434 },
435 Alias {
436 name: Istr,
437 aliased: Istr,
438 },
439 }
440
441 impl Symbol {
442 pub fn name(&self) -> Istr {
443 match self {
444 Symbol::Undefined { name, .. }
445 | Symbol::Defined { name, .. }
446 | Symbol::Common { name, .. }
447 | Symbol::DylibImport { name, .. }
448 | Symbol::LazyArchive { name, .. }
449 | Symbol::LazyObject { name, .. }
450 | Symbol::Alias { name, .. } => *name,
451 }
452 }
453
454 pub fn kind(&self) -> SymbolKindTag {
455 match self {
456 Symbol::Undefined { .. } => SymbolKindTag::Undefined,
457 Symbol::Defined { .. } => SymbolKindTag::Defined,
458 Symbol::Common { .. } => SymbolKindTag::Common,
459 Symbol::DylibImport { .. } => SymbolKindTag::DylibImport,
460 Symbol::LazyArchive { .. } => SymbolKindTag::LazyArchive,
461 Symbol::LazyObject { .. } => SymbolKindTag::LazyObject,
462 Symbol::Alias { .. } => SymbolKindTag::Alias,
463 }
464 }
465
466 /// True for `Defined` without the weak flag — ld's "strong" category,
467 /// the only one where duplicates are an error.
468 pub fn is_strong_defined(&self) -> bool {
469 matches!(self, Symbol::Defined { weak: false, .. })
470 }
471
472 pub fn is_weak_defined(&self) -> bool {
473 matches!(self, Symbol::Defined { weak: true, .. })
474 }
475 }
476
477 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
478 pub enum SymbolKindTag {
479 Undefined,
480 Defined,
481 Common,
482 DylibImport,
483 LazyArchive,
484 LazyObject,
485 Alias,
486 }
487
488 // ---------------------------------------------------------------------------
489 // SymbolTable.
490 // ---------------------------------------------------------------------------
491
492 /// What `SymbolTable::insert` did with the incoming symbol.
493 #[derive(Debug, Clone, PartialEq, Eq)]
494 pub enum InsertOutcome {
495 /// First time this name appeared; the new symbol is now at `id`.
496 Inserted(SymbolId),
497 /// A stronger / more-concrete symbol replaced the existing entry.
498 Replaced {
499 id: SymbolId,
500 from: SymbolKindTag,
501 to: SymbolKindTag,
502 },
503 /// Existing entry wins; the new one was dropped on the floor.
504 Kept(SymbolId),
505 /// Two Common symbols with the same name were coalesced: size grew to
506 /// the max and alignment grew to the stricter of the two.
507 CommonCoalesced { id: SymbolId },
508 /// Inserting an Undefined whose slot currently holds a LazyArchive.
509 /// The caller (Sprint 8 resolver) must fetch the named archive member
510 /// and re-insert its symbols. The LazyArchive stays in place.
511 PendingArchiveFetch {
512 id: SymbolId,
513 archive: ArchiveId,
514 member: MemberId,
515 },
516 /// Inserting an Undefined whose slot currently holds a LazyObject
517 /// (from `--start-lib`). Caller loads the object and re-inserts.
518 PendingObjectLoad { id: SymbolId, origin: InputId },
519 }
520
521 #[derive(Debug, Clone, PartialEq, Eq)]
522 pub enum InsertError {
523 /// Two distinct strong `Defined` entries collided. Sprint 8 surfaces
524 /// this as a user-facing `duplicate symbol` diagnostic.
525 DuplicateStrong {
526 name: Istr,
527 first: SymbolId,
528 second: Box<Symbol>,
529 },
530 /// Alias chain would cycle back to itself or exceed the depth cap.
531 AliasCycle { name: Istr },
532 }
533
534 #[derive(Debug, Clone, PartialEq, Eq)]
535 pub enum ResolveError {
536 /// `lookup` returned `None` for this name.
537 Unknown(Istr),
538 /// Alias chain cycled or exceeded `MAX_ALIAS_DEPTH`.
539 AliasCycle { start: Istr },
540 }
541
542 /// Maximum hops through alias chains before we call it a cycle. Real
543 /// `N_INDR` chains are almost always a single hop; 32 is generous.
544 pub const MAX_ALIAS_DEPTH: usize = 32;
545
546 #[derive(Debug, Clone, PartialEq, Eq)]
547 pub struct Transition {
548 pub id: SymbolId,
549 pub from: SymbolKindTag,
550 pub to: SymbolKindTag,
551 pub cause: TransitionCause,
552 }
553
554 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
555 pub enum TransitionCause {
556 Inserted,
557 Replaced,
558 CommonCoalesced,
559 }
560
561 #[derive(Debug, Default)]
562 pub struct SymbolTable {
563 pub interner: StringInterner,
564 symbols: Vec<Symbol>,
565 by_name: HashMap<Istr, SymbolId>,
566 transitions: Vec<Transition>,
567 }
568
569 impl SymbolTable {
570 pub fn new() -> Self {
571 Self::default()
572 }
573
574 pub fn intern(&mut self, name: &str) -> Istr {
575 self.interner.intern(name)
576 }
577
578 pub fn lookup(&self, name: Istr) -> Option<SymbolId> {
579 self.by_name.get(&name).copied()
580 }
581
582 pub fn lookup_str(&self, name: &str) -> Option<SymbolId> {
583 self.interner.get(name).and_then(|name| self.lookup(name))
584 }
585
586 pub fn get(&self, id: SymbolId) -> &Symbol {
587 &self.symbols[id.0 as usize]
588 }
589
590 pub fn iter(&self) -> impl Iterator<Item = (SymbolId, &Symbol)> {
591 self.symbols
592 .iter()
593 .enumerate()
594 .map(|(i, s)| (SymbolId(i as u32), s))
595 }
596
597 pub fn len(&self) -> usize {
598 self.symbols.len()
599 }
600
601 pub fn is_empty(&self) -> bool {
602 self.symbols.is_empty()
603 }
604
605 pub fn transitions(&self) -> &[Transition] {
606 &self.transitions
607 }
608
609 /// Back-patch a `Symbol::Defined`'s `atom` and `value` fields. The
610 /// atomization pass (Sprint 9) uses this to replace the `AtomId(0)`
611 /// placeholder seeded at resolution time with the real atom handle
612 /// and the symbol's atom-relative offset. No-op for non-Defined
613 /// entries; silently ignored so an Alias or DylibImport that shadows
614 /// a previously-Defined slot doesn't panic.
615 pub fn bind_atom(&mut self, id: SymbolId, atom: AtomId, value: u64) {
616 if let Symbol::Defined {
617 atom: a, value: v, ..
618 } = &mut self.symbols[id.0 as usize]
619 {
620 *a = atom;
621 *v = value;
622 }
623 }
624
625 /// Insert a symbol, running the resolution matrix. See Sprint 7's
626 /// `.docs/sprints/sprint07.md` for the full matrix.
627 pub fn insert(&mut self, sym: Symbol) -> Result<InsertOutcome, InsertError> {
628 let name = sym.name();
629
630 // Vacant slot: just store it.
631 let Some(&existing_id) = self.by_name.get(&name) else {
632 let id = self.push_new(sym.clone());
633 self.transitions.push(Transition {
634 id,
635 from: sym.kind(),
636 to: sym.kind(),
637 cause: TransitionCause::Inserted,
638 });
639 return Ok(InsertOutcome::Inserted(id));
640 };
641
642 let existing_kind = self.symbols[existing_id.0 as usize].kind();
643 let new_kind = sym.kind();
644
645 // Alias insertions have their own rules (cycle detection).
646 if new_kind == SymbolKindTag::Alias {
647 return self.insert_alias_over(existing_id, sym);
648 }
649
650 // Resolve the existing × new pair via the matrix.
651 use SymbolKindTag::*;
652 let action = match (existing_kind, new_kind) {
653 // --- existing Undefined ---
654 (Undefined, Undefined) => Action::Keep,
655 (Undefined, Defined | Common | DylibImport | LazyArchive | LazyObject) => {
656 Action::Replace
657 }
658
659 // --- existing Defined (strong vs weak resolved inline) ---
660 (Defined, Undefined)
661 | (Defined, Common)
662 | (Defined, DylibImport)
663 | (Defined, LazyArchive)
664 | (Defined, LazyObject) => Action::Keep,
665 (Defined, Defined) => self.defined_vs_defined(existing_id, &sym)?,
666
667 // --- existing Common ---
668 (Common, Undefined) => Action::Keep,
669 (Common, Defined) => Action::Replace,
670 (Common, Common) => Action::CoalesceCommon,
671 (Common, DylibImport) | (Common, LazyArchive) | (Common, LazyObject) => Action::Keep,
672
673 // --- existing DylibImport ---
674 (DylibImport, Undefined) => Action::Keep,
675 (DylibImport, Defined) => Action::Replace,
676 (DylibImport, Common)
677 | (DylibImport, DylibImport)
678 | (DylibImport, LazyArchive)
679 | (DylibImport, LazyObject) => Action::Keep,
680
681 // --- existing LazyArchive ---
682 (LazyArchive, Undefined) => Action::PendingArchiveFetch,
683 (LazyArchive, Defined)
684 | (LazyArchive, Common)
685 | (LazyArchive, DylibImport)
686 | (LazyArchive, LazyObject) => Action::Replace,
687 (LazyArchive, LazyArchive) => Action::Keep,
688
689 // --- existing LazyObject ---
690 (LazyObject, Undefined) => Action::PendingObjectLoad,
691 (LazyObject, Defined)
692 | (LazyObject, Common)
693 | (LazyObject, DylibImport)
694 | (LazyObject, LazyArchive) => Action::Replace,
695 (LazyObject, LazyObject) => Action::Keep,
696
697 // --- existing Alias: a direct form replaces it ---
698 (Alias, _) => Action::Replace,
699
700 // Shouldn't hit — Alias insertions diverged above.
701 (_, Alias) => unreachable!(),
702 };
703
704 Ok(self.apply_action(existing_id, sym, existing_kind, new_kind, action))
705 }
706
707 fn defined_vs_defined(
708 &self,
709 existing_id: SymbolId,
710 new: &Symbol,
711 ) -> Result<Action, InsertError> {
712 let existing = &self.symbols[existing_id.0 as usize];
713 match (existing.is_strong_defined(), new.is_strong_defined()) {
714 (true, true) => Err(InsertError::DuplicateStrong {
715 name: new.name(),
716 first: existing_id,
717 second: Box::new(new.clone()),
718 }),
719 (false, true) => Ok(Action::Replace), // strong over weak
720 (true, false) => Ok(Action::Keep), // strong keeps its seat
721 (false, false) => Ok(Action::Keep), // first weak wins
722 }
723 }
724
725 fn apply_action(
726 &mut self,
727 id: SymbolId,
728 sym: Symbol,
729 from: SymbolKindTag,
730 to: SymbolKindTag,
731 action: Action,
732 ) -> InsertOutcome {
733 match action {
734 Action::Keep => InsertOutcome::Kept(id),
735 Action::Replace => {
736 self.symbols[id.0 as usize] = sym;
737 self.transitions.push(Transition {
738 id,
739 from,
740 to,
741 cause: TransitionCause::Replaced,
742 });
743 InsertOutcome::Replaced { id, from, to }
744 }
745 Action::CoalesceCommon => {
746 self.coalesce_common(id, sym);
747 self.transitions.push(Transition {
748 id,
749 from,
750 to,
751 cause: TransitionCause::CommonCoalesced,
752 });
753 InsertOutcome::CommonCoalesced { id }
754 }
755 Action::PendingArchiveFetch => {
756 let Symbol::LazyArchive {
757 archive, member, ..
758 } = self.symbols[id.0 as usize]
759 else {
760 unreachable!("PendingArchiveFetch requires LazyArchive in slot")
761 };
762 InsertOutcome::PendingArchiveFetch {
763 id,
764 archive,
765 member,
766 }
767 }
768 Action::PendingObjectLoad => {
769 let Symbol::LazyObject { origin, .. } = self.symbols[id.0 as usize] else {
770 unreachable!("PendingObjectLoad requires LazyObject in slot")
771 };
772 InsertOutcome::PendingObjectLoad { id, origin }
773 }
774 }
775 }
776
777 fn coalesce_common(&mut self, id: SymbolId, incoming: Symbol) {
778 let slot = &mut self.symbols[id.0 as usize];
779 let (
780 Symbol::Common {
781 size: a_size,
782 align_pow2: a_align,
783 ..
784 },
785 Symbol::Common {
786 size: b_size,
787 align_pow2: b_align,
788 ..
789 },
790 ) = (slot.clone(), incoming)
791 else {
792 unreachable!("coalesce_common requires two Common entries");
793 };
794 if let Symbol::Common {
795 size, align_pow2, ..
796 } = slot
797 {
798 *size = a_size.max(b_size);
799 *align_pow2 = a_align.max(b_align);
800 }
801 }
802
803 fn push_new(&mut self, sym: Symbol) -> SymbolId {
804 let id = SymbolId(self.symbols.len() as u32);
805 let name = sym.name();
806 self.symbols.push(sym);
807 self.by_name.insert(name, id);
808 id
809 }
810
811 fn insert_alias_over(
812 &mut self,
813 existing_id: SymbolId,
814 sym: Symbol,
815 ) -> Result<InsertOutcome, InsertError> {
816 let Symbol::Alias { name, aliased } = &sym else {
817 unreachable!("insert_alias_over called with non-Alias symbol");
818 };
819 if self.alias_would_cycle(*name, *aliased) {
820 return Err(InsertError::AliasCycle { name: *name });
821 }
822 let from = self.symbols[existing_id.0 as usize].kind();
823 self.symbols[existing_id.0 as usize] = sym;
824 self.transitions.push(Transition {
825 id: existing_id,
826 from,
827 to: SymbolKindTag::Alias,
828 cause: TransitionCause::Replaced,
829 });
830 Ok(InsertOutcome::Replaced {
831 id: existing_id,
832 from,
833 to: SymbolKindTag::Alias,
834 })
835 }
836
837 /// Walk alias chains to a concrete (non-Alias) symbol. Used by Sprint 8
838 /// during name resolution and by `-why_live` to trace where a symbol
839 /// ultimately points. Returns `AliasCycle` if the chain loops or
840 /// exceeds `MAX_ALIAS_DEPTH`.
841 pub fn resolve_chain(&self, name: Istr) -> Result<(SymbolId, &Symbol), ResolveError> {
842 let mut current = name;
843 for _ in 0..MAX_ALIAS_DEPTH {
844 let Some(id) = self.by_name.get(&current).copied() else {
845 return Err(ResolveError::Unknown(name));
846 };
847 let sym = &self.symbols[id.0 as usize];
848 match sym {
849 Symbol::Alias { aliased, .. } => current = *aliased,
850 _ => return Ok((id, sym)),
851 }
852 }
853 Err(ResolveError::AliasCycle { start: name })
854 }
855
856 /// Check whether adding an alias `new_name → target` would create a
857 /// cycle. Returns true if walking the existing chain from `target`
858 /// reaches `new_name` within `MAX_ALIAS_DEPTH` steps.
859 fn alias_would_cycle(&self, new_name: Istr, target: Istr) -> bool {
860 // A self-loop (`_foo → _foo`) is the shortest cycle.
861 if new_name == target {
862 return true;
863 }
864 let mut current = target;
865 for _ in 0..MAX_ALIAS_DEPTH {
866 if current == new_name {
867 return true;
868 }
869 let Some(id) = self.by_name.get(&current).copied() else {
870 return false;
871 };
872 match &self.symbols[id.0 as usize] {
873 Symbol::Alias { aliased, .. } => current = *aliased,
874 _ => return false,
875 }
876 }
877 // Exceeded the depth cap — treat as a cycle.
878 true
879 }
880 }
881
882 /// Internal matrix verdict — what `insert()` decided to do before actually
883 /// performing the operation.
884 enum Action {
885 Keep,
886 Replace,
887 CoalesceCommon,
888 PendingArchiveFetch,
889 PendingObjectLoad,
890 }
891
892 // ---------------------------------------------------------------------------
893 // Seeding: turn `Inputs` into `SymbolTable` entries.
894 // ---------------------------------------------------------------------------
895
896 #[derive(Debug, Default)]
897 pub struct SeedReport {
898 /// Fetches queued by (LazyArchive, Undefined) insertions. Sprint 8's
899 /// fixed-point loop drains this.
900 pub pending_fetches: Vec<PendingFetch>,
901 /// Duplicate-strong-defined errors encountered during seeding.
902 /// Collected rather than short-circuited so the user sees all of them
903 /// in one pass.
904 pub duplicates: Vec<InsertError>,
905 /// `(name, origin)` for every reference to an external name seen in
906 /// an object file. Drives the "referenced by" lines in undefined-
907 /// symbol diagnostics. Multi-input references accumulate.
908 pub referrers: ReferrerLog,
909 }
910
911 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
912 pub struct PendingFetch {
913 pub id: SymbolId,
914 pub archive: ArchiveId,
915 pub member: MemberId,
916 }
917
918 /// Tracks which inputs reference each external name. A single name can
919 /// appear as a Defined, Undefined, or Common in multiple inputs; we keep
920 /// one entry per (name, origin) pair in insertion order.
921 #[derive(Debug, Default, Clone)]
922 pub struct ReferrerLog {
923 entries: HashMap<Istr, Vec<InputId>>,
924 }
925
926 impl ReferrerLog {
927 pub fn new() -> Self {
928 Self::default()
929 }
930
931 pub fn add(&mut self, name: Istr, origin: InputId) {
932 let list = self.entries.entry(name).or_default();
933 if !list.contains(&origin) {
934 list.push(origin);
935 }
936 }
937
938 pub fn get(&self, name: Istr) -> &[InputId] {
939 self.entries.get(&name).map(|v| v.as_slice()).unwrap_or(&[])
940 }
941
942 pub fn extend_from(&mut self, other: &ReferrerLog) {
943 for (&name, origins) in &other.entries {
944 for &origin in origins {
945 self.add(name, origin);
946 }
947 }
948 }
949 }
950
951 impl SeedReport {
952 fn record_outcome(&mut self, outcome: InsertOutcome) {
953 if let InsertOutcome::PendingArchiveFetch {
954 id,
955 archive,
956 member,
957 } = outcome
958 {
959 self.pending_fetches.push(PendingFetch {
960 id,
961 archive,
962 member,
963 });
964 }
965 }
966
967 fn record_error(&mut self, err: InsertError) {
968 self.duplicates.push(err);
969 }
970
971 pub fn has_errors(&self) -> bool {
972 !self.duplicates.is_empty()
973 }
974 }
975
976 #[derive(Debug)]
977 pub enum SeedError {
978 Read(ReadError),
979 Archive(ArchiveError),
980 }
981
982 impl std::fmt::Display for SeedError {
983 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
984 match self {
985 SeedError::Read(e) => write!(f, "{e}"),
986 SeedError::Archive(e) => write!(f, "{e}"),
987 }
988 }
989 }
990
991 impl std::error::Error for SeedError {}
992
993 impl From<ReadError> for SeedError {
994 fn from(e: ReadError) -> Self {
995 SeedError::Read(e)
996 }
997 }
998
999 impl From<ArchiveError> for SeedError {
1000 fn from(e: ArchiveError) -> Self {
1001 SeedError::Archive(e)
1002 }
1003 }
1004
1005 /// Seed LazyArchive entries for every symbol defined in every archive's
1006 /// symbol index. Do this before object seeding so undefined references
1007 /// in objects correctly trigger `PendingArchiveFetch`.
1008 pub fn seed_archives(
1009 inputs: &Inputs,
1010 table: &mut SymbolTable,
1011 report: &mut SeedReport,
1012 ) -> Result<(), SeedError> {
1013 for (ai_idx, ai) in inputs.archives.iter().enumerate() {
1014 let archive = Archive::open(&ai.path, &ai.bytes)?;
1015 let archive_id = ArchiveId(ai_idx as u32);
1016 let Some(idx) = archive.symbol_index() else {
1017 // Archives without a symbol index are legal (clang produces
1018 // them occasionally). They require -all_load to pull anything.
1019 continue;
1020 };
1021 for entry in &idx.entries {
1022 let name = table.intern(&entry.name);
1023 let sym = Symbol::LazyArchive {
1024 name,
1025 archive: archive_id,
1026 member: MemberId(entry.member_header_offset),
1027 };
1028 match table.insert(sym) {
1029 Ok(outcome) => report.record_outcome(outcome),
1030 Err(e) => report.record_error(e),
1031 }
1032 }
1033 }
1034 Ok(())
1035 }
1036
1037 /// Seed one object's externals into the table. Private-extern symbols
1038 /// (`N_PEXT`) participate in resolution but stay hidden from dylib
1039 /// exports. Local symbols, stabs, and debug entries are skipped.
1040 pub fn seed_object(
1041 inputs: &Inputs,
1042 input_id: InputId,
1043 table: &mut SymbolTable,
1044 report: &mut SeedReport,
1045 ) -> Result<(), SeedError> {
1046 let obj = inputs.object_file(input_id)?;
1047 for input_sym in &obj.symbols {
1048 if input_sym.stab_kind().is_some() {
1049 continue;
1050 }
1051 // Only externals and private-externals participate.
1052 if !input_sym.is_ext() && !input_sym.is_private_ext() {
1053 continue;
1054 }
1055 let Ok(name_str) = obj.symbol_name(input_sym) else {
1056 continue;
1057 };
1058 let name = table.intern(name_str);
1059 report.referrers.add(name, input_id);
1060 let Some(sym) = symbolize_input(name, input_sym, input_id) else {
1061 continue;
1062 };
1063 match table.insert(sym) {
1064 Ok(outcome) => report.record_outcome(outcome),
1065 Err(e) => report.record_error(e),
1066 }
1067 }
1068 Ok(())
1069 }
1070
1071 /// Seed every regular export from a dylib as a `DylibImport`. Re-exports
1072 /// and stub+resolver terminals map to `DylibImport` too — they behave
1073 /// like imports from the consumer's perspective.
1074 pub fn seed_dylib(
1075 inputs: &Inputs,
1076 dylib_id: DylibId,
1077 table: &mut SymbolTable,
1078 report: &mut SeedReport,
1079 ) -> Result<(), SeedError> {
1080 let di = inputs.dylib(dylib_id);
1081 let entries = di.file.exports.entries().map_err(SeedError::Read)?;
1082 for entry in entries {
1083 let name = table.intern(&entry.name);
1084 let sym = Symbol::DylibImport {
1085 name,
1086 dylib: dylib_id,
1087 ordinal: di.ordinal,
1088 weak_import: entry.weak_def(),
1089 };
1090 match table.insert(sym) {
1091 Ok(outcome) => report.record_outcome(outcome),
1092 Err(e) => report.record_error(e),
1093 }
1094 }
1095 Ok(())
1096 }
1097
1098 /// Seed every input in a single pass. Order is archives → objects → dylibs
1099 /// so lazy-archive promotion works correctly (undefineds in objects hit
1100 /// pre-seeded LazyArchive slots and trigger `PendingArchiveFetch`).
1101 pub fn seed_all(inputs: &Inputs, table: &mut SymbolTable) -> Result<SeedReport, SeedError> {
1102 let mut report = SeedReport::default();
1103 seed_archives(inputs, table, &mut report)?;
1104 for i in 0..inputs.objects.len() {
1105 seed_object(inputs, InputId(i as u32), table, &mut report)?;
1106 }
1107 for i in 0..inputs.dylibs.len() {
1108 seed_dylib(inputs, DylibId(i as u32), table, &mut report)?;
1109 }
1110 Ok(report)
1111 }
1112
1113 // ---------------------------------------------------------------------------
1114 // Fixed-point fetch loop.
1115 // ---------------------------------------------------------------------------
1116
1117 #[derive(Debug)]
1118 pub enum FetchError {
1119 Read(ReadError),
1120 Archive(ArchiveError),
1121 MemberNotFound {
1122 archive: ArchiveId,
1123 member: MemberId,
1124 },
1125 }
1126
1127 impl std::fmt::Display for FetchError {
1128 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1129 match self {
1130 FetchError::Read(e) => write!(f, "{e}"),
1131 FetchError::Archive(e) => write!(f, "{e}"),
1132 FetchError::MemberNotFound { archive, member } => write!(
1133 f,
1134 "archive #{} has no member at ar_hdr offset 0x{:x}",
1135 archive.0, member.0
1136 ),
1137 }
1138 }
1139 }
1140
1141 impl std::error::Error for FetchError {}
1142
1143 impl From<ReadError> for FetchError {
1144 fn from(e: ReadError) -> Self {
1145 FetchError::Read(e)
1146 }
1147 }
1148
1149 impl From<ArchiveError> for FetchError {
1150 fn from(e: ArchiveError) -> Self {
1151 FetchError::Archive(e)
1152 }
1153 }
1154
1155 impl From<SeedError> for FetchError {
1156 fn from(e: SeedError) -> Self {
1157 match e {
1158 SeedError::Read(r) => FetchError::Read(r),
1159 SeedError::Archive(a) => FetchError::Archive(a),
1160 }
1161 }
1162 }
1163
1164 #[derive(Debug, Default)]
1165 pub struct DrainReport {
1166 pub fetched_members: usize,
1167 pub loaded_paths: Vec<PathBuf>,
1168 pub duplicates: Vec<InsertError>,
1169 pub referrers: ReferrerLog,
1170 }
1171
1172 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
1173 struct ArchiveMemberKey {
1174 archive: ArchiveId,
1175 member: MemberId,
1176 }
1177
1178 #[derive(Clone, Copy)]
1179 struct ArchiveMemberLoadJob<'a> {
1180 index: usize,
1181 key: ArchiveMemberKey,
1182 archive_path: &'a Path,
1183 archive_bytes: &'a [u8],
1184 archive_load_order: usize,
1185 }
1186
1187 struct LoadedArchiveMember {
1188 key: ArchiveMemberKey,
1189 archive_load_order: usize,
1190 logical_path: PathBuf,
1191 bytes: Vec<u8>,
1192 parsed: ObjectFile,
1193 }
1194
1195 fn make_archive_member_jobs<'a>(
1196 inputs: &'a Inputs,
1197 keys: Vec<ArchiveMemberKey>,
1198 ) -> Vec<ArchiveMemberLoadJob<'a>> {
1199 keys.into_iter()
1200 .enumerate()
1201 .map(|(index, key)| {
1202 let archive = &inputs.archives[key.archive.0 as usize];
1203 ArchiveMemberLoadJob {
1204 index,
1205 key,
1206 archive_path: &archive.path,
1207 archive_bytes: &archive.bytes,
1208 archive_load_order: archive.load_order,
1209 }
1210 })
1211 .collect()
1212 }
1213
1214 fn load_archive_members_parallel(
1215 inputs: &Inputs,
1216 keys: Vec<ArchiveMemberKey>,
1217 parallel_jobs: usize,
1218 ) -> Vec<(ArchiveMemberKey, Result<LoadedArchiveMember, FetchError>)> {
1219 let jobs = make_archive_member_jobs(inputs, keys);
1220 if jobs.is_empty() {
1221 return Vec::new();
1222 }
1223 let job_count = parallel_jobs.max(1).min(jobs.len()).max(1);
1224 if job_count == 1 {
1225 return jobs
1226 .into_iter()
1227 .map(load_archive_member_job)
1228 .map(|(_, key, result)| (key, result))
1229 .collect();
1230 }
1231
1232 let queue = Arc::new(Mutex::new(VecDeque::from(jobs)));
1233 let (tx, rx) = mpsc::channel();
1234 let mut results = thread::scope(|scope| {
1235 for _ in 0..job_count {
1236 let queue = Arc::clone(&queue);
1237 let tx = tx.clone();
1238 scope.spawn(move || loop {
1239 let Some(job) = queue
1240 .lock()
1241 .expect("archive member load queue mutex poisoned")
1242 .pop_front()
1243 else {
1244 break;
1245 };
1246 tx.send(load_archive_member_job(job))
1247 .expect("archive member load receiver should stay live");
1248 });
1249 }
1250 drop(tx);
1251 rx.into_iter().collect::<Vec<_>>()
1252 });
1253 results.sort_by_key(|(index, _, _)| *index);
1254 results
1255 .into_iter()
1256 .map(|(_, key, result)| (key, result))
1257 .collect()
1258 }
1259
1260 fn load_archive_member_job(
1261 job: ArchiveMemberLoadJob<'_>,
1262 ) -> (
1263 usize,
1264 ArchiveMemberKey,
1265 Result<LoadedArchiveMember, FetchError>,
1266 ) {
1267 let result = (|| {
1268 let archive = Archive::open(job.archive_path, job.archive_bytes)?;
1269 let member =
1270 archive
1271 .member_at_offset(job.key.member.0)
1272 .ok_or(FetchError::MemberNotFound {
1273 archive: job.key.archive,
1274 member: job.key.member,
1275 })?;
1276 let logical_path =
1277 PathBuf::from(format!("{}({})", job.archive_path.display(), member.name));
1278 let bytes = member.body.to_vec();
1279 let parsed = ObjectFile::parse(&logical_path, &bytes)?;
1280 Ok(LoadedArchiveMember {
1281 key: job.key,
1282 archive_load_order: job.archive_load_order,
1283 logical_path,
1284 bytes,
1285 parsed,
1286 })
1287 })();
1288 (job.index, job.key, result)
1289 }
1290
1291 fn archive_member_key(pending: PendingFetch) -> ArchiveMemberKey {
1292 ArchiveMemberKey {
1293 archive: pending.archive,
1294 member: pending.member,
1295 }
1296 }
1297
1298 fn archive_member_is_fetched(inputs: &Inputs, key: ArchiveMemberKey) -> bool {
1299 inputs.archives[key.archive.0 as usize]
1300 .fetched
1301 .contains(&key.member.0)
1302 }
1303
1304 /// Shared ingest: copy one archive member's body into a fresh
1305 /// `ObjectInput`, mark it fetched, and seed its symbols. Callers either
1306 /// respond to a demand-driven `PendingFetch` or force-pull the member.
1307 fn ingest_loaded_member(
1308 inputs: &mut Inputs,
1309 table: &mut SymbolTable,
1310 loaded: LoadedArchiveMember,
1311 report: &mut DrainReport,
1312 ) -> Result<Vec<PendingFetch>, FetchError> {
1313 if archive_member_is_fetched(inputs, loaded.key) {
1314 return Ok(Vec::new());
1315 }
1316
1317 inputs.archives[loaded.key.archive.0 as usize]
1318 .fetched
1319 .insert(loaded.key.member.0);
1320 let input_id = InputId(inputs.objects.len() as u32);
1321 inputs.objects.push(ObjectInput {
1322 path: loaded.logical_path,
1323 load_order: loaded.archive_load_order,
1324 archive_member_offset: Some(loaded.key.member.0),
1325 bytes: loaded.bytes,
1326 parsed: loaded.parsed,
1327 });
1328 report.fetched_members += 1;
1329 report
1330 .loaded_paths
1331 .push(inputs.objects[input_id.0 as usize].path.clone());
1332
1333 let mut sub_report = SeedReport::default();
1334 seed_object(inputs, input_id, table, &mut sub_report)?;
1335 report.duplicates.extend(sub_report.duplicates);
1336 report.referrers.extend_from(&sub_report.referrers);
1337 Ok(sub_report.pending_fetches)
1338 }
1339
1340 fn load_and_ingest_member(
1341 inputs: &mut Inputs,
1342 table: &mut SymbolTable,
1343 key: ArchiveMemberKey,
1344 report: &mut DrainReport,
1345 parallel_jobs: usize,
1346 ) -> Result<Vec<PendingFetch>, FetchError> {
1347 if archive_member_is_fetched(inputs, key) {
1348 return Ok(Vec::new());
1349 }
1350 let loaded = load_archive_members_parallel(inputs, vec![key], parallel_jobs)
1351 .into_iter()
1352 .next()
1353 .expect("single archive member load should produce one result")
1354 .1?;
1355 ingest_loaded_member(inputs, table, loaded, report)
1356 }
1357
1358 /// Pull `pending`'s member only if the symbol slot is still a
1359 /// `LazyArchive` (i.e., a strong Defined hasn't superseded it). Returns
1360 /// any new `PendingFetch` entries triggered by the inserted member.
1361 fn fetch_and_ingest_one(
1362 inputs: &mut Inputs,
1363 table: &mut SymbolTable,
1364 pending: PendingFetch,
1365 report: &mut DrainReport,
1366 parallel_jobs: usize,
1367 ) -> Result<Vec<PendingFetch>, FetchError> {
1368 let slot_is_still_lazy = matches!(table.get(pending.id), Symbol::LazyArchive { .. });
1369 if !slot_is_still_lazy {
1370 return Ok(Vec::new());
1371 }
1372 load_and_ingest_member(
1373 inputs,
1374 table,
1375 archive_member_key(pending),
1376 report,
1377 parallel_jobs,
1378 )
1379 }
1380
1381 /// Pull every member of one archive (bypasses demand tracking). Respects
1382 /// `ArchiveInput::fetched` for deduplication so it's safe to combine with
1383 /// demand-driven fetching.
1384 pub fn force_load_archive(
1385 inputs: &mut Inputs,
1386 table: &mut SymbolTable,
1387 archive_id: ArchiveId,
1388 report: &mut DrainReport,
1389 parallel_jobs: usize,
1390 ) -> Result<(), FetchError> {
1391 let member_offsets: Vec<u32> = {
1392 let ai = &inputs.archives[archive_id.0 as usize];
1393 let archive = Archive::open(&ai.path, &ai.bytes)?;
1394 archive
1395 .object_members()
1396 .map(|m| m.header_offset as u32)
1397 .collect()
1398 };
1399 let keys = member_offsets
1400 .into_iter()
1401 .map(|offset| ArchiveMemberKey {
1402 archive: archive_id,
1403 member: MemberId(offset),
1404 })
1405 .collect();
1406 let mut queue: Vec<PendingFetch> = Vec::new();
1407 for (_, loaded) in load_archive_members_parallel(inputs, keys, parallel_jobs) {
1408 let new = ingest_loaded_member(inputs, table, loaded?, report)?;
1409 queue.extend(new);
1410 }
1411 while let Some(p) = queue.pop() {
1412 let new = fetch_and_ingest_one(inputs, table, p, report, parallel_jobs)?;
1413 queue.extend(new);
1414 }
1415 Ok(())
1416 }
1417
1418 /// Pull every member of every registered archive — the `-all_load`
1419 /// semantic.
1420 pub fn force_load_all(
1421 inputs: &mut Inputs,
1422 table: &mut SymbolTable,
1423 report: &mut DrainReport,
1424 parallel_jobs: usize,
1425 ) -> Result<(), FetchError> {
1426 for i in 0..inputs.archives.len() {
1427 force_load_archive(inputs, table, ArchiveId(i as u32), report, parallel_jobs)?;
1428 }
1429 Ok(())
1430 }
1431
1432 /// Look up an archive by path for `-force_load <path>`. Returns `None` if
1433 /// no registered archive matches; diagnostic surface lives in Sprint 19's
1434 /// CLI layer.
1435 pub fn find_archive_by_path(inputs: &Inputs, path: &std::path::Path) -> Option<ArchiveId> {
1436 inputs
1437 .archives
1438 .iter()
1439 .position(|a| a.path == path)
1440 .map(|i| ArchiveId(i as u32))
1441 }
1442
1443 // ---------------------------------------------------------------------------
1444 // Unresolved classification.
1445 // ---------------------------------------------------------------------------
1446
1447 /// Policy for handling Undefined symbols that remain after the fixed
1448 /// point. Maps to the CLI `-undefined <treatment>` flag (Sprint 19).
1449 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
1450 pub enum UndefinedTreatment {
1451 /// Undefineds are errors. The default, matching Apple `ld`.
1452 #[default]
1453 Error,
1454 /// Undefineds produce warnings; left in the table as Undefined.
1455 Warning,
1456 /// Undefineds are silently accepted; left in the table as Undefined.
1457 Suppress,
1458 /// Undefineds are promoted to flat-lookup DylibImport entries — dyld
1459 /// searches every loaded dylib at runtime.
1460 DynamicLookup,
1461 }
1462
1463 /// `n_desc`-style special ordinal: `-2` (two's-complement u16 = 0xFFFE)
1464 /// tells dyld to use flat-namespace lookup. Sprint 15 emits this into
1465 /// `LC_DYLD_INFO` bind opcodes.
1466 pub const FLAT_LOOKUP_ORDINAL: u16 = 0xFFFE;
1467
1468 /// Sentinel DylibId used for dynamic-lookup promotions before a real
1469 /// dylib registry is bound. Sprint 15 interprets `DylibId::INVALID`
1470 /// when emitting bind opcodes.
1471 impl DylibId {
1472 pub const INVALID: DylibId = DylibId(u32::MAX);
1473 }
1474
1475 #[derive(Debug, Default)]
1476 pub struct ClassificationReport {
1477 /// Strong undefineds that triggered errors under `Error` treatment.
1478 pub errors: Vec<Unresolved>,
1479 /// Strong undefineds that produced warnings under `Warning` treatment and
1480 /// were promoted to flat-lookup imports for final emission.
1481 pub warnings: Vec<Unresolved>,
1482 /// Strong undefineds that were silently accepted under `Suppress` and
1483 /// were promoted to flat-lookup imports for final emission.
1484 pub suppressed: Vec<Unresolved>,
1485 /// Undefineds promoted to flat-lookup DylibImport entries.
1486 pub promoted_to_dynamic: Vec<SymbolId>,
1487 /// Weak references that remain unresolved — always accepted.
1488 pub weak: Vec<Unresolved>,
1489 }
1490
1491 #[derive(Debug, Clone, PartialEq, Eq)]
1492 pub struct Unresolved {
1493 pub name: Istr,
1494 pub id: SymbolId,
1495 }
1496
1497 // ---------------------------------------------------------------------------
1498 // Levenshtein distance — used for did-you-mean hints.
1499 // ---------------------------------------------------------------------------
1500
1501 /// Classic dynamic-programming edit distance. O(m·n) time, O(n) space.
1502 pub fn levenshtein(a: &str, b: &str) -> usize {
1503 let a: Vec<char> = a.chars().collect();
1504 let b: Vec<char> = b.chars().collect();
1505 let (m, n) = (a.len(), b.len());
1506 if m == 0 {
1507 return n;
1508 }
1509 if n == 0 {
1510 return m;
1511 }
1512 let mut row: Vec<usize> = (0..=n).collect();
1513 for (i, ca) in a.iter().enumerate() {
1514 let mut prev = row[0];
1515 row[0] = i + 1;
1516 for (j, cb) in b.iter().enumerate() {
1517 let cost = if ca == cb { 0 } else { 1 };
1518 let new_val = (row[j + 1] + 1).min(row[j] + 1).min(prev + cost);
1519 prev = row[j + 1];
1520 row[j + 1] = new_val;
1521 }
1522 }
1523 row[n]
1524 }
1525
1526 /// Return up to `max` candidate names from `table` within `budget` edits
1527 /// of `query`. Sorted by ascending distance, ties broken by name.
1528 pub fn did_you_mean(table: &SymbolTable, query: &str, budget: usize, max: usize) -> Vec<String> {
1529 let mut hits: Vec<(usize, String)> = table
1530 .iter()
1531 .filter_map(|(_, s)| {
1532 let candidate = table.interner.resolve(s.name());
1533 if candidate == query {
1534 return None;
1535 }
1536 let d = levenshtein(query, candidate);
1537 if d <= budget {
1538 Some((d, candidate.to_string()))
1539 } else {
1540 None
1541 }
1542 })
1543 .collect();
1544 hits.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
1545 hits.dedup_by(|a, b| a.1 == b.1);
1546 hits.into_iter().take(max).map(|(_, n)| n).collect()
1547 }
1548
1549 // ---------------------------------------------------------------------------
1550 // Diagnostic formatters — produce afs-as-style text for the driver to
1551 // emit. Sprint 8 writes to String; Sprint 19's CLI layer owns stderr.
1552 // ---------------------------------------------------------------------------
1553
1554 /// Format the full undefined-symbol diagnostic block, one entry per
1555 /// unresolved name: the error line, every referrer, and an optional
1556 /// did-you-mean hint.
1557 fn format_undefined_diagnostic_with_level(
1558 table: &SymbolTable,
1559 inputs: &Inputs,
1560 referrers: &ReferrerLog,
1561 unresolved: &[Unresolved],
1562 level: &str,
1563 ) -> String {
1564 let mut out = String::new();
1565 for u in unresolved {
1566 let name = table.interner.resolve(u.name);
1567 out.push_str(&format!("afs-ld: {level}: undefined symbol: {name}\n"));
1568 for origin in referrers.get(u.name) {
1569 if let Some(oi) = inputs.objects.get(origin.0 as usize) {
1570 out.push_str(&format!(" referenced by {}\n", oi.path.display()));
1571 }
1572 }
1573 let suggestions = did_you_mean(table, name, 3, 3);
1574 if !suggestions.is_empty() {
1575 out.push_str(&format!(
1576 " Hint: did you mean {}?\n",
1577 suggestions
1578 .iter()
1579 .map(|s| format!("{s:?}"))
1580 .collect::<Vec<_>>()
1581 .join(" or ")
1582 ));
1583 }
1584 }
1585 out
1586 }
1587
1588 pub fn format_undefined_diagnostic(
1589 table: &SymbolTable,
1590 inputs: &Inputs,
1591 referrers: &ReferrerLog,
1592 unresolved: &[Unresolved],
1593 ) -> String {
1594 format_undefined_diagnostic_with_level(table, inputs, referrers, unresolved, "error")
1595 }
1596
1597 pub fn format_undefined_warning_diagnostic(
1598 table: &SymbolTable,
1599 inputs: &Inputs,
1600 referrers: &ReferrerLog,
1601 unresolved: &[Unresolved],
1602 ) -> String {
1603 format_undefined_diagnostic_with_level(table, inputs, referrers, unresolved, "warning")
1604 }
1605
1606 /// Format a `DuplicateStrong` insertion error for user consumption. Needs
1607 /// the incumbent symbol (from the table) plus the losing second symbol
1608 /// carried in the error itself.
1609 pub fn format_duplicate_diagnostic(
1610 table: &SymbolTable,
1611 inputs: &Inputs,
1612 err: &InsertError,
1613 ) -> String {
1614 let InsertError::DuplicateStrong {
1615 name,
1616 first,
1617 second,
1618 } = err
1619 else {
1620 return String::new();
1621 };
1622 let name_str = table.interner.resolve(*name);
1623 let mut out = String::new();
1624 out.push_str(&format!("afs-ld: error: duplicate symbol {name_str}\n"));
1625 if let Symbol::Defined { origin, .. } = table.get(*first) {
1626 if let Some(oi) = inputs.objects.get(origin.0 as usize) {
1627 out.push_str(&format!(" defined in {}\n", oi.path.display()));
1628 }
1629 }
1630 if let Symbol::Defined { origin, .. } = second.as_ref() {
1631 if let Some(oi) = inputs.objects.get(origin.0 as usize) {
1632 out.push_str(&format!(" also in {}\n", oi.path.display()));
1633 }
1634 }
1635 out
1636 }
1637
1638 /// After the fixed-point loop, walk the table and classify every remaining
1639 /// `Undefined`. Weak references always pass through cleanly.
1640 pub fn classify_unresolved(
1641 table: &mut SymbolTable,
1642 treatment: UndefinedTreatment,
1643 ) -> ClassificationReport {
1644 let mut report = ClassificationReport::default();
1645
1646 fn promote_to_flat_lookup(table: &mut SymbolTable, id: SymbolId, name: Istr) {
1647 table.symbols[id.0 as usize] = Symbol::DylibImport {
1648 name,
1649 dylib: DylibId::INVALID,
1650 ordinal: FLAT_LOOKUP_ORDINAL,
1651 weak_import: true,
1652 };
1653 table.transitions.push(Transition {
1654 id,
1655 from: SymbolKindTag::Undefined,
1656 to: SymbolKindTag::DylibImport,
1657 cause: TransitionCause::Replaced,
1658 });
1659 }
1660
1661 // Collect undefineds before mutating — avoids double-borrow grief.
1662 let undefs: Vec<(SymbolId, Istr, bool)> = table
1663 .iter()
1664 .filter_map(|(id, s)| match s {
1665 Symbol::Undefined { name, weak_ref, .. } => Some((id, *name, *weak_ref)),
1666 _ => None,
1667 })
1668 .collect();
1669
1670 for (id, name, weak_ref) in undefs {
1671 if weak_ref {
1672 report.weak.push(Unresolved { name, id });
1673 continue;
1674 }
1675 match treatment {
1676 UndefinedTreatment::Error => {
1677 report.errors.push(Unresolved { name, id });
1678 }
1679 UndefinedTreatment::Warning => {
1680 report.warnings.push(Unresolved { name, id });
1681 promote_to_flat_lookup(table, id, name);
1682 report.promoted_to_dynamic.push(id);
1683 }
1684 UndefinedTreatment::Suppress => {
1685 report.suppressed.push(Unresolved { name, id });
1686 promote_to_flat_lookup(table, id, name);
1687 report.promoted_to_dynamic.push(id);
1688 }
1689 UndefinedTreatment::DynamicLookup => {
1690 promote_to_flat_lookup(table, id, name);
1691 report.promoted_to_dynamic.push(id);
1692 }
1693 }
1694 }
1695
1696 report
1697 }
1698
1699 /// Drive the fetch queue to a fixed point. Each fetched member's own
1700 /// undefined references may trigger additional pending fetches — drain
1701 /// those too until the queue is empty.
1702 pub fn drain_fetches(
1703 inputs: &mut Inputs,
1704 table: &mut SymbolTable,
1705 initial: Vec<PendingFetch>,
1706 parallel_jobs: usize,
1707 ) -> Result<DrainReport, FetchError> {
1708 let mut queue = initial;
1709 let mut prepared = HashMap::new();
1710 let mut report = DrainReport::default();
1711 while let Some(p) = queue.pop() {
1712 let key = archive_member_key(p);
1713 let slot_is_still_lazy = matches!(table.get(p.id), Symbol::LazyArchive { .. });
1714 if !slot_is_still_lazy || archive_member_is_fetched(inputs, key) {
1715 prepared.remove(&key);
1716 continue;
1717 }
1718 // Parse siblings ahead of time, but only ingest the current stack
1719 // entry after re-checking its lazy slot. This keeps member order stable.
1720 if !prepared.contains_key(&key) {
1721 preparse_pending_fetches(inputs, table, p, &queue, &mut prepared, parallel_jobs);
1722 }
1723 let Some(loaded) = prepared.remove(&key) else {
1724 continue;
1725 };
1726 let loaded = loaded?;
1727 let slot_is_still_lazy = matches!(table.get(p.id), Symbol::LazyArchive { .. });
1728 if !slot_is_still_lazy || archive_member_is_fetched(inputs, key) {
1729 continue;
1730 }
1731 let new_pending = ingest_loaded_member(inputs, table, loaded, &mut report)?;
1732 queue.extend(new_pending);
1733 }
1734 Ok(report)
1735 }
1736
1737 fn preparse_pending_fetches(
1738 inputs: &Inputs,
1739 table: &SymbolTable,
1740 current: PendingFetch,
1741 queue: &[PendingFetch],
1742 prepared: &mut HashMap<ArchiveMemberKey, Result<LoadedArchiveMember, FetchError>>,
1743 parallel_jobs: usize,
1744 ) {
1745 let mut seen = HashSet::new();
1746 let mut keys = Vec::new();
1747 for pending in std::iter::once(&current).chain(queue.iter().rev()) {
1748 let key = archive_member_key(*pending);
1749 if prepared.contains_key(&key)
1750 || archive_member_is_fetched(inputs, key)
1751 || !matches!(table.get(pending.id), Symbol::LazyArchive { .. })
1752 || !seen.insert(key)
1753 {
1754 continue;
1755 }
1756 keys.push(key);
1757 }
1758 for (key, result) in load_archive_members_parallel(inputs, keys, parallel_jobs) {
1759 prepared.insert(key, result);
1760 }
1761 }
1762
1763 /// Turn a wire-form `InputSymbol` into a resolver-side `Symbol`. Returns
1764 /// `None` for kinds the resolver does not track (currently: aliases with
1765 /// unresolved target strx — Sprint 8's resolver defers those for now).
1766 fn symbolize_input(
1767 name: Istr,
1768 input_sym: &crate::symbol::InputSymbol,
1769 origin: InputId,
1770 ) -> Option<Symbol> {
1771 use crate::symbol::SymKind;
1772 match input_sym.kind() {
1773 SymKind::Undef => {
1774 if let Some(size) = input_sym.common_size() {
1775 let align_pow2 = input_sym.common_align_pow2().unwrap_or(0);
1776 Some(Symbol::Common {
1777 name,
1778 origin,
1779 size,
1780 align_pow2,
1781 })
1782 } else {
1783 Some(Symbol::Undefined {
1784 name,
1785 origin,
1786 weak_ref: input_sym.weak_ref(),
1787 })
1788 }
1789 }
1790 SymKind::Abs | SymKind::Sect => Some(Symbol::Defined {
1791 name,
1792 origin,
1793 // AtomId(0) is a placeholder; Sprint 9's atomization pass
1794 // replaces these with real atom handles in-place.
1795 atom: AtomId(0),
1796 value: input_sym.value(),
1797 weak: input_sym.weak_def(),
1798 private_extern: input_sym.is_private_ext(),
1799 no_dead_strip: input_sym.no_dead_strip(),
1800 }),
1801 SymKind::Indirect => {
1802 // Sprint 8 does not wire indirect-strx lookups yet — Sprint 9's
1803 // atomization pass (which has the string table at hand) will
1804 // rewrite these. For now, skip.
1805 None
1806 }
1807 }
1808 }
1809
1810 #[cfg(test)]
1811 mod tests {
1812 use super::*;
1813
1814 #[test]
1815 fn interner_dedups_same_string() {
1816 let mut s = StringInterner::new();
1817 let a = s.intern("_main");
1818 let b = s.intern("_main");
1819 assert_eq!(a, b);
1820 assert_eq!(s.len(), 1);
1821 }
1822
1823 #[test]
1824 fn interner_distinguishes_different_strings() {
1825 let mut s = StringInterner::new();
1826 let a = s.intern("_main");
1827 let b = s.intern("_helper");
1828 assert_ne!(a, b);
1829 assert_eq!(s.resolve(a), "_main");
1830 assert_eq!(s.resolve(b), "_helper");
1831 }
1832
1833 #[test]
1834 fn interner_handles_are_stable_u32() {
1835 let mut s = StringInterner::new();
1836 let a = s.intern("_a");
1837 let b = s.intern("_b");
1838 let c = s.intern("_c");
1839 assert_eq!(a.as_u32(), 0);
1840 assert_eq!(b.as_u32(), 1);
1841 assert_eq!(c.as_u32(), 2);
1842 }
1843
1844 #[test]
1845 fn opaque_ids_are_distinguishable() {
1846 let a = InputId(0);
1847 let b = InputId(1);
1848 assert_ne!(a, b);
1849 assert_eq!(a.as_u32(), 0);
1850 // Different newtypes with the same inner value are NOT compatible
1851 // (we test this via the module's type system at compile time).
1852 }
1853
1854 #[test]
1855 fn interner_large_workload() {
1856 let mut s = StringInterner::new();
1857 for i in 0..1000 {
1858 let name = format!("_sym_{i}");
1859 let h1 = s.intern(&name);
1860 let h2 = s.intern(&name);
1861 assert_eq!(h1, h2);
1862 }
1863 assert_eq!(s.len(), 1000);
1864 }
1865
1866 // ---- Symbol variant tests ----
1867
1868 fn n(i: u32) -> Istr {
1869 Istr(i)
1870 }
1871
1872 #[test]
1873 fn symbol_kind_tags_match_variants() {
1874 let undef = Symbol::Undefined {
1875 name: n(0),
1876 origin: InputId(0),
1877 weak_ref: false,
1878 };
1879 assert_eq!(undef.kind(), SymbolKindTag::Undefined);
1880
1881 let defined = Symbol::Defined {
1882 name: n(1),
1883 origin: InputId(0),
1884 atom: AtomId(0),
1885 value: 0,
1886 weak: false,
1887 private_extern: false,
1888 no_dead_strip: false,
1889 };
1890 assert_eq!(defined.kind(), SymbolKindTag::Defined);
1891 assert!(defined.is_strong_defined());
1892 assert!(!defined.is_weak_defined());
1893
1894 let weak = Symbol::Defined {
1895 name: n(2),
1896 origin: InputId(0),
1897 atom: AtomId(0),
1898 value: 0,
1899 weak: true,
1900 private_extern: false,
1901 no_dead_strip: false,
1902 };
1903 assert!(weak.is_weak_defined());
1904 assert!(!weak.is_strong_defined());
1905
1906 assert_eq!(
1907 Symbol::Common {
1908 name: n(3),
1909 origin: InputId(0),
1910 size: 8,
1911 align_pow2: 3
1912 }
1913 .kind(),
1914 SymbolKindTag::Common
1915 );
1916 assert_eq!(
1917 Symbol::DylibImport {
1918 name: n(4),
1919 dylib: DylibId(0),
1920 ordinal: 1,
1921 weak_import: false
1922 }
1923 .kind(),
1924 SymbolKindTag::DylibImport
1925 );
1926 assert_eq!(
1927 Symbol::LazyArchive {
1928 name: n(5),
1929 archive: ArchiveId(0),
1930 member: MemberId(0)
1931 }
1932 .kind(),
1933 SymbolKindTag::LazyArchive
1934 );
1935 assert_eq!(
1936 Symbol::LazyObject {
1937 name: n(6),
1938 origin: InputId(0)
1939 }
1940 .kind(),
1941 SymbolKindTag::LazyObject
1942 );
1943 assert_eq!(
1944 Symbol::Alias {
1945 name: n(7),
1946 aliased: n(0)
1947 }
1948 .kind(),
1949 SymbolKindTag::Alias
1950 );
1951 }
1952
1953 #[test]
1954 fn symbol_name_returns_istr_across_variants() {
1955 let sym = Symbol::Common {
1956 name: n(42),
1957 origin: InputId(0),
1958 size: 16,
1959 align_pow2: 4,
1960 };
1961 assert_eq!(sym.name(), n(42));
1962 }
1963
1964 // ----------------- SymbolTable & insertion matrix tests -----------------
1965
1966 fn undef(t: &mut SymbolTable, name: &str) -> Symbol {
1967 Symbol::Undefined {
1968 name: t.intern(name),
1969 origin: InputId(0),
1970 weak_ref: false,
1971 }
1972 }
1973
1974 fn defined_strong(t: &mut SymbolTable, name: &str) -> Symbol {
1975 Symbol::Defined {
1976 name: t.intern(name),
1977 origin: InputId(0),
1978 atom: AtomId(0),
1979 value: 0x100,
1980 weak: false,
1981 private_extern: false,
1982 no_dead_strip: false,
1983 }
1984 }
1985
1986 fn defined_weak(t: &mut SymbolTable, name: &str) -> Symbol {
1987 Symbol::Defined {
1988 name: t.intern(name),
1989 origin: InputId(0),
1990 atom: AtomId(0),
1991 value: 0x200,
1992 weak: true,
1993 private_extern: false,
1994 no_dead_strip: false,
1995 }
1996 }
1997
1998 fn common(t: &mut SymbolTable, name: &str, size: u64, align: u8) -> Symbol {
1999 Symbol::Common {
2000 name: t.intern(name),
2001 origin: InputId(0),
2002 size,
2003 align_pow2: align,
2004 }
2005 }
2006
2007 fn dylib_import(t: &mut SymbolTable, name: &str, ordinal: u16) -> Symbol {
2008 Symbol::DylibImport {
2009 name: t.intern(name),
2010 dylib: DylibId(0),
2011 ordinal,
2012 weak_import: false,
2013 }
2014 }
2015
2016 fn lazy_archive(t: &mut SymbolTable, name: &str) -> Symbol {
2017 Symbol::LazyArchive {
2018 name: t.intern(name),
2019 archive: ArchiveId(7),
2020 member: MemberId(42),
2021 }
2022 }
2023
2024 fn lazy_object(t: &mut SymbolTable, name: &str) -> Symbol {
2025 Symbol::LazyObject {
2026 name: t.intern(name),
2027 origin: InputId(5),
2028 }
2029 }
2030
2031 fn alias_sym(t: &mut SymbolTable, name: &str, target: &str) -> Symbol {
2032 let name_i = t.intern(name);
2033 let target_i = t.intern(target);
2034 Symbol::Alias {
2035 name: name_i,
2036 aliased: target_i,
2037 }
2038 }
2039
2040 // ---- vacant-slot insertions ----
2041
2042 #[test]
2043 fn vacant_insert_records_inserted_outcome() {
2044 let mut t = SymbolTable::new();
2045 let sym = defined_strong(&mut t, "_main");
2046 match t.insert(sym).unwrap() {
2047 InsertOutcome::Inserted(id) => {
2048 assert_eq!(id, SymbolId(0));
2049 assert!(matches!(t.get(id), Symbol::Defined { .. }));
2050 }
2051 other => panic!("expected Inserted, got {other:?}"),
2052 }
2053 assert_eq!(t.transitions().len(), 1);
2054 assert_eq!(t.transitions()[0].cause, TransitionCause::Inserted);
2055 }
2056
2057 // ---- existing Undefined ----
2058
2059 #[test]
2060 fn undefined_kept_under_undefined() {
2061 let mut t = SymbolTable::new();
2062 let a = undef(&mut t, "_x");
2063 let b = undef(&mut t, "_x");
2064 t.insert(a).unwrap();
2065 let out = t.insert(b).unwrap();
2066 assert!(matches!(out, InsertOutcome::Kept(_)));
2067 }
2068
2069 #[test]
2070 fn undefined_replaced_by_definition() {
2071 let mut t = SymbolTable::new();
2072 let first = undef(&mut t, "_x");
2073 t.insert(first).unwrap();
2074 let incoming = defined_strong(&mut t, "_x");
2075 let out = t.insert(incoming).unwrap();
2076 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2077 }
2078
2079 #[test]
2080 fn undefined_replaced_by_common() {
2081 let mut t = SymbolTable::new();
2082 let first = undef(&mut t, "_x");
2083 t.insert(first).unwrap();
2084 let c = common(&mut t, "_x", 8, 3);
2085 let out = t.insert(c).unwrap();
2086 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2087 }
2088
2089 #[test]
2090 fn undefined_replaced_by_dylib_import() {
2091 let mut t = SymbolTable::new();
2092 let first = undef(&mut t, "_x");
2093 t.insert(first).unwrap();
2094 let di = dylib_import(&mut t, "_x", 1);
2095 let out = t.insert(di).unwrap();
2096 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2097 }
2098
2099 #[test]
2100 fn undefined_replaced_by_lazy_archive() {
2101 let mut t = SymbolTable::new();
2102 let first = undef(&mut t, "_x");
2103 t.insert(first).unwrap();
2104 let la = lazy_archive(&mut t, "_x");
2105 let out = t.insert(la).unwrap();
2106 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2107 }
2108
2109 #[test]
2110 fn undefined_replaced_by_lazy_object() {
2111 let mut t = SymbolTable::new();
2112 let first = undef(&mut t, "_x");
2113 t.insert(first).unwrap();
2114 let lo = lazy_object(&mut t, "_x");
2115 let out = t.insert(lo).unwrap();
2116 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2117 }
2118
2119 // ---- existing Defined ----
2120
2121 #[test]
2122 fn strong_defined_keeps_under_undefined() {
2123 let mut t = SymbolTable::new();
2124 let existing = defined_strong(&mut t, "_x");
2125 t.insert(existing).unwrap();
2126 let newer = undef(&mut t, "_x");
2127 let out = t.insert(newer).unwrap();
2128 assert!(matches!(out, InsertOutcome::Kept(_)));
2129 }
2130
2131 #[test]
2132 fn two_strong_defined_is_duplicate_error() {
2133 let mut t = SymbolTable::new();
2134 let first = defined_strong(&mut t, "_x");
2135 t.insert(first).unwrap();
2136 let second = defined_strong(&mut t, "_x");
2137 let err = t.insert(second).unwrap_err();
2138 assert!(matches!(err, InsertError::DuplicateStrong { .. }));
2139 }
2140
2141 #[test]
2142 fn strong_keeps_under_weak() {
2143 let mut t = SymbolTable::new();
2144 let strong = defined_strong(&mut t, "_x");
2145 t.insert(strong).unwrap();
2146 let weaker = defined_weak(&mut t, "_x");
2147 let out = t.insert(weaker).unwrap();
2148 assert!(matches!(out, InsertOutcome::Kept(_)));
2149 }
2150
2151 #[test]
2152 fn weak_replaced_by_strong() {
2153 let mut t = SymbolTable::new();
2154 let weak = defined_weak(&mut t, "_x");
2155 t.insert(weak).unwrap();
2156 let strong = defined_strong(&mut t, "_x");
2157 let out = t.insert(strong).unwrap();
2158 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2159 assert!(t.get(SymbolId(0)).is_strong_defined());
2160 }
2161
2162 #[test]
2163 fn two_weak_defined_first_wins() {
2164 let mut t = SymbolTable::new();
2165 let first = defined_weak(&mut t, "_x");
2166 let Symbol::Defined {
2167 value: first_val, ..
2168 } = first
2169 else {
2170 unreachable!()
2171 };
2172 t.insert(first).unwrap();
2173 let second = defined_weak(&mut t, "_x");
2174 let out = t.insert(second).unwrap();
2175 assert!(matches!(out, InsertOutcome::Kept(_)));
2176 if let Symbol::Defined { value, .. } = t.get(SymbolId(0)) {
2177 assert_eq!(*value, first_val);
2178 }
2179 }
2180
2181 #[test]
2182 fn defined_keeps_under_common() {
2183 let mut t = SymbolTable::new();
2184 let strong = defined_strong(&mut t, "_x");
2185 t.insert(strong).unwrap();
2186 let c = common(&mut t, "_x", 8, 3);
2187 let out = t.insert(c).unwrap();
2188 assert!(matches!(out, InsertOutcome::Kept(_)));
2189 }
2190
2191 #[test]
2192 fn defined_keeps_under_dylib_import() {
2193 let mut t = SymbolTable::new();
2194 let strong = defined_strong(&mut t, "_x");
2195 t.insert(strong).unwrap();
2196 let di = dylib_import(&mut t, "_x", 1);
2197 let out = t.insert(di).unwrap();
2198 assert!(matches!(out, InsertOutcome::Kept(_)));
2199 }
2200
2201 #[test]
2202 fn defined_keeps_under_lazy_archive() {
2203 let mut t = SymbolTable::new();
2204 let strong = defined_strong(&mut t, "_x");
2205 t.insert(strong).unwrap();
2206 let la = lazy_archive(&mut t, "_x");
2207 let out = t.insert(la).unwrap();
2208 assert!(matches!(out, InsertOutcome::Kept(_)));
2209 }
2210
2211 // ---- existing Common ----
2212
2213 #[test]
2214 fn common_replaced_by_definition() {
2215 let mut t = SymbolTable::new();
2216 let c = common(&mut t, "_x", 8, 3);
2217 t.insert(c).unwrap();
2218 let def = defined_strong(&mut t, "_x");
2219 let out = t.insert(def).unwrap();
2220 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2221 }
2222
2223 #[test]
2224 fn common_coalesces_to_larger_size_and_stricter_alignment() {
2225 let mut t = SymbolTable::new();
2226 let a = common(&mut t, "_x", 8, 2);
2227 t.insert(a).unwrap();
2228 let b = common(&mut t, "_x", 16, 5);
2229 let out = t.insert(b).unwrap();
2230 assert!(matches!(out, InsertOutcome::CommonCoalesced { .. }));
2231 if let Symbol::Common {
2232 size, align_pow2, ..
2233 } = t.get(SymbolId(0))
2234 {
2235 assert_eq!(*size, 16);
2236 assert_eq!(*align_pow2, 5);
2237 }
2238 }
2239
2240 #[test]
2241 fn common_kept_under_dylib_import() {
2242 let mut t = SymbolTable::new();
2243 let c = common(&mut t, "_x", 8, 3);
2244 t.insert(c).unwrap();
2245 let di = dylib_import(&mut t, "_x", 2);
2246 let out = t.insert(di).unwrap();
2247 assert!(matches!(out, InsertOutcome::Kept(_)));
2248 }
2249
2250 // ---- existing DylibImport ----
2251
2252 #[test]
2253 fn dylib_import_replaced_by_local_definition() {
2254 let mut t = SymbolTable::new();
2255 let di = dylib_import(&mut t, "_printf", 1);
2256 t.insert(di).unwrap();
2257 let def = defined_strong(&mut t, "_printf");
2258 let out = t.insert(def).unwrap();
2259 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2260 }
2261
2262 #[test]
2263 fn dylib_import_keeps_under_another_import() {
2264 let mut t = SymbolTable::new();
2265 let first = dylib_import(&mut t, "_printf", 1);
2266 t.insert(first).unwrap();
2267 let second = dylib_import(&mut t, "_printf", 2);
2268 let out = t.insert(second).unwrap();
2269 assert!(matches!(out, InsertOutcome::Kept(_)));
2270 }
2271
2272 // ---- existing LazyArchive ----
2273
2274 #[test]
2275 fn lazy_archive_under_undefined_yields_pending_fetch() {
2276 let mut t = SymbolTable::new();
2277 let lazy = lazy_archive(&mut t, "_hidden");
2278 t.insert(lazy).unwrap();
2279 let want = undef(&mut t, "_hidden");
2280 match t.insert(want).unwrap() {
2281 InsertOutcome::PendingArchiveFetch {
2282 archive, member, ..
2283 } => {
2284 assert_eq!(archive, ArchiveId(7));
2285 assert_eq!(member, MemberId(42));
2286 }
2287 other => panic!("expected PendingArchiveFetch, got {other:?}"),
2288 }
2289 }
2290
2291 #[test]
2292 fn lazy_archive_replaced_by_defined() {
2293 let mut t = SymbolTable::new();
2294 let lazy = lazy_archive(&mut t, "_f");
2295 t.insert(lazy).unwrap();
2296 let def = defined_strong(&mut t, "_f");
2297 let out = t.insert(def).unwrap();
2298 assert!(matches!(out, InsertOutcome::Replaced { .. }));
2299 }
2300
2301 #[test]
2302 fn two_lazy_archives_first_wins() {
2303 let mut t = SymbolTable::new();
2304 let first = lazy_archive(&mut t, "_f");
2305 t.insert(first).unwrap();
2306 let name = t.intern("_f");
2307 let second = Symbol::LazyArchive {
2308 name,
2309 archive: ArchiveId(99),
2310 member: MemberId(99),
2311 };
2312 let out = t.insert(second).unwrap();
2313 assert!(matches!(out, InsertOutcome::Kept(_)));
2314 }
2315
2316 // ---- existing LazyObject ----
2317
2318 #[test]
2319 fn lazy_object_under_undefined_yields_pending_load() {
2320 let mut t = SymbolTable::new();
2321 let lazy = lazy_object(&mut t, "_f");
2322 t.insert(lazy).unwrap();
2323 let want = undef(&mut t, "_f");
2324 match t.insert(want).unwrap() {
2325 InsertOutcome::PendingObjectLoad { origin, .. } => {
2326 assert_eq!(origin, InputId(5));
2327 }
2328 other => panic!("expected PendingObjectLoad, got {other:?}"),
2329 }
2330 }
2331
2332 // ---- Alias path ----
2333
2334 #[test]
2335 fn alias_inserts_into_vacant_slot() {
2336 let mut t = SymbolTable::new();
2337 let al = alias_sym(&mut t, "_old_name", "_new_name");
2338 let out = t.insert(al).unwrap();
2339 assert!(matches!(out, InsertOutcome::Inserted(_)));
2340 }
2341
2342 #[test]
2343 fn direct_definition_replaces_alias() {
2344 let mut t = SymbolTable::new();
2345 let al = alias_sym(&mut t, "_alias", "_target");
2346 t.insert(al).unwrap();
2347 let def = defined_strong(&mut t, "_alias");
2348 let out = t.insert(def).unwrap();
2349 assert!(matches!(
2350 out,
2351 InsertOutcome::Replaced {
2352 from: SymbolKindTag::Alias,
2353 to: SymbolKindTag::Defined,
2354 ..
2355 }
2356 ));
2357 }
2358
2359 #[test]
2360 fn self_loop_alias_rejected() {
2361 let mut t = SymbolTable::new();
2362 let u = undef(&mut t, "_foo");
2363 t.insert(u).unwrap();
2364 let looped = alias_sym(&mut t, "_foo", "_foo");
2365 let err = t.insert(looped).unwrap_err();
2366 assert!(matches!(err, InsertError::AliasCycle { .. }));
2367 }
2368
2369 #[test]
2370 fn two_step_alias_cycle_rejected() {
2371 let mut t = SymbolTable::new();
2372 let a_to_b = alias_sym(&mut t, "_a", "_b");
2373 t.insert(a_to_b).unwrap();
2374 let u = undef(&mut t, "_b");
2375 t.insert(u).unwrap();
2376 let loop_back = alias_sym(&mut t, "_b", "_a");
2377 let err = t.insert(loop_back).unwrap_err();
2378 assert!(matches!(err, InsertError::AliasCycle { .. }));
2379 }
2380
2381 #[test]
2382 fn resolve_chain_walks_to_concrete_target() {
2383 let mut t = SymbolTable::new();
2384 let defined = defined_strong(&mut t, "_target");
2385 t.insert(defined).unwrap();
2386 let al = alias_sym(&mut t, "_alias", "_target");
2387 t.insert(al).unwrap();
2388 let name = t.intern("_alias");
2389 let (_, sym) = t.resolve_chain(name).unwrap();
2390 assert!(matches!(sym, Symbol::Defined { .. }));
2391 }
2392
2393 #[test]
2394 fn resolve_chain_unknown_name_errors() {
2395 let t = SymbolTable::new();
2396 let err = t.resolve_chain(Istr(99)).unwrap_err();
2397 assert!(matches!(err, ResolveError::Unknown(_)));
2398 }
2399
2400 // ---- Transition log ----
2401
2402 #[test]
2403 fn kept_entries_do_not_record_transitions() {
2404 let mut t = SymbolTable::new();
2405 let strong = defined_strong(&mut t, "_x");
2406 t.insert(strong).unwrap();
2407 let before = t.transitions().len();
2408 let weaker = defined_weak(&mut t, "_x");
2409 t.insert(weaker).unwrap();
2410 assert_eq!(t.transitions().len(), before);
2411 }
2412
2413 #[test]
2414 fn replace_records_transition_with_from_and_to() {
2415 let mut t = SymbolTable::new();
2416 let u = undef(&mut t, "_x");
2417 t.insert(u).unwrap();
2418 let d = defined_strong(&mut t, "_x");
2419 t.insert(d).unwrap();
2420 let last = t.transitions().last().unwrap();
2421 assert_eq!(last.from, SymbolKindTag::Undefined);
2422 assert_eq!(last.to, SymbolKindTag::Defined);
2423 assert_eq!(last.cause, TransitionCause::Replaced);
2424 }
2425
2426 #[test]
2427 fn common_coalesce_records_its_own_cause() {
2428 let mut t = SymbolTable::new();
2429 let a = common(&mut t, "_x", 8, 3);
2430 t.insert(a).unwrap();
2431 let b = common(&mut t, "_x", 16, 5);
2432 t.insert(b).unwrap();
2433 assert_eq!(
2434 t.transitions().last().unwrap().cause,
2435 TransitionCause::CommonCoalesced
2436 );
2437 }
2438 }
2439