Rust · 11100 bytes Raw Blame History
1 //! Symbol-table reader.
2 //!
3 //! Decodes 16-byte `nlist_64` records and exposes them via `InputSymbol`,
4 //! which keeps the raw wire bytes for byte-level round-trip and layers rich
5 //! accessors (kind, externness, weak flags, common size/alignment, library
6 //! ordinal, indirect aliased-name strx) on top.
7 //!
8 //! Sprint 2 scope: parse/encode + classification. The name-resolution step
9 //! that turns `strx` into `&str` lives on `ObjectFile` once `StringTable`
10 //! lands alongside this module.
11
12 use crate::macho::constants::*;
13 use crate::macho::reader::{u32_le, u64_le, ReadError};
14
15 /// Size of one `nlist_64` on the wire.
16 pub const NLIST_SIZE: usize = 16;
17
18 /// 16-byte `nlist_64` — exact wire representation.
19 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
20 pub struct RawNlist {
21 pub strx: u32,
22 pub n_type: u8,
23 pub n_sect: u8,
24 pub n_desc: u16,
25 pub n_value: u64,
26 }
27
28 impl RawNlist {
29 pub fn parse(bytes: &[u8]) -> Result<Self, ReadError> {
30 if bytes.len() < NLIST_SIZE {
31 return Err(ReadError::Truncated {
32 need: NLIST_SIZE,
33 have: bytes.len(),
34 context: "nlist_64",
35 });
36 }
37 Ok(RawNlist {
38 strx: u32_le(&bytes[0..4]),
39 n_type: bytes[4],
40 n_sect: bytes[5],
41 n_desc: u16::from_le_bytes([bytes[6], bytes[7]]),
42 n_value: u64_le(&bytes[8..16]),
43 })
44 }
45
46 pub fn write(&self, out: &mut Vec<u8>) {
47 out.extend_from_slice(&self.strx.to_le_bytes());
48 out.push(self.n_type);
49 out.push(self.n_sect);
50 out.extend_from_slice(&self.n_desc.to_le_bytes());
51 out.extend_from_slice(&self.n_value.to_le_bytes());
52 }
53 }
54
55 /// Coarse classification of an `nlist_64` based on its `n_type & N_TYPE` bits.
56 /// Stab (debug) entries sit alongside this in `InputSymbol::stab_kind`.
57 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
58 pub enum SymKind {
59 Undef,
60 Abs,
61 Sect,
62 Indirect,
63 }
64
65 /// Linker-side view of one symbol. Carries the raw nlist for round-trip and
66 /// a set of accessors that decode its semantic meaning.
67 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
68 pub struct InputSymbol {
69 pub raw: RawNlist,
70 }
71
72 impl InputSymbol {
73 pub fn from_raw(raw: RawNlist) -> Self {
74 InputSymbol { raw }
75 }
76
77 pub fn strx(&self) -> u32 {
78 self.raw.strx
79 }
80
81 pub fn sect_idx(&self) -> u8 {
82 self.raw.n_sect
83 }
84
85 pub fn value(&self) -> u64 {
86 self.raw.n_value
87 }
88
89 /// If this is a stabs debug entry, return the stab kind byte; else `None`.
90 pub fn stab_kind(&self) -> Option<u8> {
91 if self.raw.n_type & N_STAB != 0 {
92 Some(self.raw.n_type)
93 } else {
94 None
95 }
96 }
97
98 /// Classify the non-stab kind. For stab entries callers should first check
99 /// `stab_kind()` and treat the result as opaque.
100 pub fn kind(&self) -> SymKind {
101 match self.raw.n_type & N_TYPE {
102 N_UNDF => SymKind::Undef,
103 N_ABS => SymKind::Abs,
104 N_SECT => SymKind::Sect,
105 N_INDR => SymKind::Indirect,
106 // `N_PBUD` (0xc) is obsolete; treat as Undef to keep matching total.
107 _ => SymKind::Undef,
108 }
109 }
110
111 pub fn is_ext(&self) -> bool {
112 self.raw.n_type & N_EXT != 0
113 }
114
115 pub fn is_private_ext(&self) -> bool {
116 self.raw.n_type & N_PEXT != 0
117 }
118
119 pub fn weak_ref(&self) -> bool {
120 self.raw.n_desc & N_WEAK_REF != 0
121 }
122
123 pub fn weak_def(&self) -> bool {
124 self.raw.n_desc & N_WEAK_DEF != 0
125 }
126
127 pub fn no_dead_strip(&self) -> bool {
128 self.raw.n_desc & N_NO_DEAD_STRIP != 0
129 }
130
131 /// True for symbols marked as an alternate entry point for their
132 /// section's preceding atom. Atomization folds these into the
133 /// owning atom's `alt_entries` list instead of splitting at them.
134 pub fn alt_entry(&self) -> bool {
135 self.raw.n_desc & N_ALT_ENTRY != 0
136 }
137
138 /// True iff this is an external undefined symbol with a non-zero size —
139 /// the Mach-O convention for "common" tentative definitions.
140 pub fn is_common(&self) -> bool {
141 self.kind() == SymKind::Undef && self.is_ext() && self.raw.n_value > 0
142 }
143
144 /// Size (in bytes) of a common symbol; `None` if not common.
145 pub fn common_size(&self) -> Option<u64> {
146 self.is_common().then_some(self.raw.n_value)
147 }
148
149 /// Log-2 alignment of a common symbol, encoded in `n_desc` bits 8..11.
150 /// `None` if the symbol is not common.
151 pub fn common_align_pow2(&self) -> Option<u8> {
152 self.is_common()
153 .then_some(((self.raw.n_desc >> 8) & 0x0f) as u8)
154 }
155
156 /// Two-level-namespace library ordinal from an undefined symbol's
157 /// `n_desc` high byte. `None` for non-undefined symbols and common
158 /// symbols (common uses those bits for alignment).
159 pub fn library_ordinal(&self) -> Option<u8> {
160 if self.kind() == SymKind::Undef && !self.is_common() {
161 Some((self.raw.n_desc >> 8) as u8)
162 } else {
163 None
164 }
165 }
166
167 /// For `N_INDR` aliases: `n_value` holds a strx into the string table
168 /// naming the target symbol.
169 pub fn indirect_target_strx(&self) -> Option<u32> {
170 if self.kind() == SymKind::Indirect {
171 Some(self.raw.n_value as u32)
172 } else {
173 None
174 }
175 }
176 }
177
178 /// Parse `nsyms` consecutive nlist entries starting at `symoff` bytes into the
179 /// file image. Errors on truncation or an out-of-bounds offset.
180 pub fn parse_nlist_table(
181 file_bytes: &[u8],
182 symoff: u32,
183 nsyms: u32,
184 ) -> Result<Vec<InputSymbol>, ReadError> {
185 let start = symoff as usize;
186 let total = (nsyms as usize)
187 .checked_mul(NLIST_SIZE)
188 .ok_or(ReadError::Truncated {
189 need: usize::MAX,
190 have: file_bytes.len(),
191 context: "symbol table (nsyms × 16 overflows)",
192 })?;
193 let end = start.checked_add(total).ok_or(ReadError::Truncated {
194 need: usize::MAX,
195 have: file_bytes.len(),
196 context: "symbol table (symoff + size overflows)",
197 })?;
198 if end > file_bytes.len() {
199 return Err(ReadError::Truncated {
200 need: end,
201 have: file_bytes.len(),
202 context: "symbol table (nlist region)",
203 });
204 }
205 let mut out = Vec::with_capacity(nsyms as usize);
206 for i in 0..nsyms as usize {
207 let off = start + i * NLIST_SIZE;
208 out.push(InputSymbol::from_raw(RawNlist::parse(
209 &file_bytes[off..off + NLIST_SIZE],
210 )?));
211 }
212 Ok(out)
213 }
214
215 /// Serialize a symbol table back to wire form (nsyms × 16 contiguous bytes).
216 pub fn write_nlist_table(syms: &[InputSymbol], out: &mut Vec<u8>) {
217 for s in syms {
218 s.raw.write(out);
219 }
220 }
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `InputSymbol` straight from the five `nlist_64` field values.
    fn sym(strx: u32, n_type: u8, n_sect: u8, n_desc: u16, n_value: u64) -> InputSymbol {
        InputSymbol::from_raw(RawNlist {
            strx,
            n_type,
            n_sect,
            n_desc,
            n_value,
        })
    }

    #[test]
    fn raw_nlist_round_trips_byte_equal() {
        let original = sym(42, N_SECT | N_EXT, 3, N_WEAK_DEF, 0x1_0000_0040).raw;
        let mut wire = Vec::new();
        original.write(&mut wire);
        assert_eq!(wire.len(), NLIST_SIZE);
        let decoded = RawNlist::parse(&wire).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn classify_extern_text_symbol() {
        let text = sym(10, N_SECT | N_EXT, 1, 0, 0x100);
        assert_eq!(text.kind(), SymKind::Sect);
        assert!(text.is_ext());
        assert!(!text.is_private_ext());
        assert!(!text.is_common());
        assert_eq!(text.library_ordinal(), None);
    }

    #[test]
    fn classify_local_symbol() {
        let local = sym(20, N_SECT, 2, 0, 0x200);
        assert_eq!(local.kind(), SymKind::Sect);
        assert!(!local.is_ext());
    }

    #[test]
    fn classify_undef_import() {
        let import = sym(30, N_UNDF | N_EXT, 0, 3 << 8, 0);
        assert_eq!(import.kind(), SymKind::Undef);
        assert!(import.is_ext());
        assert!(!import.is_common());
        assert_eq!(import.library_ordinal(), Some(3));
    }

    #[test]
    fn classify_common_symbol() {
        // Undefined + external + size 16; alignment field 3 means 2^3 = 8.
        let align_bits = 3u16 << 8;
        let common = sym(40, N_UNDF | N_EXT, 0, align_bits, 16);
        assert!(common.is_common());
        assert_eq!(common.common_size(), Some(16));
        assert_eq!(common.common_align_pow2(), Some(3));
        assert_eq!(common.library_ordinal(), None);
    }

    #[test]
    fn classify_weak_def() {
        let weak = sym(50, N_SECT | N_EXT, 1, N_WEAK_DEF, 0x400);
        assert!(weak.weak_def());
        assert!(!weak.weak_ref());
    }

    #[test]
    fn classify_weak_ref_import() {
        let weak_import = sym(60, N_UNDF | N_EXT, 0, N_WEAK_REF | (1 << 8), 0);
        assert_eq!(weak_import.kind(), SymKind::Undef);
        assert!(weak_import.weak_ref());
        assert_eq!(weak_import.library_ordinal(), Some(1));
    }

    #[test]
    fn classify_private_extern() {
        let hidden = sym(70, N_SECT | N_EXT | N_PEXT, 1, 0, 0x800);
        assert!(hidden.is_ext());
        assert!(hidden.is_private_ext());
    }

    #[test]
    fn classify_absolute() {
        let absolute = sym(80, N_ABS | N_EXT, 0, 0, 0xDEAD_BEEF);
        assert_eq!(absolute.kind(), SymKind::Abs);
        assert!(absolute.is_ext());
    }

    #[test]
    fn classify_indirect_alias() {
        // For an alias, `n_value` is the strx of the name being aliased.
        let alias = sym(90, N_INDR | N_EXT, 0, 0, 123);
        assert_eq!(alias.kind(), SymKind::Indirect);
        assert_eq!(alias.indirect_target_strx(), Some(123));
    }

    #[test]
    fn classify_stab_entry_preserved() {
        // For a stab entry the entire `n_type` byte is the stab kind.
        let n_fun: u8 = 0x24; // N_FUN
        let stab = sym(100, n_fun, 1, 0, 0x1000);
        assert_eq!(stab.stab_kind(), Some(n_fun));
    }

    #[test]
    fn symtab_round_trip_byte_equal() {
        let table = vec![
            sym(1, N_SECT | N_EXT, 1, 0, 0x100),
            sym(2, N_UNDF | N_EXT, 0, 1 << 8, 0),
            sym(3, N_ABS, 0, 0, 42),
        ];

        // Synthetic file image: 8 bytes of padding, then the table.
        let mut image = vec![0u8; 8];
        write_nlist_table(&table, &mut image);
        assert_eq!(image.len(), 8 + 3 * NLIST_SIZE);

        let reparsed = parse_nlist_table(&image, 8, 3).unwrap();
        assert_eq!(reparsed, table);
    }

    #[test]
    fn symtab_truncation_errors() {
        // Two symbols requested, but the 16-byte image only holds one.
        let image = vec![0u8; 16];
        let outcome = parse_nlist_table(&image, 0, 2);
        assert!(matches!(outcome, Err(ReadError::Truncated { .. })));
    }
}
353