| 1 | //! Symbol-table reader. |
| 2 | //! |
| 3 | //! Decodes 16-byte `nlist_64` records and exposes them via `InputSymbol`, |
| 4 | //! which keeps the raw wire bytes for byte-level round-trip and layers rich |
| 5 | //! accessors (kind, externness, weak flags, common size/alignment, library |
| 6 | //! ordinal, indirect aliased-name strx) on top. |
| 7 | //! |
| 8 | //! Sprint 2 scope: parse/encode + classification. The name-resolution step |
| 9 | //! that turns `strx` into `&str` lives on `ObjectFile` once `StringTable` |
| 10 | //! lands alongside this module. |
| 11 | |
| 12 | use crate::macho::constants::*; |
| 13 | use crate::macho::reader::{u32_le, u64_le, ReadError}; |
| 14 | |
/// Number of bytes one `nlist_64` record occupies on the wire:
/// `strx` (4) + `n_type` (1) + `n_sect` (1) + `n_desc` (2) + `n_value` (8).
pub const NLIST_SIZE: usize = 4 + 1 + 1 + 2 + 8;
| 17 | |
/// One `nlist_64` record, field for field as it appears on the wire
/// (16 bytes in total).
///
/// The struct carries no interpretation of its own; decoding of the flag and
/// type bits lives on `InputSymbol`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct RawNlist {
    /// Index of the symbol's name in the string table.
    pub strx: u32,
    /// Type byte: holds the stab bits, the private-extern bit, the type bits
    /// and the extern bit.
    pub n_type: u8,
    /// Section index byte.
    pub n_sect: u8,
    /// Descriptor bits: weak-ref / weak-def / no-dead-strip / alt-entry
    /// flags, plus (from bit 8 upward) either the common-symbol alignment or
    /// the library ordinal.
    pub n_desc: u16,
    /// Value field. Read as the size for common symbols and as a
    /// string-table index for `N_INDR` aliases.
    pub n_value: u64,
}
| 27 | |
| 28 | impl RawNlist { |
| 29 | pub fn parse(bytes: &[u8]) -> Result<Self, ReadError> { |
| 30 | if bytes.len() < NLIST_SIZE { |
| 31 | return Err(ReadError::Truncated { |
| 32 | need: NLIST_SIZE, |
| 33 | have: bytes.len(), |
| 34 | context: "nlist_64", |
| 35 | }); |
| 36 | } |
| 37 | Ok(RawNlist { |
| 38 | strx: u32_le(&bytes[0..4]), |
| 39 | n_type: bytes[4], |
| 40 | n_sect: bytes[5], |
| 41 | n_desc: u16::from_le_bytes([bytes[6], bytes[7]]), |
| 42 | n_value: u64_le(&bytes[8..16]), |
| 43 | }) |
| 44 | } |
| 45 | |
| 46 | pub fn write(&self, out: &mut Vec<u8>) { |
| 47 | out.extend_from_slice(&self.strx.to_le_bytes()); |
| 48 | out.push(self.n_type); |
| 49 | out.push(self.n_sect); |
| 50 | out.extend_from_slice(&self.n_desc.to_le_bytes()); |
| 51 | out.extend_from_slice(&self.n_value.to_le_bytes()); |
| 52 | } |
| 53 | } |
| 54 | |
/// Coarse symbol category derived from the type bits of an `nlist_64`
/// (`n_type & N_TYPE`).
///
/// Stab (debug) entries are not represented here; they are reported
/// separately by `InputSymbol::stab_kind`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SymKind {
    /// Undefined symbol (`N_UNDF`). `InputSymbol::kind` also falls back to
    /// this for type-bit patterns it does not recognise.
    Undef,
    /// Absolute symbol (`N_ABS`).
    Abs,
    /// Symbol defined in a section (`N_SECT`).
    Sect,
    /// Indirect alias of another symbol (`N_INDR`).
    Indirect,
}
| 64 | |
| 65 | /// Linker-side view of one symbol. Carries the raw nlist for round-trip and |
| 66 | /// a set of accessors that decode its semantic meaning. |
| 67 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
| 68 | pub struct InputSymbol { |
| 69 | pub raw: RawNlist, |
| 70 | } |
| 71 | |
| 72 | impl InputSymbol { |
| 73 | pub fn from_raw(raw: RawNlist) -> Self { |
| 74 | InputSymbol { raw } |
| 75 | } |
| 76 | |
| 77 | pub fn strx(&self) -> u32 { |
| 78 | self.raw.strx |
| 79 | } |
| 80 | |
| 81 | pub fn sect_idx(&self) -> u8 { |
| 82 | self.raw.n_sect |
| 83 | } |
| 84 | |
| 85 | pub fn value(&self) -> u64 { |
| 86 | self.raw.n_value |
| 87 | } |
| 88 | |
| 89 | /// If this is a stabs debug entry, return the stab kind byte; else `None`. |
| 90 | pub fn stab_kind(&self) -> Option<u8> { |
| 91 | if self.raw.n_type & N_STAB != 0 { |
| 92 | Some(self.raw.n_type) |
| 93 | } else { |
| 94 | None |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | /// Classify the non-stab kind. For stab entries callers should first check |
| 99 | /// `stab_kind()` and treat the result as opaque. |
| 100 | pub fn kind(&self) -> SymKind { |
| 101 | match self.raw.n_type & N_TYPE { |
| 102 | N_UNDF => SymKind::Undef, |
| 103 | N_ABS => SymKind::Abs, |
| 104 | N_SECT => SymKind::Sect, |
| 105 | N_INDR => SymKind::Indirect, |
| 106 | // `N_PBUD` (0xc) is obsolete; treat as Undef to keep matching total. |
| 107 | _ => SymKind::Undef, |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | pub fn is_ext(&self) -> bool { |
| 112 | self.raw.n_type & N_EXT != 0 |
| 113 | } |
| 114 | |
| 115 | pub fn is_private_ext(&self) -> bool { |
| 116 | self.raw.n_type & N_PEXT != 0 |
| 117 | } |
| 118 | |
| 119 | pub fn weak_ref(&self) -> bool { |
| 120 | self.raw.n_desc & N_WEAK_REF != 0 |
| 121 | } |
| 122 | |
| 123 | pub fn weak_def(&self) -> bool { |
| 124 | self.raw.n_desc & N_WEAK_DEF != 0 |
| 125 | } |
| 126 | |
| 127 | pub fn no_dead_strip(&self) -> bool { |
| 128 | self.raw.n_desc & N_NO_DEAD_STRIP != 0 |
| 129 | } |
| 130 | |
| 131 | /// True for symbols marked as an alternate entry point for their |
| 132 | /// section's preceding atom. Atomization folds these into the |
| 133 | /// owning atom's `alt_entries` list instead of splitting at them. |
| 134 | pub fn alt_entry(&self) -> bool { |
| 135 | self.raw.n_desc & N_ALT_ENTRY != 0 |
| 136 | } |
| 137 | |
| 138 | /// True iff this is an external undefined symbol with a non-zero size — |
| 139 | /// the Mach-O convention for "common" tentative definitions. |
| 140 | pub fn is_common(&self) -> bool { |
| 141 | self.kind() == SymKind::Undef && self.is_ext() && self.raw.n_value > 0 |
| 142 | } |
| 143 | |
| 144 | /// Size (in bytes) of a common symbol; `None` if not common. |
| 145 | pub fn common_size(&self) -> Option<u64> { |
| 146 | self.is_common().then_some(self.raw.n_value) |
| 147 | } |
| 148 | |
| 149 | /// Log-2 alignment of a common symbol, encoded in `n_desc` bits 8..11. |
| 150 | /// `None` if the symbol is not common. |
| 151 | pub fn common_align_pow2(&self) -> Option<u8> { |
| 152 | self.is_common() |
| 153 | .then_some(((self.raw.n_desc >> 8) & 0x0f) as u8) |
| 154 | } |
| 155 | |
| 156 | /// Two-level-namespace library ordinal from an undefined symbol's |
| 157 | /// `n_desc` high byte. `None` for non-undefined symbols and common |
| 158 | /// symbols (common uses those bits for alignment). |
| 159 | pub fn library_ordinal(&self) -> Option<u8> { |
| 160 | if self.kind() == SymKind::Undef && !self.is_common() { |
| 161 | Some((self.raw.n_desc >> 8) as u8) |
| 162 | } else { |
| 163 | None |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | /// For `N_INDR` aliases: `n_value` holds a strx into the string table |
| 168 | /// naming the target symbol. |
| 169 | pub fn indirect_target_strx(&self) -> Option<u32> { |
| 170 | if self.kind() == SymKind::Indirect { |
| 171 | Some(self.raw.n_value as u32) |
| 172 | } else { |
| 173 | None |
| 174 | } |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | /// Parse `nsyms` consecutive nlist entries starting at `symoff` bytes into the |
| 179 | /// file image. Errors on truncation or an out-of-bounds offset. |
| 180 | pub fn parse_nlist_table( |
| 181 | file_bytes: &[u8], |
| 182 | symoff: u32, |
| 183 | nsyms: u32, |
| 184 | ) -> Result<Vec<InputSymbol>, ReadError> { |
| 185 | let start = symoff as usize; |
| 186 | let total = (nsyms as usize) |
| 187 | .checked_mul(NLIST_SIZE) |
| 188 | .ok_or(ReadError::Truncated { |
| 189 | need: usize::MAX, |
| 190 | have: file_bytes.len(), |
| 191 | context: "symbol table (nsyms × 16 overflows)", |
| 192 | })?; |
| 193 | let end = start.checked_add(total).ok_or(ReadError::Truncated { |
| 194 | need: usize::MAX, |
| 195 | have: file_bytes.len(), |
| 196 | context: "symbol table (symoff + size overflows)", |
| 197 | })?; |
| 198 | if end > file_bytes.len() { |
| 199 | return Err(ReadError::Truncated { |
| 200 | need: end, |
| 201 | have: file_bytes.len(), |
| 202 | context: "symbol table (nlist region)", |
| 203 | }); |
| 204 | } |
| 205 | let mut out = Vec::with_capacity(nsyms as usize); |
| 206 | for i in 0..nsyms as usize { |
| 207 | let off = start + i * NLIST_SIZE; |
| 208 | out.push(InputSymbol::from_raw(RawNlist::parse( |
| 209 | &file_bytes[off..off + NLIST_SIZE], |
| 210 | )?)); |
| 211 | } |
| 212 | Ok(out) |
| 213 | } |
| 214 | |
| 215 | /// Serialize a symbol table back to wire form (nsyms × 16 contiguous bytes). |
| 216 | pub fn write_nlist_table(syms: &[InputSymbol], out: &mut Vec<u8>) { |
| 217 | for s in syms { |
| 218 | s.raw.write(out); |
| 219 | } |
| 220 | } |
| 221 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Assembles a raw record from its five wire fields.
    fn nlist(strx: u32, n_type: u8, n_sect: u8, n_desc: u16, n_value: u64) -> RawNlist {
        RawNlist {
            strx,
            n_type,
            n_sect,
            n_desc,
            n_value,
        }
    }

    /// Shorthand for the symbol view over a record with the given fields.
    fn symbol(strx: u32, n_type: u8, n_sect: u8, n_desc: u16, n_value: u64) -> InputSymbol {
        InputSymbol::from_raw(nlist(strx, n_type, n_sect, n_desc, n_value))
    }

    #[test]
    fn raw_nlist_round_trips_byte_equal() {
        let original = nlist(42, N_SECT | N_EXT, 3, N_WEAK_DEF, 0x1_0000_0040);
        let mut wire = Vec::new();
        original.write(&mut wire);
        assert_eq!(wire.len(), NLIST_SIZE);
        let decoded = RawNlist::parse(&wire).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn classify_extern_text_symbol() {
        let text = symbol(10, N_SECT | N_EXT, 1, 0, 0x100);
        assert_eq!(text.kind(), SymKind::Sect);
        assert!(text.is_ext());
        assert!(!text.is_private_ext());
        assert!(!text.is_common());
        assert_eq!(text.library_ordinal(), None);
    }

    #[test]
    fn classify_local_symbol() {
        let local = symbol(20, N_SECT, 2, 0, 0x200);
        assert_eq!(local.kind(), SymKind::Sect);
        assert!(!local.is_ext());
    }

    #[test]
    fn classify_undef_import() {
        // Ordinal 3 in the high byte of `n_desc`, zero value.
        let import = symbol(30, N_UNDF | N_EXT, 0, 3 << 8, 0);
        assert_eq!(import.kind(), SymKind::Undef);
        assert!(import.is_ext());
        assert!(!import.is_common());
        assert_eq!(import.library_ordinal(), Some(3));
    }

    #[test]
    fn classify_common_symbol() {
        // Undefined + external with size 16 and alignment 2^3 = 8.
        let align_log2: u16 = 3;
        let common = symbol(40, N_UNDF | N_EXT, 0, align_log2 << 8, 16);
        assert!(common.is_common());
        assert_eq!(common.common_size(), Some(16));
        assert_eq!(common.common_align_pow2(), Some(3));
        assert_eq!(common.library_ordinal(), None);
    }

    #[test]
    fn classify_weak_def() {
        let weak = symbol(50, N_SECT | N_EXT, 1, N_WEAK_DEF, 0x400);
        assert!(weak.weak_def());
        assert!(!weak.weak_ref());
    }

    #[test]
    fn classify_weak_ref_import() {
        let weak_import = symbol(60, N_UNDF | N_EXT, 0, N_WEAK_REF | (1 << 8), 0);
        assert_eq!(weak_import.kind(), SymKind::Undef);
        assert!(weak_import.weak_ref());
        assert_eq!(weak_import.library_ordinal(), Some(1));
    }

    #[test]
    fn classify_private_extern() {
        let hidden = symbol(70, N_SECT | N_EXT | N_PEXT, 1, 0, 0x800);
        assert!(hidden.is_ext());
        assert!(hidden.is_private_ext());
    }

    #[test]
    fn classify_absolute() {
        let absolute = symbol(80, N_ABS | N_EXT, 0, 0, 0xDEAD_BEEF);
        assert_eq!(absolute.kind(), SymKind::Abs);
        assert!(absolute.is_ext());
    }

    #[test]
    fn classify_indirect_alias() {
        // For an alias, `n_value` is the strx of the name being aliased.
        let alias = symbol(90, N_INDR | N_EXT, 0, 0, 123);
        assert_eq!(alias.kind(), SymKind::Indirect);
        assert_eq!(alias.indirect_target_strx(), Some(123));
    }

    #[test]
    fn classify_stab_entry_preserved() {
        // For a stab entry the entire `n_type` byte is the stab kind.
        const N_FUN: u8 = 0x24;
        let stab = symbol(100, N_FUN, 1, 0, 0x1000);
        assert_eq!(stab.stab_kind(), Some(N_FUN));
    }

    #[test]
    fn symtab_round_trip_byte_equal() {
        let expected = vec![
            symbol(1, N_SECT | N_EXT, 1, 0, 0x100),
            symbol(2, N_UNDF | N_EXT, 0, 1 << 8, 0),
            symbol(3, N_ABS, 0, 0, 42),
        ];

        // Synthetic file image: 8 bytes of padding, then the table.
        let mut image = vec![0u8; 8];
        write_nlist_table(&expected, &mut image);
        assert_eq!(image.len(), 8 + 3 * NLIST_SIZE);

        let decoded = parse_nlist_table(&image, 8, 3).unwrap();
        assert_eq!(decoded, expected);
    }

    #[test]
    fn symtab_truncation_errors() {
        // Two symbols requested, but the image only has room for one.
        let image = vec![0u8; 16];
        let outcome = parse_nlist_table(&image, 0, 2);
        assert!(matches!(outcome, Err(ReadError::Truncated { .. })));
    }
}
| 353 |