| 1 | //! Export-trie walker. |
| 2 | //! |
| 3 | //! dyld stores a dylib's exports as a compact prefix trie rooted at offset 0 |
| 4 | //! of the `export_off / export_size` region (LC_DYLD_INFO_ONLY) or the |
| 5 | //! `LC_DYLD_EXPORTS_TRIE` blob. Each node is: |
| 6 | //! |
| 7 | //! ```text |
| 8 | //! uleb128 terminal_size |
| 9 | //! terminal_size bytes of terminal payload (when non-zero) |
| 10 | //! u8 child_count |
| 11 | //! child_count * (c-string edge, uleb128 child_offset) |
| 12 | //! ``` |
| 13 | //! |
| 14 | //! Terminal payload: |
| 15 | //! `uleb128 flags` |
| 16 | //! then, depending on `flags`: |
| 17 | //! - REEXPORT : `uleb128 other_dylib_ordinal` + null-terminated imported_name |
| 18 | //! - STUB_AND_RESOLVER: `uleb128 stub_offset` + `uleb128 resolver_offset` |
| 19 | //! - otherwise : `uleb128 address` |
| 20 | //! |
| 21 | //! The walker accumulates edge strings to produce each full exported name, |
| 22 | //! enforces a depth cap, and rejects cycles / out-of-bound offsets / ULEB |
| 23 | //! overruns with diagnostics. |
| 24 | |
| 25 | use super::constants::*; |
| 26 | use crate::leb::read_uleb; |
| 27 | use crate::macho::reader::ReadError; |
| 28 | |
| 29 | const MAX_DEPTH: usize = 128; |
| 30 | |
| 31 | /// Terminal export kind, discriminated on the `flags` low bits. |
| 32 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 33 | pub enum ExportKind { |
| 34 | /// Regular symbol at `address`. |
| 35 | Regular { address: u64 }, |
| 36 | /// Thread-local; `address` is the TLV descriptor offset. |
| 37 | ThreadLocal { address: u64 }, |
| 38 | /// Absolute symbol — `address` is the final value. |
| 39 | Absolute { address: u64 }, |
| 40 | /// Re-export of another dylib's symbol. |
| 41 | Reexport { |
| 42 | /// 0 = parent library (the dylib containing this entry), else one |
| 43 | /// of the dylib's LC_*_DYLIB ordinals (1-based). |
| 44 | ordinal: u32, |
| 45 | /// Empty string means "same name as the exported name". |
| 46 | imported_name: String, |
| 47 | }, |
| 48 | /// Two addresses: the normal stub plus a resolver function that produces |
| 49 | /// the final address on demand. |
| 50 | StubAndResolver { stub: u64, resolver: u64 }, |
| 51 | } |
| 52 | |
| 53 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 54 | pub struct ExportEntry { |
| 55 | pub name: String, |
| 56 | pub flags: u64, |
| 57 | pub kind: ExportKind, |
| 58 | } |
| 59 | |
| 60 | impl ExportEntry { |
| 61 | pub fn weak_def(&self) -> bool { |
| 62 | self.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0 |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | /// The linker-side "exports view" of a dylib. Either a wire-form trie |
| 67 | /// (from `MH_DYLIB` via `LC_DYLD_INFO_ONLY` or `LC_DYLD_EXPORTS_TRIE`) or |
| 68 | /// a pre-flattened list (from a TAPI `.tbd`). Both kinds answer the same |
| 69 | /// two questions: `entries()` (iterate all) and `lookup(name)` (find one). |
| 70 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 71 | pub enum Exports { |
| 72 | Trie(Vec<u8>), |
| 73 | Flat(Vec<ExportEntry>), |
| 74 | } |
| 75 | |
| 76 | impl Default for Exports { |
| 77 | fn default() -> Self { |
| 78 | Exports::empty() |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | impl Exports { |
| 83 | pub fn empty() -> Self { |
| 84 | Exports::Flat(Vec::new()) |
| 85 | } |
| 86 | |
| 87 | pub fn from_trie_bytes(bytes: &[u8]) -> Self { |
| 88 | Exports::Trie(bytes.to_vec()) |
| 89 | } |
| 90 | |
| 91 | pub fn from_entries(entries: Vec<ExportEntry>) -> Self { |
| 92 | Exports::Flat(entries) |
| 93 | } |
| 94 | |
| 95 | pub fn is_empty(&self) -> bool { |
| 96 | match self { |
| 97 | Exports::Trie(b) => b.is_empty(), |
| 98 | Exports::Flat(e) => e.is_empty(), |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | /// Wire bytes for the trie variant; empty slice for flat. |
| 103 | pub fn trie_bytes(&self) -> &[u8] { |
| 104 | match self { |
| 105 | Exports::Trie(b) => b, |
| 106 | Exports::Flat(_) => &[], |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | /// Decode every export. For the trie variant this walks the tree; for |
| 111 | /// flat it clones the stored vec. Cycle-safe on the trie side. |
| 112 | pub fn entries(&self) -> Result<Vec<ExportEntry>, ReadError> { |
| 113 | match self { |
| 114 | Exports::Trie(raw) => { |
| 115 | if raw.is_empty() { |
| 116 | return Ok(Vec::new()); |
| 117 | } |
| 118 | let mut out = Vec::new(); |
| 119 | let mut visited = std::collections::HashSet::new(); |
| 120 | walk(raw, 0, String::new(), &mut out, &mut visited, 0)?; |
| 121 | Ok(out) |
| 122 | } |
| 123 | Exports::Flat(v) => Ok(v.clone()), |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | pub fn lookup(&self, name: &str) -> Result<Option<ExportEntry>, ReadError> { |
| 128 | match self { |
| 129 | Exports::Trie(raw) => { |
| 130 | if raw.is_empty() { |
| 131 | return Ok(None); |
| 132 | } |
| 133 | lookup(raw, 0, name.as_bytes(), 0) |
| 134 | } |
| 135 | Exports::Flat(v) => Ok(v.iter().find(|e| e.name == name).cloned()), |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | fn read_terminal( |
| 141 | trie: &[u8], |
| 142 | node_off: usize, |
| 143 | accumulated: &str, |
| 144 | ) -> Result<Option<ExportEntry>, ReadError> { |
| 145 | let (terminal_size, n) = read_uleb_at(trie, node_off)?; |
| 146 | if terminal_size == 0 { |
| 147 | return Ok(None); |
| 148 | } |
| 149 | let mut cursor = node_off + n; |
| 150 | let (flags, m) = read_uleb_at(trie, cursor)?; |
| 151 | cursor += m; |
| 152 | let kind = if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 { |
| 153 | let (ord, on) = read_uleb_at(trie, cursor)?; |
| 154 | cursor += on; |
| 155 | let imported_name = read_cstring(trie, cursor)?.to_string(); |
| 156 | ExportKind::Reexport { |
| 157 | ordinal: ord as u32, |
| 158 | imported_name, |
| 159 | } |
| 160 | } else if flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0 { |
| 161 | let (stub, sn) = read_uleb_at(trie, cursor)?; |
| 162 | cursor += sn; |
| 163 | let (resolver, _rn) = read_uleb_at(trie, cursor)?; |
| 164 | ExportKind::StubAndResolver { stub, resolver } |
| 165 | } else { |
| 166 | let (addr, _an) = read_uleb_at(trie, cursor)?; |
| 167 | match flags & EXPORT_SYMBOL_FLAGS_KIND_MASK { |
| 168 | EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL => ExportKind::ThreadLocal { address: addr }, |
| 169 | EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE => ExportKind::Absolute { address: addr }, |
| 170 | _ => ExportKind::Regular { address: addr }, |
| 171 | } |
| 172 | }; |
| 173 | Ok(Some(ExportEntry { |
| 174 | name: accumulated.to_string(), |
| 175 | flags, |
| 176 | kind, |
| 177 | })) |
| 178 | } |
| 179 | |
| 180 | fn walk( |
| 181 | trie: &[u8], |
| 182 | node_off: usize, |
| 183 | accumulated: String, |
| 184 | out: &mut Vec<ExportEntry>, |
| 185 | visited: &mut std::collections::HashSet<usize>, |
| 186 | depth: usize, |
| 187 | ) -> Result<(), ReadError> { |
| 188 | if depth > MAX_DEPTH { |
| 189 | return Err(ReadError::BadCmdsize { |
| 190 | cmd: 0, |
| 191 | cmdsize: 0, |
| 192 | at_offset: node_off, |
| 193 | reason: "export trie depth cap exceeded", |
| 194 | }); |
| 195 | } |
| 196 | if !visited.insert(node_off) { |
| 197 | return Err(ReadError::BadCmdsize { |
| 198 | cmd: 0, |
| 199 | cmdsize: 0, |
| 200 | at_offset: node_off, |
| 201 | reason: "export trie contains a cycle", |
| 202 | }); |
| 203 | } |
| 204 | |
| 205 | if let Some(entry) = read_terminal(trie, node_off, &accumulated)? { |
| 206 | out.push(entry); |
| 207 | } |
| 208 | |
| 209 | // Skip past the terminal_size + terminal payload to reach child count. |
| 210 | let cursor = skip_terminal(trie, node_off)?; |
| 211 | if cursor >= trie.len() { |
| 212 | return Err(ReadError::Truncated { |
| 213 | need: cursor + 1, |
| 214 | have: trie.len(), |
| 215 | context: "export trie (missing child_count byte)", |
| 216 | }); |
| 217 | } |
| 218 | let child_count = trie[cursor] as usize; |
| 219 | let mut child_cursor = cursor + 1; |
| 220 | for _ in 0..child_count { |
| 221 | let (edge, advanced) = read_cstring_with_len(trie, child_cursor)?; |
| 222 | child_cursor += advanced; |
| 223 | let (child_off, off_len) = read_uleb_at(trie, child_cursor)?; |
| 224 | child_cursor += off_len; |
| 225 | let child_off = child_off as usize; |
| 226 | if child_off >= trie.len() { |
| 227 | return Err(ReadError::BadCmdsize { |
| 228 | cmd: 0, |
| 229 | cmdsize: 0, |
| 230 | at_offset: child_cursor, |
| 231 | reason: "export trie child offset out of bounds", |
| 232 | }); |
| 233 | } |
| 234 | let mut next_prefix = accumulated.clone(); |
| 235 | next_prefix.push_str(edge); |
| 236 | walk(trie, child_off, next_prefix, out, visited, depth + 1)?; |
| 237 | } |
| 238 | |
| 239 | Ok(()) |
| 240 | } |
| 241 | |
| 242 | fn lookup( |
| 243 | trie: &[u8], |
| 244 | node_off: usize, |
| 245 | remaining: &[u8], |
| 246 | depth: usize, |
| 247 | ) -> Result<Option<ExportEntry>, ReadError> { |
| 248 | if depth > MAX_DEPTH { |
| 249 | return Err(ReadError::BadCmdsize { |
| 250 | cmd: 0, |
| 251 | cmdsize: 0, |
| 252 | at_offset: node_off, |
| 253 | reason: "export trie depth cap exceeded", |
| 254 | }); |
| 255 | } |
| 256 | if remaining.is_empty() { |
| 257 | return read_terminal(trie, node_off, "").map(|maybe| { |
| 258 | maybe.map(|mut e| { |
| 259 | e.name.clear(); |
| 260 | e |
| 261 | }) |
| 262 | }); |
| 263 | } |
| 264 | // Scan children for a matching edge prefix. |
| 265 | let cursor = skip_terminal(trie, node_off)?; |
| 266 | if cursor >= trie.len() { |
| 267 | return Ok(None); |
| 268 | } |
| 269 | let child_count = trie[cursor] as usize; |
| 270 | let mut child_cursor = cursor + 1; |
| 271 | for _ in 0..child_count { |
| 272 | let (edge, advanced) = read_cstring_with_len(trie, child_cursor)?; |
| 273 | child_cursor += advanced; |
| 274 | let (child_off, off_len) = read_uleb_at(trie, child_cursor)?; |
| 275 | child_cursor += off_len; |
| 276 | let edge_bytes = edge.as_bytes(); |
| 277 | if remaining.len() >= edge_bytes.len() && &remaining[..edge_bytes.len()] == edge_bytes { |
| 278 | return lookup( |
| 279 | trie, |
| 280 | child_off as usize, |
| 281 | &remaining[edge_bytes.len()..], |
| 282 | depth + 1, |
| 283 | ); |
| 284 | } |
| 285 | } |
| 286 | Ok(None) |
| 287 | } |
| 288 | |
| 289 | // ---- primitives ---------------------------------------------------------- |
| 290 | |
| 291 | fn skip_terminal(trie: &[u8], node_off: usize) -> Result<usize, ReadError> { |
| 292 | let (terminal_size, n) = read_uleb_at(trie, node_off)?; |
| 293 | let end = node_off |
| 294 | .checked_add(n) |
| 295 | .and_then(|p| p.checked_add(terminal_size as usize)) |
| 296 | .ok_or(ReadError::BadCmdsize { |
| 297 | cmd: 0, |
| 298 | cmdsize: 0, |
| 299 | at_offset: node_off, |
| 300 | reason: "export trie terminal_size overflows", |
| 301 | })?; |
| 302 | if end > trie.len() { |
| 303 | return Err(ReadError::Truncated { |
| 304 | need: end, |
| 305 | have: trie.len(), |
| 306 | context: "export trie terminal payload", |
| 307 | }); |
| 308 | } |
| 309 | Ok(end) |
| 310 | } |
| 311 | |
| 312 | fn read_uleb_at(trie: &[u8], off: usize) -> Result<(u64, usize), ReadError> { |
| 313 | if off > trie.len() { |
| 314 | return Err(ReadError::Truncated { |
| 315 | need: off + 1, |
| 316 | have: trie.len(), |
| 317 | context: "export trie uleb", |
| 318 | }); |
| 319 | } |
| 320 | read_uleb(&trie[off..]) |
| 321 | } |
| 322 | |
| 323 | fn read_cstring(trie: &[u8], off: usize) -> Result<&str, ReadError> { |
| 324 | let (s, _) = read_cstring_with_len(trie, off)?; |
| 325 | Ok(s) |
| 326 | } |
| 327 | |
| 328 | fn read_cstring_with_len(trie: &[u8], off: usize) -> Result<(&str, usize), ReadError> { |
| 329 | if off >= trie.len() { |
| 330 | return Err(ReadError::Truncated { |
| 331 | need: off + 1, |
| 332 | have: trie.len(), |
| 333 | context: "export trie c-string", |
| 334 | }); |
| 335 | } |
| 336 | let nul = trie[off..] |
| 337 | .iter() |
| 338 | .position(|&b| b == 0) |
| 339 | .ok_or(ReadError::Truncated { |
| 340 | need: trie.len() + 1, |
| 341 | have: trie.len(), |
| 342 | context: "export trie c-string (no terminator)", |
| 343 | })?; |
| 344 | let s = std::str::from_utf8(&trie[off..off + nul]).map_err(|_| ReadError::BadCmdsize { |
| 345 | cmd: 0, |
| 346 | cmdsize: 0, |
| 347 | at_offset: off, |
| 348 | reason: "export trie string is not UTF-8", |
| 349 | })?; |
| 350 | Ok((s, nul + 1)) |
| 351 | } |
| 352 | |
| 353 | #[cfg(test)] |
| 354 | mod tests { |
| 355 | use super::*; |
| 356 | use crate::leb::write_uleb; |
| 357 | |
| 358 | /// Build a leaf terminal node (no children) with given flags+address. |
| 359 | fn encode_leaf_node(flags: u64, address: u64) -> Vec<u8> { |
| 360 | let mut payload = Vec::new(); |
| 361 | write_uleb(flags, &mut payload); |
| 362 | write_uleb(address, &mut payload); |
| 363 | let mut node = Vec::new(); |
| 364 | write_uleb(payload.len() as u64, &mut node); |
| 365 | node.extend_from_slice(&payload); |
| 366 | node.push(0); // child_count = 0 |
| 367 | node |
| 368 | } |
| 369 | |
| 370 | fn encode_root_with_children(children: &[(&str, u64)], trie_len_hint: usize) -> Vec<u8> { |
| 371 | // Root has no terminal: terminal_size = 0. |
| 372 | let mut node = Vec::new(); |
| 373 | write_uleb(0, &mut node); // terminal_size |
| 374 | node.push(children.len() as u8); |
| 375 | for (edge, child_off) in children { |
| 376 | node.extend_from_slice(edge.as_bytes()); |
| 377 | node.push(0); |
| 378 | write_uleb(*child_off, &mut node); |
| 379 | } |
| 380 | assert!(node.len() <= trie_len_hint); // sanity |
| 381 | node |
| 382 | } |
| 383 | |
| 384 | #[test] |
| 385 | fn empty_trie_yields_no_entries() { |
| 386 | let trie = Exports::empty(); |
| 387 | assert!(trie.entries().unwrap().is_empty()); |
| 388 | assert!(trie.lookup("_anything").unwrap().is_none()); |
| 389 | } |
| 390 | |
| 391 | #[test] |
| 392 | fn single_leaf_decodes_regular_export() { |
| 393 | // Root has one child edge "_foo" pointing at leaf at known offset. |
| 394 | let leaf = encode_leaf_node(EXPORT_SYMBOL_FLAGS_KIND_REGULAR, 0x1000); |
| 395 | // Pre-size the root so we can compute the child offset. |
| 396 | let root_header_len = { |
| 397 | // terminal_size=0, child_count=1, edge "_foo\0", child_off ULEB |
| 398 | // We need the ULEB byte count of child_off — do a guess-and-check. |
| 399 | let mut tmp = Vec::new(); |
| 400 | write_uleb(0, &mut tmp); |
| 401 | tmp.push(1); |
| 402 | tmp.extend_from_slice(b"_foo\0"); |
| 403 | write_uleb(0, &mut tmp); // placeholder offset, ULEB width = 1 |
| 404 | tmp.len() |
| 405 | }; |
| 406 | let child_off = root_header_len as u64; |
| 407 | let root = encode_root_with_children(&[("_foo", child_off)], 256); |
| 408 | assert_eq!(root.len(), root_header_len); |
| 409 | let mut trie_bytes = root; |
| 410 | trie_bytes.extend_from_slice(&leaf); |
| 411 | |
| 412 | let trie = Exports::from_trie_bytes(&trie_bytes); |
| 413 | let entries = trie.entries().unwrap(); |
| 414 | assert_eq!(entries.len(), 1); |
| 415 | assert_eq!(entries[0].name, "_foo"); |
| 416 | assert_eq!(entries[0].kind, ExportKind::Regular { address: 0x1000 }); |
| 417 | } |
| 418 | |
| 419 | #[test] |
| 420 | fn reexport_terminal_decodes() { |
| 421 | let mut payload = Vec::new(); |
| 422 | write_uleb(EXPORT_SYMBOL_FLAGS_REEXPORT, &mut payload); |
| 423 | write_uleb(2, &mut payload); // ordinal |
| 424 | payload.extend_from_slice(b"_target\0"); |
| 425 | let mut leaf = Vec::new(); |
| 426 | write_uleb(payload.len() as u64, &mut leaf); |
| 427 | leaf.extend_from_slice(&payload); |
| 428 | leaf.push(0); |
| 429 | |
| 430 | // Root points at leaf. |
| 431 | let root_header_len = { |
| 432 | let mut tmp = Vec::new(); |
| 433 | write_uleb(0, &mut tmp); |
| 434 | tmp.push(1); |
| 435 | tmp.extend_from_slice(b"_alias\0"); |
| 436 | write_uleb(0, &mut tmp); |
| 437 | tmp.len() |
| 438 | }; |
| 439 | let root = encode_root_with_children(&[("_alias", root_header_len as u64)], 256); |
| 440 | let mut trie_bytes = root; |
| 441 | trie_bytes.extend_from_slice(&leaf); |
| 442 | |
| 443 | let trie = Exports::from_trie_bytes(&trie_bytes); |
| 444 | let entries = trie.entries().unwrap(); |
| 445 | assert_eq!(entries.len(), 1); |
| 446 | assert_eq!(entries[0].name, "_alias"); |
| 447 | assert_eq!( |
| 448 | entries[0].kind, |
| 449 | ExportKind::Reexport { |
| 450 | ordinal: 2, |
| 451 | imported_name: "_target".into(), |
| 452 | } |
| 453 | ); |
| 454 | } |
| 455 | |
| 456 | #[test] |
| 457 | fn lookup_returns_none_for_missing_name() { |
| 458 | let leaf = encode_leaf_node(EXPORT_SYMBOL_FLAGS_KIND_REGULAR, 0x40); |
| 459 | let root_header_len = { |
| 460 | let mut tmp = Vec::new(); |
| 461 | write_uleb(0, &mut tmp); |
| 462 | tmp.push(1); |
| 463 | tmp.extend_from_slice(b"_foo\0"); |
| 464 | write_uleb(0, &mut tmp); |
| 465 | tmp.len() |
| 466 | }; |
| 467 | let root = encode_root_with_children(&[("_foo", root_header_len as u64)], 256); |
| 468 | let mut trie_bytes = root; |
| 469 | trie_bytes.extend_from_slice(&leaf); |
| 470 | let trie = Exports::from_trie_bytes(&trie_bytes); |
| 471 | assert!(trie.lookup("_bar").unwrap().is_none()); |
| 472 | assert!(trie.lookup("_foo").unwrap().is_some()); |
| 473 | } |
| 474 | |
| 475 | #[test] |
| 476 | fn malformed_child_offset_errors() { |
| 477 | // Root claims a child at offset 1000 but trie is tiny. |
| 478 | let root = encode_root_with_children(&[("_x", 1000)], 256); |
| 479 | let trie = Exports::from_trie_bytes(&root); |
| 480 | assert!(trie.entries().is_err()); |
| 481 | } |
| 482 | } |
| 483 |