Rust · 15723 bytes Raw Blame History
1 //! Export-trie walker.
2 //!
3 //! dyld stores a dylib's exports as a compact prefix trie rooted at offset 0
4 //! of the `export_off / export_size` region (LC_DYLD_INFO_ONLY) or the
5 //! `LC_DYLD_EXPORTS_TRIE` blob. Each node is:
6 //!
7 //! ```text
8 //! uleb128 terminal_size
9 //! terminal_size bytes of terminal payload (when non-zero)
10 //! u8 child_count
11 //! child_count * (c-string edge, uleb128 child_offset)
12 //! ```
13 //!
14 //! Terminal payload:
15 //! `uleb128 flags`
16 //! then, depending on `flags`:
17 //! - REEXPORT : `uleb128 other_dylib_ordinal` + null-terminated imported_name
18 //! - STUB_AND_RESOLVER: `uleb128 stub_offset` + `uleb128 resolver_offset`
19 //! - otherwise : `uleb128 address`
20 //!
21 //! The walker accumulates edge strings to produce each full exported name,
22 //! enforces a depth cap, and rejects cycles / out-of-bound offsets / ULEB
23 //! overruns with diagnostics.
24
25 use super::constants::*;
26 use crate::leb::read_uleb;
27 use crate::macho::reader::ReadError;
28
29 const MAX_DEPTH: usize = 128;
30
31 /// Terminal export kind, discriminated on the `flags` low bits.
32 #[derive(Debug, Clone, PartialEq, Eq)]
33 pub enum ExportKind {
34 /// Regular symbol at `address`.
35 Regular { address: u64 },
36 /// Thread-local; `address` is the TLV descriptor offset.
37 ThreadLocal { address: u64 },
38 /// Absolute symbol — `address` is the final value.
39 Absolute { address: u64 },
40 /// Re-export of another dylib's symbol.
41 Reexport {
42 /// 0 = parent library (the dylib containing this entry), else one
43 /// of the dylib's LC_*_DYLIB ordinals (1-based).
44 ordinal: u32,
45 /// Empty string means "same name as the exported name".
46 imported_name: String,
47 },
48 /// Two addresses: the normal stub plus a resolver function that produces
49 /// the final address on demand.
50 StubAndResolver { stub: u64, resolver: u64 },
51 }
52
53 #[derive(Debug, Clone, PartialEq, Eq)]
54 pub struct ExportEntry {
55 pub name: String,
56 pub flags: u64,
57 pub kind: ExportKind,
58 }
59
60 impl ExportEntry {
61 pub fn weak_def(&self) -> bool {
62 self.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0
63 }
64 }
65
66 /// The linker-side "exports view" of a dylib. Either a wire-form trie
67 /// (from `MH_DYLIB` via `LC_DYLD_INFO_ONLY` or `LC_DYLD_EXPORTS_TRIE`) or
68 /// a pre-flattened list (from a TAPI `.tbd`). Both kinds answer the same
69 /// two questions: `entries()` (iterate all) and `lookup(name)` (find one).
70 #[derive(Debug, Clone, PartialEq, Eq)]
71 pub enum Exports {
72 Trie(Vec<u8>),
73 Flat(Vec<ExportEntry>),
74 }
75
76 impl Default for Exports {
77 fn default() -> Self {
78 Exports::empty()
79 }
80 }
81
82 impl Exports {
83 pub fn empty() -> Self {
84 Exports::Flat(Vec::new())
85 }
86
87 pub fn from_trie_bytes(bytes: &[u8]) -> Self {
88 Exports::Trie(bytes.to_vec())
89 }
90
91 pub fn from_entries(entries: Vec<ExportEntry>) -> Self {
92 Exports::Flat(entries)
93 }
94
95 pub fn is_empty(&self) -> bool {
96 match self {
97 Exports::Trie(b) => b.is_empty(),
98 Exports::Flat(e) => e.is_empty(),
99 }
100 }
101
102 /// Wire bytes for the trie variant; empty slice for flat.
103 pub fn trie_bytes(&self) -> &[u8] {
104 match self {
105 Exports::Trie(b) => b,
106 Exports::Flat(_) => &[],
107 }
108 }
109
110 /// Decode every export. For the trie variant this walks the tree; for
111 /// flat it clones the stored vec. Cycle-safe on the trie side.
112 pub fn entries(&self) -> Result<Vec<ExportEntry>, ReadError> {
113 match self {
114 Exports::Trie(raw) => {
115 if raw.is_empty() {
116 return Ok(Vec::new());
117 }
118 let mut out = Vec::new();
119 let mut visited = std::collections::HashSet::new();
120 walk(raw, 0, String::new(), &mut out, &mut visited, 0)?;
121 Ok(out)
122 }
123 Exports::Flat(v) => Ok(v.clone()),
124 }
125 }
126
127 pub fn lookup(&self, name: &str) -> Result<Option<ExportEntry>, ReadError> {
128 match self {
129 Exports::Trie(raw) => {
130 if raw.is_empty() {
131 return Ok(None);
132 }
133 lookup(raw, 0, name.as_bytes(), 0)
134 }
135 Exports::Flat(v) => Ok(v.iter().find(|e| e.name == name).cloned()),
136 }
137 }
138 }
139
140 fn read_terminal(
141 trie: &[u8],
142 node_off: usize,
143 accumulated: &str,
144 ) -> Result<Option<ExportEntry>, ReadError> {
145 let (terminal_size, n) = read_uleb_at(trie, node_off)?;
146 if terminal_size == 0 {
147 return Ok(None);
148 }
149 let mut cursor = node_off + n;
150 let (flags, m) = read_uleb_at(trie, cursor)?;
151 cursor += m;
152 let kind = if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 {
153 let (ord, on) = read_uleb_at(trie, cursor)?;
154 cursor += on;
155 let imported_name = read_cstring(trie, cursor)?.to_string();
156 ExportKind::Reexport {
157 ordinal: ord as u32,
158 imported_name,
159 }
160 } else if flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0 {
161 let (stub, sn) = read_uleb_at(trie, cursor)?;
162 cursor += sn;
163 let (resolver, _rn) = read_uleb_at(trie, cursor)?;
164 ExportKind::StubAndResolver { stub, resolver }
165 } else {
166 let (addr, _an) = read_uleb_at(trie, cursor)?;
167 match flags & EXPORT_SYMBOL_FLAGS_KIND_MASK {
168 EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL => ExportKind::ThreadLocal { address: addr },
169 EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE => ExportKind::Absolute { address: addr },
170 _ => ExportKind::Regular { address: addr },
171 }
172 };
173 Ok(Some(ExportEntry {
174 name: accumulated.to_string(),
175 flags,
176 kind,
177 }))
178 }
179
180 fn walk(
181 trie: &[u8],
182 node_off: usize,
183 accumulated: String,
184 out: &mut Vec<ExportEntry>,
185 visited: &mut std::collections::HashSet<usize>,
186 depth: usize,
187 ) -> Result<(), ReadError> {
188 if depth > MAX_DEPTH {
189 return Err(ReadError::BadCmdsize {
190 cmd: 0,
191 cmdsize: 0,
192 at_offset: node_off,
193 reason: "export trie depth cap exceeded",
194 });
195 }
196 if !visited.insert(node_off) {
197 return Err(ReadError::BadCmdsize {
198 cmd: 0,
199 cmdsize: 0,
200 at_offset: node_off,
201 reason: "export trie contains a cycle",
202 });
203 }
204
205 if let Some(entry) = read_terminal(trie, node_off, &accumulated)? {
206 out.push(entry);
207 }
208
209 // Skip past the terminal_size + terminal payload to reach child count.
210 let cursor = skip_terminal(trie, node_off)?;
211 if cursor >= trie.len() {
212 return Err(ReadError::Truncated {
213 need: cursor + 1,
214 have: trie.len(),
215 context: "export trie (missing child_count byte)",
216 });
217 }
218 let child_count = trie[cursor] as usize;
219 let mut child_cursor = cursor + 1;
220 for _ in 0..child_count {
221 let (edge, advanced) = read_cstring_with_len(trie, child_cursor)?;
222 child_cursor += advanced;
223 let (child_off, off_len) = read_uleb_at(trie, child_cursor)?;
224 child_cursor += off_len;
225 let child_off = child_off as usize;
226 if child_off >= trie.len() {
227 return Err(ReadError::BadCmdsize {
228 cmd: 0,
229 cmdsize: 0,
230 at_offset: child_cursor,
231 reason: "export trie child offset out of bounds",
232 });
233 }
234 let mut next_prefix = accumulated.clone();
235 next_prefix.push_str(edge);
236 walk(trie, child_off, next_prefix, out, visited, depth + 1)?;
237 }
238
239 Ok(())
240 }
241
242 fn lookup(
243 trie: &[u8],
244 node_off: usize,
245 remaining: &[u8],
246 depth: usize,
247 ) -> Result<Option<ExportEntry>, ReadError> {
248 if depth > MAX_DEPTH {
249 return Err(ReadError::BadCmdsize {
250 cmd: 0,
251 cmdsize: 0,
252 at_offset: node_off,
253 reason: "export trie depth cap exceeded",
254 });
255 }
256 if remaining.is_empty() {
257 return read_terminal(trie, node_off, "").map(|maybe| {
258 maybe.map(|mut e| {
259 e.name.clear();
260 e
261 })
262 });
263 }
264 // Scan children for a matching edge prefix.
265 let cursor = skip_terminal(trie, node_off)?;
266 if cursor >= trie.len() {
267 return Ok(None);
268 }
269 let child_count = trie[cursor] as usize;
270 let mut child_cursor = cursor + 1;
271 for _ in 0..child_count {
272 let (edge, advanced) = read_cstring_with_len(trie, child_cursor)?;
273 child_cursor += advanced;
274 let (child_off, off_len) = read_uleb_at(trie, child_cursor)?;
275 child_cursor += off_len;
276 let edge_bytes = edge.as_bytes();
277 if remaining.len() >= edge_bytes.len() && &remaining[..edge_bytes.len()] == edge_bytes {
278 return lookup(
279 trie,
280 child_off as usize,
281 &remaining[edge_bytes.len()..],
282 depth + 1,
283 );
284 }
285 }
286 Ok(None)
287 }
288
289 // ---- primitives ----------------------------------------------------------
290
291 fn skip_terminal(trie: &[u8], node_off: usize) -> Result<usize, ReadError> {
292 let (terminal_size, n) = read_uleb_at(trie, node_off)?;
293 let end = node_off
294 .checked_add(n)
295 .and_then(|p| p.checked_add(terminal_size as usize))
296 .ok_or(ReadError::BadCmdsize {
297 cmd: 0,
298 cmdsize: 0,
299 at_offset: node_off,
300 reason: "export trie terminal_size overflows",
301 })?;
302 if end > trie.len() {
303 return Err(ReadError::Truncated {
304 need: end,
305 have: trie.len(),
306 context: "export trie terminal payload",
307 });
308 }
309 Ok(end)
310 }
311
312 fn read_uleb_at(trie: &[u8], off: usize) -> Result<(u64, usize), ReadError> {
313 if off > trie.len() {
314 return Err(ReadError::Truncated {
315 need: off + 1,
316 have: trie.len(),
317 context: "export trie uleb",
318 });
319 }
320 read_uleb(&trie[off..])
321 }
322
323 fn read_cstring(trie: &[u8], off: usize) -> Result<&str, ReadError> {
324 let (s, _) = read_cstring_with_len(trie, off)?;
325 Ok(s)
326 }
327
328 fn read_cstring_with_len(trie: &[u8], off: usize) -> Result<(&str, usize), ReadError> {
329 if off >= trie.len() {
330 return Err(ReadError::Truncated {
331 need: off + 1,
332 have: trie.len(),
333 context: "export trie c-string",
334 });
335 }
336 let nul = trie[off..]
337 .iter()
338 .position(|&b| b == 0)
339 .ok_or(ReadError::Truncated {
340 need: trie.len() + 1,
341 have: trie.len(),
342 context: "export trie c-string (no terminator)",
343 })?;
344 let s = std::str::from_utf8(&trie[off..off + nul]).map_err(|_| ReadError::BadCmdsize {
345 cmd: 0,
346 cmdsize: 0,
347 at_offset: off,
348 reason: "export trie string is not UTF-8",
349 })?;
350 Ok((s, nul + 1))
351 }
352
353 #[cfg(test)]
354 mod tests {
355 use super::*;
356 use crate::leb::write_uleb;
357
358 /// Build a leaf terminal node (no children) with given flags+address.
359 fn encode_leaf_node(flags: u64, address: u64) -> Vec<u8> {
360 let mut payload = Vec::new();
361 write_uleb(flags, &mut payload);
362 write_uleb(address, &mut payload);
363 let mut node = Vec::new();
364 write_uleb(payload.len() as u64, &mut node);
365 node.extend_from_slice(&payload);
366 node.push(0); // child_count = 0
367 node
368 }
369
370 fn encode_root_with_children(children: &[(&str, u64)], trie_len_hint: usize) -> Vec<u8> {
371 // Root has no terminal: terminal_size = 0.
372 let mut node = Vec::new();
373 write_uleb(0, &mut node); // terminal_size
374 node.push(children.len() as u8);
375 for (edge, child_off) in children {
376 node.extend_from_slice(edge.as_bytes());
377 node.push(0);
378 write_uleb(*child_off, &mut node);
379 }
380 assert!(node.len() <= trie_len_hint); // sanity
381 node
382 }
383
384 #[test]
385 fn empty_trie_yields_no_entries() {
386 let trie = Exports::empty();
387 assert!(trie.entries().unwrap().is_empty());
388 assert!(trie.lookup("_anything").unwrap().is_none());
389 }
390
391 #[test]
392 fn single_leaf_decodes_regular_export() {
393 // Root has one child edge "_foo" pointing at leaf at known offset.
394 let leaf = encode_leaf_node(EXPORT_SYMBOL_FLAGS_KIND_REGULAR, 0x1000);
395 // Pre-size the root so we can compute the child offset.
396 let root_header_len = {
397 // terminal_size=0, child_count=1, edge "_foo\0", child_off ULEB
398 // We need the ULEB byte count of child_off — do a guess-and-check.
399 let mut tmp = Vec::new();
400 write_uleb(0, &mut tmp);
401 tmp.push(1);
402 tmp.extend_from_slice(b"_foo\0");
403 write_uleb(0, &mut tmp); // placeholder offset, ULEB width = 1
404 tmp.len()
405 };
406 let child_off = root_header_len as u64;
407 let root = encode_root_with_children(&[("_foo", child_off)], 256);
408 assert_eq!(root.len(), root_header_len);
409 let mut trie_bytes = root;
410 trie_bytes.extend_from_slice(&leaf);
411
412 let trie = Exports::from_trie_bytes(&trie_bytes);
413 let entries = trie.entries().unwrap();
414 assert_eq!(entries.len(), 1);
415 assert_eq!(entries[0].name, "_foo");
416 assert_eq!(entries[0].kind, ExportKind::Regular { address: 0x1000 });
417 }
418
419 #[test]
420 fn reexport_terminal_decodes() {
421 let mut payload = Vec::new();
422 write_uleb(EXPORT_SYMBOL_FLAGS_REEXPORT, &mut payload);
423 write_uleb(2, &mut payload); // ordinal
424 payload.extend_from_slice(b"_target\0");
425 let mut leaf = Vec::new();
426 write_uleb(payload.len() as u64, &mut leaf);
427 leaf.extend_from_slice(&payload);
428 leaf.push(0);
429
430 // Root points at leaf.
431 let root_header_len = {
432 let mut tmp = Vec::new();
433 write_uleb(0, &mut tmp);
434 tmp.push(1);
435 tmp.extend_from_slice(b"_alias\0");
436 write_uleb(0, &mut tmp);
437 tmp.len()
438 };
439 let root = encode_root_with_children(&[("_alias", root_header_len as u64)], 256);
440 let mut trie_bytes = root;
441 trie_bytes.extend_from_slice(&leaf);
442
443 let trie = Exports::from_trie_bytes(&trie_bytes);
444 let entries = trie.entries().unwrap();
445 assert_eq!(entries.len(), 1);
446 assert_eq!(entries[0].name, "_alias");
447 assert_eq!(
448 entries[0].kind,
449 ExportKind::Reexport {
450 ordinal: 2,
451 imported_name: "_target".into(),
452 }
453 );
454 }
455
456 #[test]
457 fn lookup_returns_none_for_missing_name() {
458 let leaf = encode_leaf_node(EXPORT_SYMBOL_FLAGS_KIND_REGULAR, 0x40);
459 let root_header_len = {
460 let mut tmp = Vec::new();
461 write_uleb(0, &mut tmp);
462 tmp.push(1);
463 tmp.extend_from_slice(b"_foo\0");
464 write_uleb(0, &mut tmp);
465 tmp.len()
466 };
467 let root = encode_root_with_children(&[("_foo", root_header_len as u64)], 256);
468 let mut trie_bytes = root;
469 trie_bytes.extend_from_slice(&leaf);
470 let trie = Exports::from_trie_bytes(&trie_bytes);
471 assert!(trie.lookup("_bar").unwrap().is_none());
472 assert!(trie.lookup("_foo").unwrap().is_some());
473 }
474
475 #[test]
476 fn malformed_child_offset_errors() {
477 // Root claims a child at offset 1000 but trie is tiny.
478 let root = encode_root_with_children(&[("_x", 1000)], 256);
479 let trie = Exports::from_trie_bytes(&root);
480 assert!(trie.entries().is_err());
481 }
482 }
483