//! Synthesis and validation of the Mach-O `__unwind_info` section from
//! the inputs' compact-unwind records.
1 use std::collections::{HashMap, HashSet};
2 use std::fmt;
3 use std::path::PathBuf;
4
5 use crate::atom::{Atom, AtomSection, AtomTable};
6 use crate::layout::{Layout, LayoutInput};
7 use crate::macho::constants::S_REGULAR;
8 use crate::reloc::{parse_raw_relocs, parse_relocs, Referent, Reloc};
9 use crate::resolve::{AtomId, InputId, Symbol, SymbolTable};
10 use crate::section::{OutputSection, SectionKind};
11 use crate::synth::SyntheticPlan;
12
// Size budget for one second-level page of the unwind table.
const PAGE_SIZE: usize = 4096;
// Only format version 1 is produced and accepted.
const UNWIND_INFO_VERSION: u32 = 1;
// Second-level page `kind` discriminators.
const UNWIND_SECOND_LEVEL_REGULAR: u32 = 2;
const UNWIND_SECOND_LEVEL_COMPRESSED: u32 = 3;
// A compressed entry packs a 24-bit function-offset delta ...
const MAX_COMPRESSED_FUNCTION_DELTA: u32 = 0x00ff_ffff;
// ... and an 8-bit encoding index in its top byte.
const MAX_COMPRESSED_ENCODING_INDEX: usize = 0xff;
// First-level index entries are three u32s: function offset,
// second-level page offset, LSDA index offset.
const FIRST_LEVEL_ENTRY_SIZE: usize = 12;
// Extra entry-sized zeroed gap emitted after the first-level index.
const FIRST_LEVEL_INDEX_GAP_SIZE: usize = FIRST_LEVEL_ENTRY_SIZE;
// kind(u32) + entry offset/count (u16 each) + encoding offset/count (u16 each).
const COMPRESSED_PAGE_HEADER_SIZE: usize = 12;
// Flag/field bits inside a compact encoding word.
const UNWIND_HAS_LSDA: u32 = 0x4000_0000;
const UNWIND_PERSONALITY_MASK: u32 = 0x3000_0000;
const UNWIND_PERSONALITY_SHIFT: u32 = 28;
const UNWIND_ARM64_MODE_MASK: u32 = 0x0f00_0000;
const UNWIND_ARM64_MODE_DWARF: u32 = 0x0300_0000;
const UNWIND_ARM64_DWARF_SECTION_OFFSET_MASK: u32 = 0x00ff_ffff;
// Byte offsets of fields inside a 32-byte compact-unwind input record.
const COMPACT_UNWIND_FUNCTION_OFFSET: usize = 0;
const COMPACT_UNWIND_PERSONALITY_OFFSET: usize = 16;
const COMPACT_UNWIND_LSDA_OFFSET: usize = 24;
31
/// Error produced while synthesizing `__unwind_info` from compact-unwind atoms.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UnwindError {
    // Path of the object file the failure came from, or a `<synthetic ...>` /
    // `<missing object>` placeholder when no input file applies.
    pub input: PathBuf,
    // Atom the failure is attributed to (`AtomId(0)` when not atom-specific).
    pub atom: AtomId,
    // Human-readable description of the failure.
    pub detail: String,
}
38
39 impl fmt::Display for UnwindError {
40 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41 write!(
42 f,
43 "{}: unwind synthesis for atom {:?}: {}",
44 self.input.display(),
45 self.atom,
46 self.detail
47 )
48 }
49 }
50
51 impl std::error::Error for UnwindError {}
52
/// Failures detected while decoding or round-trip-validating `__unwind_info` bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UnwindReadError {
    Truncated(&'static str),
    UnsupportedVersion(u32),
    UnsupportedSecondLevelPageKind(u32),
    BadFirstLevelIndexOrder { previous: u32, next: u32 },
    BadEncodingIndex { index: u32, max: u32 },
    TooManyPersonalities(usize),
}

impl fmt::Display for UnwindReadError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use UnwindReadError::*;
        match self {
            Truncated(what) => write!(f, "truncated {what}"),
            UnsupportedVersion(version) => {
                write!(f, "unsupported unwind info version {version}")
            }
            UnsupportedSecondLevelPageKind(kind) => {
                write!(f, "unsupported second-level page kind {kind}")
            }
            BadFirstLevelIndexOrder { previous, next } => {
                write!(
                    f,
                    "first-level index is not strictly ascending ({previous:#x} then {next:#x})"
                )
            }
            BadEncodingIndex { index, max } => {
                write!(
                    f,
                    "encoding index {index} exceeds decoded encoding table size {max}"
                )
            }
            TooManyPersonalities(count) => {
                write!(
                    f,
                    "unwind info needs {count} personalities but only 3 are encodable"
                )
            }
        }
    }
}

impl std::error::Error for UnwindReadError {}
94
/// One record as decoded back out of a serialized `__unwind_info` section.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecodedUnwindRecord {
    // Function start, relative to the unwind base (`__TEXT` vm address).
    pub function_offset: u32,
    // Final compact encoding word (personality/LSDA bits already applied).
    pub encoding: u32,
}

/// Full logical contents of a decoded `__unwind_info` section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedUnwindInfo {
    // Format version read from the header (only 1 is supported).
    pub version: u32,
    // Personality function offsets, in table order.
    pub personalities: Vec<u32>,
    // Entries of the LSDA index.
    pub lsdas: Vec<DecodedLsdaRecord>,
    // Per-function records expanded from all second-level pages.
    pub records: Vec<DecodedUnwindRecord>,
}

/// One compact-unwind record collected from the inputs, before serialization.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct UnwindRecord {
    // Function start relative to the `__TEXT` vm address.
    function_offset: u32,
    // Length of the function's range (used for the terminating sentinel).
    code_len: u32,
    // Compact encoding word from the input record.
    encoding: u32,
    // Personality routine offset (relative to `__TEXT`), if any.
    personality_offset: Option<u32>,
    // LSDA offset (relative to `__TEXT`), if any.
    lsda_offset: Option<u32>,
}

/// One entry of the decoded LSDA index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecodedLsdaRecord {
    pub function_offset: u32,
    pub lsda_offset: u32,
}

/// One entry of the LSDA index being built for serialization.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LsdaRecord {
    function_offset: u32,
    lsda_offset: u32,
}

/// (finalized records, personality offsets, LSDA index) as produced by
/// `finalize_unwind_records`.
type FinalizedUnwindTables = (Vec<UnwindRecord>, Vec<u32>, Vec<LsdaRecord>);

/// Accumulator for one compressed second-level page during packing.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CompressedPage {
    // Function offset of the page's first record; entry deltas are
    // relative to this.
    start_function_offset: u32,
    // Packed entry words: encoding index in the top byte, 24-bit delta below.
    entries: Vec<u32>,
    // Encodings used by this page that are not in the common table.
    local_encodings: Vec<u32>,
}
138
/// Replaces all compact-unwind input sections in `layout` with a synthesized
/// `__TEXT,__unwind_info` section built from their records.
///
/// Returns `Ok(true)` when the layout was modified (sections removed, the
/// unwind-info section added or resized), so the caller knows addresses must
/// be recomputed. When the `AFS_LD_VALIDATE_UNWIND_INFO` environment variable
/// is set, the serialized bytes are decoded again and checked against the
/// collected records.
pub fn synthesize(
    layout: &mut Layout,
    inputs: &[LayoutInput<'_>],
    atoms: &AtomTable,
    sym_table: &SymbolTable,
    synthetic_plan: &SyntheticPlan,
) -> Result<bool, UnwindError> {
    // The compact-unwind input sections are always consumed, even when no
    // records survive into the output.
    let mut changed = remove_compact_unwind_sections(layout);
    let records = collect_records(layout, inputs, atoms, sym_table, synthetic_plan)?;
    if records.is_empty() {
        // Nothing to encode: also drop any previously synthesized section.
        changed |= remove_unwind_info_section(layout);
        if changed {
            prune_empty_segments(layout);
        }
        return Ok(changed);
    }

    let bytes = serialize_unwind_info(&records).map_err(|err| UnwindError {
        input: PathBuf::from("<synthetic unwind>"),
        atom: AtomId(0),
        detail: err.to_string(),
    })?;
    if should_validate_serialized_unwind_info() {
        // Opt-in round-trip self-check of the freshly serialized section.
        validate_serialized_unwind_info(&bytes, &records).map_err(|err| UnwindError {
            input: PathBuf::from("<synthetic unwind>"),
            atom: AtomId(0),
            detail: err.to_string(),
        })?;
    }
    let section_changed = upsert_unwind_info_section(layout, bytes);
    if changed || section_changed {
        prune_empty_segments(layout);
    }
    Ok(changed || section_changed)
}
174
/// Whether the opt-in round-trip validation is enabled; any value of the
/// environment variable (even an empty one) turns it on.
fn should_validate_serialized_unwind_info() -> bool {
    let flag = std::env::var_os("AFS_LD_VALIDATE_UNWIND_INFO");
    flag.is_some()
}
178
/// Gathers one `UnwindRecord` per live compact-unwind atom, sorted by
/// function offset. All emitted offsets are relative to the `__TEXT`
/// segment's vm address.
///
/// Atoms whose paired function atom was dropped from the final layout are
/// skipped; malformed records and unresolved references are hard errors.
fn collect_records(
    layout: &Layout,
    inputs: &[LayoutInput<'_>],
    atoms: &AtomTable,
    sym_table: &SymbolTable,
    synthetic_plan: &SyntheticPlan,
) -> Result<Vec<UnwindRecord>, UnwindError> {
    // Base for converting absolute addresses into 32-bit section offsets.
    let text_base = layout
        .segment("__TEXT")
        .map(|segment| segment.vm_addr)
        .unwrap_or(0);
    // Fast lookup from input id to its parsed object file.
    let input_map: HashMap<InputId, &crate::input::ObjectFile> = inputs
        .iter()
        .map(|input| (input.id, input.object))
        .collect();
    // Distinct (input, section) pairs that contain compact-unwind atoms;
    // each section's relocations are parsed once and cached below.
    let compact_unwind_sections: HashSet<(InputId, u8)> = atoms
        .iter()
        .filter(|(_, atom)| atom.section == AtomSection::CompactUnwind)
        .map(|(_, atom)| (atom.origin, atom.input_section))
        .collect();
    let mut reloc_cache: HashMap<(InputId, u8), Vec<Reloc>> = HashMap::new();
    for (input_id, section_idx) in compact_unwind_sections {
        let obj = input_map.get(&input_id).ok_or_else(|| UnwindError {
            input: PathBuf::from("<missing object>"),
            atom: AtomId(0),
            detail: "missing parsed object".to_string(),
        })?;
        // Section indices are 1-based; saturating_sub keeps index 0 from wrapping.
        let section = obj
            .sections
            .get((section_idx as usize).saturating_sub(1))
            .ok_or_else(|| UnwindError {
                input: obj.path.clone(),
                atom: AtomId(0),
                detail: format!("compact-unwind section {} is out of range", section_idx),
            })?;
        if section.nreloc == 0 {
            continue;
        }
        let raws = parse_raw_relocs(&section.raw_relocs, 0, section.nreloc).map_err(|err| {
            UnwindError {
                input: obj.path.clone(),
                atom: AtomId(0),
                detail: err.to_string(),
            }
        })?;
        let relocs = parse_relocs(&raws).map_err(|err| UnwindError {
            input: obj.path.clone(),
            atom: AtomId(0),
            detail: err.to_string(),
        })?;
        reloc_cache.insert((input_id, section_idx), relocs);
    }

    let mut records = Vec::new();
    for (atom_id, atom) in atoms.iter() {
        if atom.section != AtomSection::CompactUnwind {
            continue;
        }
        // Skip records whose described function atom did not make it into the
        // final layout (e.g. it was dropped), rather than erroring.
        if atom
            .parent_of
            .is_some_and(|parent| layout.atom_addr(parent).is_none())
        {
            continue;
        }
        let Some(obj) = input_map.get(&atom.origin) else {
            return Err(UnwindError {
                input: PathBuf::from("<missing object>"),
                atom: atom_id,
                detail: "missing parsed object".to_string(),
            });
        };
        // A 64-bit compact-unwind record is 32 bytes:
        // function(8) | length(4) | encoding(4) | personality(8) | lsda(8).
        if atom.data.len() < 32 {
            return Err(UnwindError {
                input: obj.path.clone(),
                atom: atom_id,
                detail: format!(
                    "compact-unwind atom is {} bytes, expected 32-byte record",
                    atom.data.len()
                ),
            });
        }
        let relocs = reloc_cache
            .get(&(atom.origin, atom.input_section))
            .map(Vec::as_slice)
            .unwrap_or(&[]);
        let function_addr =
            resolve_function_address(atom_id, atom, obj, relocs, atoms, sym_table, layout)?;
        // Personality may resolve through a synthetic GOT slot for imports.
        let personality_offset = resolve_metadata_offset(
            atom_id,
            atom,
            obj,
            relocs,
            atoms,
            sym_table,
            layout,
            synthetic_plan,
            COMPACT_UNWIND_PERSONALITY_OFFSET,
            true,
            "personality",
        )?
        .map(|addr| {
            u32::try_from(addr.saturating_sub(text_base)).map_err(|_| UnwindError {
                input: obj.path.clone(),
                atom: atom_id,
                detail: "personality target exceeds 32-bit unwind offset range".to_string(),
            })
        })
        .transpose()?;
        let lsda_offset = resolve_metadata_offset(
            atom_id,
            atom,
            obj,
            relocs,
            atoms,
            sym_table,
            layout,
            synthetic_plan,
            COMPACT_UNWIND_LSDA_OFFSET,
            false,
            "LSDA",
        )?
        .map(|addr| {
            u32::try_from(addr.saturating_sub(text_base)).map_err(|_| UnwindError {
                input: obj.path.clone(),
                atom: atom_id,
                detail: "LSDA target exceeds 32-bit unwind offset range".to_string(),
            })
        })
        .transpose()?;

        let function_offset =
            u32::try_from(function_addr.saturating_sub(text_base)).map_err(|_| UnwindError {
                input: obj.path.clone(),
                atom: atom_id,
                detail: "function start exceeds 32-bit unwind offset range".to_string(),
            })?;
        records.push(UnwindRecord {
            function_offset,
            // Length and encoding live at fixed offsets 8 and 12 of the record.
            code_len: u32::from_le_bytes(atom.data[8..12].try_into().unwrap()),
            encoding: u32::from_le_bytes(atom.data[12..16].try_into().unwrap()),
            personality_offset,
            lsda_offset,
        });
    }

    // First-level index construction requires ascending function offsets.
    records.sort_by_key(|record| record.function_offset);
    Ok(records)
}
327
/// Resolves the absolute address of the function a compact-unwind record
/// describes, either via the pre-paired `parent_of` atom or via the record's
/// `function_start` relocation at field offset 0.
fn resolve_function_address(
    atom_id: AtomId,
    atom: &Atom,
    obj: &crate::input::ObjectFile,
    relocs: &[Reloc],
    atoms: &AtomTable,
    sym_table: &SymbolTable,
    layout: &Layout,
) -> Result<u64, UnwindError> {
    // Fast path: the record was already paired with its function atom.
    if let Some(parent) = atom.parent_of {
        return layout.atom_addr(parent).ok_or_else(|| UnwindError {
            input: obj.path.clone(),
            atom: atom_id,
            detail: format!("function atom {:?} missing from final layout", parent),
        });
    }
    // Otherwise the first record field must carry a relocation to the function.
    let Some(reloc) = relocs
        .iter()
        .find(|reloc| reloc.offset == atom.input_offset)
    else {
        return Err(UnwindError {
            input: obj.path.clone(),
            atom: atom_id,
            detail: "function_start reloc is missing".to_string(),
        });
    };
    resolve_reference_address(
        atom_id,
        atom,
        obj,
        atoms,
        sym_table,
        layout,
        None, // no GOT resolution needed for function starts
        reloc.referent,
        // Inline field value, truncated to 32 bits.
        read_u64(atom, COMPACT_UNWIND_FUNCTION_OFFSET)? as u32,
        "function_start",
        false,
    )
}
368
/// Resolves the personality or LSDA field of a compact-unwind record to an
/// absolute address.
///
/// `field_offset` selects the record field (16 = personality, 24 = LSDA).
/// Returns `Ok(None)` when the field is zero and carries no relocation; a
/// nonzero inline value without a relocation is an error. When
/// `allow_import_got` is set, dylib-imported symbols resolve through their
/// synthetic GOT slot.
#[allow(clippy::too_many_arguments)]
fn resolve_metadata_offset(
    atom_id: AtomId,
    atom: &Atom,
    obj: &crate::input::ObjectFile,
    relocs: &[Reloc],
    atoms: &AtomTable,
    sym_table: &SymbolTable,
    layout: &Layout,
    synthetic_plan: &SyntheticPlan,
    field_offset: usize,
    allow_import_got: bool,
    label: &str,
) -> Result<Option<u64>, UnwindError> {
    let raw_value = read_u64(atom, field_offset)?;
    // The field's relocation sits at the field's offset within the input section.
    let reloc = relocs
        .iter()
        .find(|reloc| reloc.offset == atom.input_offset + field_offset as u32);
    // Zero and unrelocated means the record simply has no such field.
    if raw_value == 0 && reloc.is_none() {
        return Ok(None);
    }
    let Some(reloc) = reloc else {
        return Err(UnwindError {
            input: obj.path.clone(),
            atom: atom_id,
            detail: format!("{label} field has inline value but no relocation"),
        });
    };
    Ok(Some(resolve_reference_address(
        atom_id,
        atom,
        obj,
        atoms,
        sym_table,
        layout,
        Some(synthetic_plan),
        reloc.referent,
        // Inline field value, truncated to 32 bits.
        raw_value as u32,
        label,
        allow_import_got,
    )?))
}
411
/// Resolves a relocation referent to an absolute address in the output.
///
/// Section referents locate the atom whose input-address range covers
/// `target_offset` and translate that position into the output layout.
/// Symbol referents are looked up by name in the global symbol table
/// (`target_offset` is not used for them): defined symbols resolve to atom
/// address + value, and dylib imports are accepted only when
/// `allow_import_got` is set, resolving to their synthetic GOT slot.
#[allow(clippy::too_many_arguments)]
fn resolve_reference_address(
    atom_id: AtomId,
    atom: &Atom,
    obj: &crate::input::ObjectFile,
    atoms: &AtomTable,
    sym_table: &SymbolTable,
    layout: &Layout,
    synthetic_plan: Option<&SyntheticPlan>,
    referent: Referent,
    target_offset: u32,
    label: &str,
    allow_import_got: bool,
) -> Result<u64, UnwindError> {
    match referent {
        Referent::Section(section_idx) => {
            // Section indices are 1-based in the relocation.
            let input_section = obj
                .sections
                .get((section_idx as usize).saturating_sub(1))
                .ok_or_else(|| UnwindError {
                    input: obj.path.clone(),
                    atom: atom_id,
                    detail: format!("{label} section {} is out of range", section_idx),
                })?;
            // Find the atom from the same input whose [start, start+size)
            // input-address interval contains `target_offset`.
            let Some((candidate_id, candidate)) = atoms.iter().find(|(_, candidate)| {
                candidate.origin == atom.origin
                    && candidate.input_section == section_idx
                    && input_section.addr + candidate.input_offset as u64 <= target_offset as u64
                    && (target_offset as u64)
                        < input_section.addr + candidate.input_offset as u64 + candidate.size as u64
            }) else {
                return Err(UnwindError {
                    input: obj.path.clone(),
                    atom: atom_id,
                    detail: format!(
                        "{label} points at missing input atom section {} offset 0x{:x}",
                        section_idx, target_offset
                    ),
                });
            };
            let Some(base_addr) = layout.atom_addr(candidate_id) else {
                return Err(UnwindError {
                    input: obj.path.clone(),
                    atom: atom_id,
                    detail: format!("{label} atom {:?} missing from final layout", candidate_id),
                });
            };
            // Preserve the intra-atom offset when translating to output space.
            let atom_input_addr = input_section.addr + candidate.input_offset as u64;
            Ok(base_addr + (target_offset as u64 - atom_input_addr))
        }
        Referent::Symbol(sym_idx) => {
            let input_symbol = obj
                .symbols
                .get(sym_idx as usize)
                .ok_or_else(|| UnwindError {
                    input: obj.path.clone(),
                    atom: atom_id,
                    detail: format!("{label} symbol {} is out of range", sym_idx),
                })?;
            let name = obj.symbol_name(input_symbol).map_err(|err| UnwindError {
                input: obj.path.clone(),
                atom: atom_id,
                detail: err.to_string(),
            })?;
            // Match the input symbol to the resolved global symbol by name.
            let Some((symbol_id, symbol)) = sym_table
                .iter()
                .find(|(_, symbol)| sym_table.interner.resolve(symbol.name()) == name)
            else {
                return Err(UnwindError {
                    input: obj.path.clone(),
                    atom: atom_id,
                    detail: format!("{label} symbol `{name}` was not resolved"),
                });
            };
            match symbol {
                Symbol::Defined {
                    atom: target_atom,
                    value,
                    ..
                } => {
                    let Some(base_addr) = layout.atom_addr(*target_atom) else {
                        return Err(UnwindError {
                            input: obj.path.clone(),
                            atom: atom_id,
                            detail: format!(
                                "{label} atom {:?} missing from final layout",
                                target_atom
                            ),
                        });
                    };
                    Ok(base_addr + *value)
                }
                // Imported personalities are referenced through their GOT slot.
                Symbol::DylibImport { .. } if allow_import_got => {
                    personality_got_addr(layout, synthetic_plan, symbol_id, atom_id, obj, label)
                }
                other => Err(UnwindError {
                    input: obj.path.clone(),
                    atom: atom_id,
                    detail: format!(
                        "{label} symbol `{name}` resolved to unsupported kind {:?}",
                        other.kind()
                    ),
                }),
            }
        }
    }
}
519
/// Returns the address of the synthetic GOT slot planned for an imported
/// personality symbol.
///
/// Requires a `SyntheticPlan` with a GOT entry for `symbol_id` and an
/// emitted `__DATA_CONST,__got` output section; each slot is 8 bytes.
fn personality_got_addr(
    layout: &Layout,
    synthetic_plan: Option<&SyntheticPlan>,
    symbol_id: crate::resolve::SymbolId,
    atom_id: AtomId,
    obj: &crate::input::ObjectFile,
    label: &str,
) -> Result<u64, UnwindError> {
    let Some(plan) = synthetic_plan else {
        return Err(UnwindError {
            input: obj.path.clone(),
            atom: atom_id,
            detail: format!("{label} import needs a synthetic GOT slot"),
        });
    };
    let Some((idx, _)) = plan.got.get(symbol_id) else {
        return Err(UnwindError {
            input: obj.path.clone(),
            atom: atom_id,
            detail: format!("{label} import is missing synthetic GOT planning"),
        });
    };
    let Some(section) = layout
        .sections
        .iter()
        .find(|section| section.segment == "__DATA_CONST" && section.name == "__got")
    else {
        return Err(UnwindError {
            input: obj.path.clone(),
            atom: atom_id,
            detail: format!("{label} import is missing the output __got section"),
        });
    };
    // Slot address = section base + 8 bytes per GOT entry.
    Ok(section.addr + (idx as u64) * 8)
}
555
556 fn read_u64(atom: &Atom, offset: usize) -> Result<u64, UnwindError> {
557 let end = offset + 8;
558 if end > atom.data.len() {
559 return Err(UnwindError {
560 input: PathBuf::from("<compact unwind>"),
561 atom: atom.id,
562 detail: format!("record field at 0x{offset:x} overruns atom data"),
563 });
564 }
565 Ok(u64::from_le_bytes(
566 atom.data[offset..end].try_into().unwrap(),
567 ))
568 }
569
570 fn serialize_unwind_info(records: &[UnwindRecord]) -> Result<Vec<u8>, UnwindReadError> {
571 let (records, personalities, lsdas) = finalize_unwind_records(records)?;
572 let common_encodings = select_common_encodings(&records);
573 let pages = build_pages(&records, &common_encodings);
574 let common_encodings_offset = 7 * 4;
575 let common_encodings_count = common_encodings.len() as u32;
576 let personalities_offset = common_encodings_offset + common_encodings_count as usize * 4;
577 let indices_offset = personalities_offset + personalities.len() * 4;
578 let indices_count = (pages.len() + 1) as u32;
579 let lsdas_offset = indices_offset
580 + indices_count as usize * FIRST_LEVEL_ENTRY_SIZE
581 + FIRST_LEVEL_INDEX_GAP_SIZE;
582 let second_level_start = lsdas_offset + lsdas.len() * 8;
583 let page_blobs: Vec<Vec<u8>> = pages.iter().map(serialize_compressed_page).collect();
584 let sentinel = records
585 .last()
586 .map(|record| record.function_offset + record.code_len)
587 .unwrap_or(0);
588
589 let mut out = Vec::new();
590 out.extend_from_slice(&UNWIND_INFO_VERSION.to_le_bytes());
591 out.extend_from_slice(&(common_encodings_offset as u32).to_le_bytes());
592 out.extend_from_slice(&common_encodings_count.to_le_bytes());
593 out.extend_from_slice(&(personalities_offset as u32).to_le_bytes());
594 out.extend_from_slice(&(personalities.len() as u32).to_le_bytes());
595 out.extend_from_slice(&(indices_offset as u32).to_le_bytes());
596 out.extend_from_slice(&indices_count.to_le_bytes());
597
598 for encoding in &common_encodings {
599 out.extend_from_slice(&encoding.to_le_bytes());
600 }
601 for personality in &personalities {
602 out.extend_from_slice(&personality.to_le_bytes());
603 }
604
605 let mut page_lsda_index = 0usize;
606 let mut page_offset = second_level_start as u32;
607 for (page_idx, page) in pages.iter().enumerate() {
608 let next_page_start = pages
609 .get(page_idx + 1)
610 .map(|page| page.start_function_offset)
611 .unwrap_or(sentinel);
612 while page_lsda_index < lsdas.len()
613 && lsdas[page_lsda_index].function_offset < page.start_function_offset
614 {
615 page_lsda_index += 1;
616 }
617 let lsda_index_offset = lsdas_offset as u32 + (page_lsda_index as u32) * 8;
618 out.extend_from_slice(&page.start_function_offset.to_le_bytes());
619 out.extend_from_slice(&page_offset.to_le_bytes());
620 out.extend_from_slice(&lsda_index_offset.to_le_bytes());
621 while page_lsda_index < lsdas.len()
622 && lsdas[page_lsda_index].function_offset < next_page_start
623 {
624 page_lsda_index += 1;
625 }
626 page_offset += serialize_compressed_page(page).len() as u32;
627 }
628 out.extend_from_slice(&sentinel.to_le_bytes());
629 out.extend_from_slice(&0u32.to_le_bytes());
630 out.extend_from_slice(&(lsdas_offset as u32 + (lsdas.len() as u32) * 8).to_le_bytes());
631
632 while out.len() < lsdas_offset {
633 out.push(0);
634 }
635 for lsda in &lsdas {
636 out.extend_from_slice(&lsda.function_offset.to_le_bytes());
637 out.extend_from_slice(&lsda.lsda_offset.to_le_bytes());
638 }
639
640 while out.len() < second_level_start {
641 out.push(0);
642 }
643
644 for blob in page_blobs {
645 out.extend_from_slice(&blob);
646 }
647
648 while !out.len().is_multiple_of(8) {
649 out.push(0);
650 }
651 Ok(out)
652 }
653
/// Decodes serialized `__unwind_info` bytes back into their logical content.
///
/// Supports only version 1 with compressed second-level pages; a regular
/// second-level page (kind 2) is reported as unsupported, as is any other
/// page kind.
///
/// # Errors
/// `UnwindReadError` on truncation, unsupported version or page kind, a
/// non-ascending first-level index, or an out-of-range encoding index.
pub fn decode_unwind_info(bytes: &[u8]) -> Result<DecodedUnwindInfo, UnwindReadError> {
    // Fixed header: seven u32 fields.
    if bytes.len() < 28 {
        return Err(UnwindReadError::Truncated("unwind_info header"));
    }
    let version = read_u32(bytes, 0, "unwind_info version")?;
    if version != UNWIND_INFO_VERSION {
        return Err(UnwindReadError::UnsupportedVersion(version));
    }
    let common_encodings_offset = read_u32(bytes, 4, "common encodings offset")? as usize;
    let common_encodings_count = read_u32(bytes, 8, "common encodings count")? as usize;
    let personalities_offset = read_u32(bytes, 12, "personalities offset")? as usize;
    let personalities_count = read_u32(bytes, 16, "personalities count")? as usize;
    let indices_offset = read_u32(bytes, 20, "indices offset")? as usize;
    let indices_count = read_u32(bytes, 24, "indices count")? as usize;

    let common_encodings = read_u32_array(
        bytes,
        common_encodings_offset,
        common_encodings_count,
        "common encodings",
    )?;
    let personalities = read_u32_array(
        bytes,
        personalities_offset,
        personalities_count,
        "personality array",
    )?;
    // First-level entries are (function offset, page offset, lsda offset).
    let mut index_starts = Vec::new();
    let mut index_lsda_offsets = Vec::new();
    for idx in 0..indices_count {
        let entry_off = indices_offset + idx * FIRST_LEVEL_ENTRY_SIZE;
        if entry_off + FIRST_LEVEL_ENTRY_SIZE > bytes.len() {
            return Err(UnwindReadError::Truncated("first-level index"));
        }
        index_starts.push(read_u32(bytes, entry_off, "first-level function offset")?);
        index_lsda_offsets.push(read_u32(bytes, entry_off + 8, "first-level lsda offset")?);
    }
    // Function offsets must never decrease across the first-level index.
    for pair in index_starts.windows(2) {
        if pair[0] > pair[1] {
            return Err(UnwindReadError::BadFirstLevelIndexOrder {
                previous: pair[0],
                next: pair[1],
            });
        }
    }

    // The LSDA index spans from the first entry's lsda offset to the
    // sentinel (last) entry's lsda offset, 8 bytes per record.
    let mut lsdas = Vec::new();
    if let (Some(&lsda_start), Some(&lsda_end)) =
        (index_lsda_offsets.first(), index_lsda_offsets.last())
    {
        let start = lsda_start as usize;
        let end = lsda_end as usize;
        if end < start {
            return Err(UnwindReadError::Truncated("lsda index array"));
        }
        let mut entry_off = start;
        while entry_off < end {
            if entry_off + 8 > bytes.len() {
                return Err(UnwindReadError::Truncated("lsda index entry"));
            }
            lsdas.push(DecodedLsdaRecord {
                function_offset: read_u32(bytes, entry_off, "lsda function offset")?,
                lsda_offset: read_u32(bytes, entry_off + 4, "lsda target offset")?,
            });
            entry_off += 8;
        }
    }

    // Expand every non-sentinel first-level entry's second-level page.
    let mut records = Vec::new();
    for idx in 0..indices_count.saturating_sub(1) {
        let entry_off = indices_offset + idx * FIRST_LEVEL_ENTRY_SIZE;
        let function_offset = read_u32(bytes, entry_off, "first-level function offset")?;
        let second_level_off = read_u32(bytes, entry_off + 4, "second-level page offset")? as usize;
        let kind = read_u32(bytes, second_level_off, "second-level page kind")?;
        match kind {
            UNWIND_SECOND_LEVEL_COMPRESSED => {
                // Page header: u16 entry offset/count and u16 encoding
                // offset/count, offsets relative to the page start.
                let entries_off = second_level_off
                    + read_u16(bytes, second_level_off + 4, "page entry offset")? as usize;
                let entry_count =
                    read_u16(bytes, second_level_off + 6, "page entry count")? as usize;
                let encodings_off = second_level_off
                    + read_u16(bytes, second_level_off + 8, "page encoding offset")? as usize;
                let encoding_count =
                    read_u16(bytes, second_level_off + 10, "page encoding count")? as usize;
                let local_encodings =
                    read_u32_array(bytes, encodings_off, encoding_count, "page-local encodings")?;
                for entry_idx in 0..entry_count {
                    // Entry word: encoding index in the top byte, 24-bit
                    // delta from the page's base function offset below.
                    let word =
                        read_u32(bytes, entries_off + entry_idx * 4, "compressed page entry")?;
                    let encoding_index = word >> 24;
                    let function_delta = word & 0x00ff_ffff;
                    // Indices address the shared common table first, then
                    // spill into this page's local encodings.
                    let encoding = if (encoding_index as usize) < common_encodings.len() {
                        common_encodings[encoding_index as usize]
                    } else {
                        let local_idx = encoding_index as usize - common_encodings.len();
                        *local_encodings.get(local_idx).ok_or_else(|| {
                            UnwindReadError::BadEncodingIndex {
                                index: encoding_index,
                                max: (common_encodings.len() + local_encodings.len()) as u32,
                            }
                        })?
                    };
                    records.push(DecodedUnwindRecord {
                        function_offset: function_offset + function_delta,
                        encoding,
                    });
                }
            }
            UNWIND_SECOND_LEVEL_REGULAR => {
                // Regular pages are valid in the format but not produced by
                // this linker, so decoding them is unimplemented.
                return Err(UnwindReadError::UnsupportedSecondLevelPageKind(kind));
            }
            other => return Err(UnwindReadError::UnsupportedSecondLevelPageKind(other)),
        }
    }

    Ok(DecodedUnwindInfo {
        version,
        personalities,
        lsdas,
        records,
    })
}
776
/// Round-trip check: decodes `bytes` and verifies records, personalities,
/// and LSDA entries against what `records` should have produced.
///
/// Mismatches are reported via `UnwindReadError::Truncated`, reused here as
/// a generic "does not round-trip" error carrier.
fn validate_serialized_unwind_info(
    bytes: &[u8],
    records: &[UnwindRecord],
) -> Result<(), UnwindReadError> {
    let decoded = decode_unwind_info(bytes)?;
    // Re-derive the expected finalized tables from the same source records.
    let (records, personalities, lsdas) = finalize_unwind_records(records)?;
    let expected: Vec<DecodedUnwindRecord> = records
        .iter()
        .map(|record| DecodedUnwindRecord {
            function_offset: record.function_offset,
            encoding: record.encoding,
        })
        .collect();
    if decoded.records != expected {
        return Err(UnwindReadError::Truncated(
            "decoded unwind records do not round-trip",
        ));
    }
    if decoded.personalities != personalities {
        return Err(UnwindReadError::Truncated(
            "decoded personality table does not round-trip",
        ));
    }
    let expected_lsdas: Vec<DecodedLsdaRecord> = lsdas
        .iter()
        .map(|lsda| DecodedLsdaRecord {
            function_offset: lsda.function_offset,
            lsda_offset: lsda.lsda_offset,
        })
        .collect();
    if decoded.lsdas != expected_lsdas {
        return Err(UnwindReadError::Truncated(
            "decoded lsda table does not round-trip",
        ));
    }
    Ok(())
}
814
/// Produces the final encoding words plus the personality and LSDA tables:
/// personality targets are deduplicated into at most three entries and
/// referenced by a 1-based index in the personality bits, the has-LSDA bit
/// mirrors whether the record carries an LSDA, and DWARF-mode encodings get
/// their 24-bit section-offset field cleared.
fn finalize_unwind_records(
    records: &[UnwindRecord],
) -> Result<FinalizedUnwindTables, UnwindReadError> {
    let mut personalities = Vec::new();
    let mut finalized = Vec::with_capacity(records.len());
    let mut lsdas = Vec::new();
    let mut personality_index = HashMap::new();

    for record in records {
        // Start from the input encoding with the personality bits cleared.
        let mut encoding = record.encoding & !UNWIND_PERSONALITY_MASK;
        if let Some(personality_offset) = record.personality_offset {
            let idx = if let Some(&idx) = personality_index.get(&personality_offset) {
                idx
            } else {
                // Only indices 1..=3 fit the two personality bits.
                if personalities.len() == 3 {
                    return Err(UnwindReadError::TooManyPersonalities(
                        personalities.len() + 1,
                    ));
                }
                personalities.push(personality_offset);
                // `len()` after the push yields the 1-based index
                // (0 means "no personality").
                let idx = personalities.len() as u32;
                personality_index.insert(personality_offset, idx);
                idx
            };
            encoding |= idx << UNWIND_PERSONALITY_SHIFT;
        }
        if let Some(lsda_offset) = record.lsda_offset {
            encoding |= UNWIND_HAS_LSDA;
            lsdas.push(LsdaRecord {
                function_offset: record.function_offset,
                lsda_offset,
            });
        } else {
            encoding &= !UNWIND_HAS_LSDA;
        }
        // DWARF-mode encodings: clear the input-relative section-offset
        // field. NOTE(review): presumably a later pass supplies the output
        // `__eh_frame` offset — confirm downstream expects zero here.
        if encoding & UNWIND_ARM64_MODE_MASK == UNWIND_ARM64_MODE_DWARF {
            encoding &= !UNWIND_ARM64_DWARF_SECTION_OFFSET_MASK;
        }
        finalized.push(UnwindRecord {
            encoding,
            ..*record
        });
    }

    Ok((finalized, personalities, lsdas))
}
861
862 fn select_common_encodings(records: &[UnwindRecord]) -> Vec<u32> {
863 let mut stats: HashMap<u32, (u32, usize)> = HashMap::new();
864 for (idx, record) in records.iter().enumerate() {
865 stats
866 .entry(record.encoding)
867 .and_modify(|(count, _)| *count += 1)
868 .or_insert((1, idx));
869 }
870
871 let mut encodings: Vec<(u32, u32, usize)> = stats
872 .into_iter()
873 .filter_map(|(encoding, (count, first_seen))| {
874 (count > 1).then_some((encoding, count, first_seen))
875 })
876 .collect();
877 encodings.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
878 encodings.truncate(127);
879 encodings
880 .into_iter()
881 .map(|(encoding, _, _)| encoding)
882 .collect()
883 }
884
/// Greedily packs the (ascending-offset) records into compressed
/// second-level pages.
///
/// Each compressed entry stores an 8-bit encoding index and a 24-bit delta
/// from the page's first function offset, so the current page is flushed
/// when the projected size would exceed `PAGE_SIZE`, the delta would
/// overflow 24 bits, or the combined (common + page-local) encoding count
/// would no longer fit the index byte.
fn build_pages(records: &[UnwindRecord], common_encodings: &[u32]) -> Vec<CompressedPage> {
    let mut pages = Vec::new();
    let mut current: Option<CompressedPage> = None;
    // Reverse map: encoding -> index into the shared common table.
    let common_indices: HashMap<u32, usize> = common_encodings
        .iter()
        .copied()
        .enumerate()
        .map(|(idx, encoding)| (encoding, idx))
        .collect();

    for record in records {
        // `loop` so that a record which overflows the current page is retried
        // against a freshly started page.
        loop {
            let page = current.get_or_insert_with(|| CompressedPage {
                start_function_offset: record.function_offset,
                entries: Vec::new(),
                local_encodings: Vec::new(),
            });

            // Would this record introduce a new page-local encoding?
            let needs_local_encoding = !common_indices.contains_key(&record.encoding)
                && page
                    .local_encodings
                    .iter()
                    .all(|encoding| *encoding != record.encoding);
            let prospective_local_count =
                page.local_encodings.len() + usize::from(needs_local_encoding);
            let delta = record
                .function_offset
                .saturating_sub(page.start_function_offset);
            let projected_size = COMPRESSED_PAGE_HEADER_SIZE
                + (page.entries.len() + 1) * 4
                + prospective_local_count * 4;
            let projected_encoding_count = common_encodings.len() + prospective_local_count;

            // Flush if any limit would be exceeded; an empty page always
            // accepts its first record so forward progress is guaranteed.
            if !page.entries.is_empty()
                && (projected_size > PAGE_SIZE
                    || delta > MAX_COMPRESSED_FUNCTION_DELTA
                    || projected_encoding_count > MAX_COMPRESSED_ENCODING_INDEX + 1)
            {
                pages.push(current.take().unwrap());
                continue;
            }

            let page = current.as_mut().unwrap();
            // Index space: common table first, then page-local encodings.
            let encoding_index = if let Some(index) = common_indices.get(&record.encoding) {
                *index
            } else if let Some(index) = page
                .local_encodings
                .iter()
                .position(|encoding| *encoding == record.encoding)
            {
                common_encodings.len() + index
            } else {
                page.local_encodings.push(record.encoding);
                common_encodings.len() + page.local_encodings.len() - 1
            };
            page.entries.push(((encoding_index as u32) << 24) | delta);
            break;
        }
    }

    // Flush the trailing partially-filled page.
    if let Some(page) = current {
        pages.push(page);
    }
    pages
}
950
951 fn read_u16(bytes: &[u8], offset: usize, what: &'static str) -> Result<u16, UnwindReadError> {
952 if offset + 2 > bytes.len() {
953 return Err(UnwindReadError::Truncated(what));
954 }
955 Ok(u16::from_le_bytes(
956 bytes[offset..offset + 2].try_into().unwrap(),
957 ))
958 }
959
960 fn read_u32(bytes: &[u8], offset: usize, what: &'static str) -> Result<u32, UnwindReadError> {
961 if offset + 4 > bytes.len() {
962 return Err(UnwindReadError::Truncated(what));
963 }
964 Ok(u32::from_le_bytes(
965 bytes[offset..offset + 4].try_into().unwrap(),
966 ))
967 }
968
969 fn read_u32_array(
970 bytes: &[u8],
971 offset: usize,
972 count: usize,
973 what: &'static str,
974 ) -> Result<Vec<u32>, UnwindReadError> {
975 let mut out = Vec::with_capacity(count);
976 for idx in 0..count {
977 out.push(read_u32(bytes, offset + idx * 4, what)?);
978 }
979 Ok(out)
980 }
981
982 fn serialize_compressed_page(page: &CompressedPage) -> Vec<u8> {
983 let entry_offset = COMPRESSED_PAGE_HEADER_SIZE as u16;
984 let encoding_offset = entry_offset + (page.entries.len() * 4) as u16;
985 let mut out = Vec::new();
986 out.extend_from_slice(&UNWIND_SECOND_LEVEL_COMPRESSED.to_le_bytes());
987 out.extend_from_slice(&entry_offset.to_le_bytes());
988 out.extend_from_slice(&(page.entries.len() as u16).to_le_bytes());
989 out.extend_from_slice(&encoding_offset.to_le_bytes());
990 out.extend_from_slice(&(page.local_encodings.len() as u16).to_le_bytes());
991 for entry in &page.entries {
992 out.extend_from_slice(&entry.to_le_bytes());
993 }
994 for encoding in &page.local_encodings {
995 out.extend_from_slice(&encoding.to_le_bytes());
996 }
997 while !out.len().is_multiple_of(4) {
998 out.push(0);
999 }
1000 out
1001 }
1002
1003 fn remove_compact_unwind_sections(layout: &mut Layout) -> bool {
1004 let before = layout.sections.len();
1005 layout
1006 .sections
1007 .retain(|section| section.kind != SectionKind::CompactUnwind);
1008 before != layout.sections.len()
1009 }
1010
1011 fn remove_unwind_info_section(layout: &mut Layout) -> bool {
1012 let before = layout.sections.len();
1013 layout
1014 .sections
1015 .retain(|section| !(section.segment == "__TEXT" && section.name == "__unwind_info"));
1016 before != layout.sections.len()
1017 }
1018
/// Inserts or refreshes the synthetic `__TEXT,__unwind_info` section with
/// the serialized `bytes`, returning `true` when another layout pass is
/// needed (section newly inserted, or payload size differs).
fn upsert_unwind_info_section(layout: &mut Layout, bytes: Vec<u8>) -> bool {
    if let Some(section) = layout
        .sections
        .iter_mut()
        .find(|section| section.segment == "__TEXT" && section.name == "__unwind_info")
    {
        // NOTE(review): change detection compares lengths only — an
        // equal-sized payload with different bytes reports `false`.
        // Presumably only the size matters for layout convergence; confirm
        // before relying on this flag for anything byte-sensitive.
        let changed = section.synthetic_data.len() != bytes.len();
        section.kind = SectionKind::Regular;
        section.align_pow2 = 2;
        section.flags = S_REGULAR;
        section.reserved1 = 0;
        section.reserved2 = 0;
        section.reserved3 = 0;
        // The payload is fully synthetic; no input atoms contribute.
        section.atoms.clear();
        section.synthetic_offset = 0;
        section.synthetic_data = bytes;
        section.size = section.synthetic_data.len() as u64;
        return changed;
    }

    // Insert just before __eh_frame when present, otherwise after the last
    // __TEXT section, otherwise at the end of the section list.
    let insert_idx = layout
        .sections
        .iter()
        .position(|section| section.segment == "__TEXT" && section.name == "__eh_frame")
        .or_else(|| {
            layout
                .sections
                .iter()
                .rposition(|section| section.segment == "__TEXT")
                .map(|idx| idx + 1)
        })
        .unwrap_or(layout.sections.len());
    layout.sections.insert(
        insert_idx,
        OutputSection {
            segment: "__TEXT".into(),
            name: "__unwind_info".into(),
            kind: SectionKind::Regular,
            align_pow2: 2,
            flags: S_REGULAR,
            reserved1: 0,
            reserved2: 0,
            reserved3: 0,
            atoms: Vec::new(),
            synthetic_offset: 0,
            synthetic_data: bytes,
            addr: 0,
            size: 0,
            file_off: 0,
        },
    );
    let section = &mut layout.sections[insert_idx];
    section.size = section.synthetic_data.len() as u64;
    true
}
1074
1075 fn prune_empty_segments(layout: &mut Layout) {
1076 let standard: &[&str] = match layout.kind {
1077 crate::OutputKind::Executable => &[
1078 "__PAGEZERO",
1079 "__TEXT",
1080 "__DATA_CONST",
1081 "__DATA",
1082 "__LINKEDIT",
1083 ],
1084 crate::OutputKind::Dylib => &["__TEXT", "__DATA_CONST", "__DATA", "__LINKEDIT"],
1085 };
1086 layout.segments.retain(|segment| {
1087 standard.iter().any(|name| *name == segment.name)
1088 || layout
1089 .sections
1090 .iter()
1091 .any(|section| section.segment == segment.name)
1092 });
1093 }
1094
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest possible table: one record should produce empty common-encoding
    /// and personality areas, a two-entry first-level index (one real page plus
    /// the terminating sentinel), and one compressed second-level page.
    #[test]
    fn serialize_single_leaf_record_matches_apple_shape() {
        let bytes = serialize_unwind_info(&[UnwindRecord {
            function_offset: 0x348,
            code_len: 0x14,
            encoding: 0x0200_1000,
            personality_offset: None,
            lsda_offset: None,
        }])
        .unwrap();
        // Compare as little-endian u32 words to keep the expectation readable.
        let words: Vec<u32> = bytes
            .chunks_exact(4)
            .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap()))
            .collect();
        assert_eq!(
            words,
            vec![
                1,           // header: version
                0x1c,        // common encodings offset (empty, points past header)
                0,           // common encodings count
                0x1c,        // personalities offset (empty)
                0,           // personalities count
                0x1c,        // first-level index offset (28 = 7-word header)
                2,           // index entries: one page + sentinel
                0x348,       // index[0]: first function offset
                0x40,        // index[0]: second-level page offset
                0x40,        // index[0]: LSDA index offset
                0x35c,       // sentinel: end offset (0x348 + 0x14)
                0,           // sentinel: no second-level page
                0x40,        // sentinel: LSDA index end
                0,
                0,
                0,
                3,           // page kind: UNWIND_SECOND_LEVEL_COMPRESSED
                0x0001_000c, // entry count 1 (hi u16), entry offset 0x0c (lo u16)
                0x0001_0010, // encoding count 1, encoding offset 0x10
                0,           // compressed entry: delta 0, encoding index 0
                0x0200_1000, // page-local encoding
                0,
            ]
        );
        // The freshly serialized bytes must decode back to the same record.
        let decoded = decode_unwind_info(&bytes).unwrap();
        assert!(decoded.personalities.is_empty());
        assert!(decoded.lsdas.is_empty());
        assert_eq!(
            decoded.records,
            vec![DecodedUnwindRecord {
                function_offset: 0x348,
                encoding: 0x0200_1000,
            }]
        );
    }

    /// Two nearby records with distinct encodings share one compressed page;
    /// each compressed entry packs the function delta in the low 24 bits and
    /// the encoding-table index in the high 8 bits.
    #[test]
    fn serialize_two_records_uses_compressed_entries() {
        let bytes = serialize_unwind_info(&[
            UnwindRecord {
                function_offset: 0x348,
                code_len: 0x8,
                encoding: 0x0200_0000,
                personality_offset: None,
                lsda_offset: None,
            },
            UnwindRecord {
                function_offset: 0x350,
                code_len: 0x20,
                encoding: 0x0400_0000,
                personality_offset: None,
                lsda_offset: None,
            },
        ])
        .unwrap();
        let words: Vec<u32> = bytes
            .chunks_exact(4)
            .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap()))
            .collect();
        assert_eq!(
            words,
            vec![
                1,           // version
                0x1c,        // common encodings offset (empty)
                0,
                0x1c,        // personalities offset (empty)
                0,
                0x1c,        // first-level index offset
                2,           // one page + sentinel
                0x348,       // first function offset
                0x40,        // page offset
                0x40,        // LSDA index offset
                0x370,       // sentinel end offset (0x350 + 0x20)
                0,
                0x40,
                0,
                0,
                0,
                3,           // compressed page kind
                0x0002_000c, // 2 entries at page offset 0x0c
                0x0002_0014, // 2 local encodings at page offset 0x14
                0,           // entry 0: delta 0, encoding index 0
                0x0100_0008, // entry 1: delta 8, encoding index 1
                0x0200_0000, // local encoding 0
                0x0400_0000, // local encoding 1
                0,
            ]
        );
        let decoded = decode_unwind_info(&bytes).unwrap();
        assert!(decoded.personalities.is_empty());
        assert!(decoded.lsdas.is_empty());
        assert_eq!(
            decoded.records,
            vec![
                DecodedUnwindRecord {
                    function_offset: 0x348,
                    encoding: 0x0200_0000,
                },
                DecodedUnwindRecord {
                    function_offset: 0x350,
                    encoding: 0x0400_0000,
                },
            ]
        );
    }

    /// An encoding used by more than one record should be promoted into the
    /// table-wide common encoding array instead of being duplicated per page.
    #[test]
    fn repeated_encodings_promote_to_common_table() {
        let bytes = serialize_unwind_info(&[
            UnwindRecord {
                function_offset: 0x348,
                code_len: 0x18,
                encoding: 0x0400_0000,
                personality_offset: None,
                lsda_offset: None,
            },
            UnwindRecord {
                function_offset: 0x360,
                code_len: 0x20,
                encoding: 0x0200_2000,
                personality_offset: None,
                lsda_offset: None,
            },
            UnwindRecord {
                function_offset: 0x390,
                code_len: 0x10,
                encoding: 0x0400_0000, // appears twice -> should become common
                personality_offset: None,
                lsda_offset: None,
            },
        ])
        .unwrap();
        let words: Vec<u32> = bytes
            .chunks_exact(4)
            .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap()))
            .collect();
        // words[2] is the common-encoding count; words[7] is the first word
        // after the 7-word header, i.e. the promoted encoding itself.
        assert_eq!(words[2], 1, "expected one promoted common encoding");
        assert_eq!(words[7], 0x0400_0000);

        let decoded = decode_unwind_info(&bytes).unwrap();
        assert_eq!(
            decoded.records,
            vec![
                DecodedUnwindRecord {
                    function_offset: 0x348,
                    encoding: 0x0400_0000,
                },
                DecodedUnwindRecord {
                    function_offset: 0x360,
                    encoding: 0x0200_2000,
                },
                DecodedUnwindRecord {
                    function_offset: 0x390,
                    encoding: 0x0400_0000,
                },
            ]
        );
    }

    /// A compressed entry stores only a 24-bit delta from the page's first
    /// function; a gap wider than MAX_COMPRESSED_FUNCTION_DELTA must start a
    /// new second-level page rather than overflow the delta field.
    #[test]
    fn large_function_gaps_start_new_pages_before_delta_overflow() {
        let bytes = serialize_unwind_info(&[
            UnwindRecord {
                function_offset: 0x348,
                code_len: 0x14,
                encoding: 0x0200_0000,
                personality_offset: None,
                lsda_offset: None,
            },
            UnwindRecord {
                // Far enough from 0x348 that the 24-bit delta cannot reach it.
                function_offset: 0x0100_0360,
                code_len: 0x14,
                encoding: 0x0200_0000,
                personality_offset: None,
                lsda_offset: None,
            },
        ])
        .unwrap();
        let words: Vec<u32> = bytes
            .chunks_exact(4)
            .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap()))
            .collect();
        // words[6] is the first-level index count.
        assert_eq!(words[6], 3, "expected two pages plus the sentinel index");
        let decoded = decode_unwind_info(&bytes).unwrap();
        assert_eq!(
            decoded.records,
            vec![
                DecodedUnwindRecord {
                    function_offset: 0x348,
                    encoding: 0x0200_0000,
                },
                DecodedUnwindRecord {
                    function_offset: 0x0100_0360,
                    encoding: 0x0200_0000,
                },
            ]
        );
    }

    /// 300 unique encodings exceed the per-page encoding index space
    /// (MAX_COMPRESSED_ENCODING_INDEX = 0xff), so the serializer must split
    /// into a second page even though the function offsets are dense.
    #[test]
    fn pages_split_before_encoding_index_overflow() {
        let records = (0..300u32)
            .map(|idx| UnwindRecord {
                function_offset: 0x400 + idx * 4,
                code_len: 4,
                encoding: 0x0200_0000 | idx, // every record gets a unique encoding
                personality_offset: None,
                lsda_offset: None,
            })
            .collect::<Vec<_>>();
        let bytes = serialize_unwind_info(&records).unwrap();
        let words: Vec<u32> = bytes
            .chunks_exact(4)
            .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap()))
            .collect();
        assert_eq!(
            words[2], 0,
            "all encodings are unique so nothing should be common"
        );
        assert_eq!(
            words[6], 3,
            "expected the encoding pressure to force a second page"
        );
        let decoded = decode_unwind_info(&bytes).unwrap();
        assert_eq!(decoded.records.len(), records.len());
        assert_eq!(decoded.records[0].function_offset, 0x400);
        assert_eq!(
            decoded.records.last().unwrap().function_offset,
            0x400 + 299 * 4
        );
    }

    /// Corrupting a compressed entry's encoding index (high byte) to 0xff —
    /// beyond the single available encoding — must be rejected by the decoder.
    #[test]
    fn decode_rejects_bad_encoding_index() {
        let mut bytes = serialize_unwind_info(&[UnwindRecord {
            function_offset: 0x348,
            code_len: 0x14,
            encoding: 0x0200_1000,
            personality_offset: None,
            lsda_offset: None,
        }])
        .unwrap();
        // The first-level index starts at byte 28 (after the 7-word header);
        // the second word of its first entry is the second-level page offset.
        let second_level_offset =
            u32::from_le_bytes(bytes[28 + 4..28 + 8].try_into().unwrap()) as usize;
        // The page's entry array offset is the u16 at page offset 4.
        let entries_offset = second_level_offset
            + u16::from_le_bytes(
                bytes[second_level_offset + 4..second_level_offset + 6]
                    .try_into()
                    .unwrap(),
            ) as usize;
        // Overwrite the first compressed entry with encoding index 0xff.
        bytes[entries_offset..entries_offset + 4].copy_from_slice(&0xff00_0000u32.to_le_bytes());
        let err = decode_unwind_info(&bytes).unwrap_err();
        assert!(matches!(err, UnwindReadError::BadEncodingIndex { .. }));
    }
}
1371