1 //! Differential harness shared by parity-oriented integration tests.
2 //!
3 //! The early scaffold only diffed arbitrary byte slices. Sprint 27 starts
4 //! turning it into a real Apple-`ld` matrix harness with a tiny corpus, basic
5 //! tolerated-diff rules, and reusable link/runtime helpers.
6
7 #![allow(dead_code)]
8
9 use std::collections::{BTreeMap, HashSet};
10 use std::fs;
11 use std::path::{Path, PathBuf};
12 use std::process::{Command, Stdio};
13 use std::thread;
14 use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
15
16 use afs_ld::leb::{read_sleb, read_uleb};
17 use afs_ld::macho::constants::{
18 BIND_IMMEDIATE_MASK, BIND_OPCODE_ADD_ADDR_ULEB, BIND_OPCODE_DONE, BIND_OPCODE_DO_BIND,
19 BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB,
20 BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, BIND_OPCODE_MASK, BIND_OPCODE_SET_ADDEND_SLEB,
21 BIND_OPCODE_SET_DYLIB_ORDINAL_IMM, BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB,
22 BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB,
23 BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, BIND_OPCODE_SET_TYPE_IMM,
24 BIND_SYMBOL_FLAGS_WEAK_IMPORT, BIND_TYPE_POINTER, INDIRECT_SYMBOL_ABS, INDIRECT_SYMBOL_LOCAL,
25 LC_BUILD_VERSION, LC_CODE_SIGNATURE, LC_DATA_IN_CODE, LC_DYLD_CHAINED_FIXUPS,
26 LC_DYLD_EXPORTS_TRIE, LC_DYLD_INFO_ONLY, LC_DYSYMTAB, LC_FUNCTION_STARTS, LC_ID_DYLIB,
27 LC_LOAD_DYLIB, LC_LOAD_UPWARD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB, LC_SEGMENT_64,
28 LC_SYMTAB, LC_UUID, N_TYPE, N_UNDF,
29 };
30 use afs_ld::macho::dylib::DylibFile;
31 use afs_ld::macho::exports::ExportKind;
32 use afs_ld::macho::reader::{
33 parse_commands, parse_header, u32_le, BuildVersionCmd, DyldInfoCmd, LoadCommand,
34 Section64Header,
35 };
36 use afs_ld::string_table::StringTable;
37 use afs_ld::symbol::{parse_nlist_table, SymKind};
38 use afs_ld::synth::stubs::{STUB_HELPER_ENTRY_SIZE, STUB_HELPER_HEADER_SIZE};
39 use afs_ld::synth::unwind::decode_unwind_info;
40
/// One parity corpus case: a directory of link inputs plus every check the
/// harness runs against the two linked outputs.
#[derive(Debug, Clone)]
pub struct LinkCase {
    /// Case name; taken from the corpus subdirectory's file name.
    pub name: String,
    /// Path of the case directory inside the corpus.
    pub dir: PathBuf,
    /// Link inputs discovered under `inputs/` (`.s`, `.c`, `.o`, `.a`,
    /// `.tbd`), minus any files consumed as artifact sources.
    pub inputs: Vec<PathBuf>,
    /// Linker argument tokens read from `args.txt`.
    pub args: Vec<String>,
    /// `(segment, section)` pairs whose contents are compared (from `sections.txt`).
    pub section_checks: Vec<(String, String)>,
    /// `(segment, section)` pairs that must NOT appear in an output.
    pub absent_sections: Vec<(String, String)>,
    /// Page-reference probes read from `page_refs.txt`.
    pub page_ref_checks: Vec<PageRefCheck>,
    /// Load-command-level semantic comparisons from `command_checks.txt`.
    pub command_checks: Vec<CommandCheck>,
    // Helper dylibs/archives built before the link (from `artifacts.txt`).
    artifacts: Vec<ArtifactSpec>,
    /// Load-command ids excluded from the id-sequence comparison.
    pub ignored_load_commands: Vec<u32>,
    /// Load-command ids that must be absent from an output.
    pub absent_load_commands: Vec<u32>,
    /// Arguments used when executing the linked binaries (`runtime.txt`).
    pub runtime_args: Vec<String>,
    /// Free-form contents of `notes.md`, when present.
    pub notes: Option<String>,
    /// Tolerated byte-diff regions parsed out of the notes.
    pub case_tolerances: Vec<CaseTolerance>,
}
58
/// Load-command-level semantic comparisons between the two outputs.
///
/// Each variant names a normalized view that `compare_command_details`
/// re-derives from both binaries and requires to be equal (or close enough,
/// where noted).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommandCheck {
    /// Compare normalized `LC_BUILD_VERSION` payloads.
    BuildVersion,
    /// Compare the loaded dylib names.
    LoadDylibNames,
    /// Compare canonicalized export records.
    ExportRecords,
    /// Compare the canonical symbol record map.
    SymbolRecordMap,
    /// Compare indirect-symbol identities.
    IndirectSymbolIdentities,
    /// Compare the three symbol-name partitions (see `symbol_partition_names`).
    SymbolPartitionNames,
    /// Require the effective string-table lengths to stay within the
    /// tolerance implemented by `string_table_within_five_percent`.
    StringTableNearParity,
    /// Compare decoded function-start offsets exactly.
    FunctionStarts,
    /// Compare function starts after offset normalization.
    NormalizedFunctionStarts,
    /// Compare canonical data-in-code records.
    DataInCode,
    /// Like `DataInCode`, but only when both outputs carry records.
    DataInCodeIfPresent,
    /// Compare unwind-info bytes after rebasing.
    RebasedUnwindBytes,
    /// Compare the raw dyld-info rebase stream.
    DyldInfoRebase,
    /// Compare canonicalized bind records.
    DyldInfoBind,
    /// Compare canonicalized weak-bind records.
    DyldInfoWeakBind,
    /// Compare canonicalized lazy-bind records.
    DyldInfoLazyBind,
}
78
/// A page-reference probe: the reference decoded at `site_offset` inside
/// `segname,sectname` must resolve to `symbol` in both outputs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageRefCheck {
    pub segname: String,
    pub sectname: String,
    /// Byte offset of the reference site within the section data.
    pub site_offset: u64,
    /// How the site is decoded (see `decode_page_reference`).
    pub kind: PageRefKind,
    /// Target: a symbol name, or `@SECTION:SEG,SECT[+ADDEND]`.
    pub symbol: String,
}
87
/// Flavor of a page-reference site; interpreted by `decode_page_reference`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PageRefKind {
    Add,
    Load,
}
93
/// A per-case allowance for a known, documented byte difference.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaseTolerance {
    /// Where the tolerated bytes live.
    pub region: ToleranceRegion,
    /// Why the difference is acceptable (parsed out of the case notes).
    pub reason: String,
}
99
/// Byte region a [`CaseTolerance`] applies to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ToleranceRegion {
    /// A byte range within section data.
    SectionBytes {
        segname: String,
        // Optional section filter; exact matching semantics live in
        // `apply_section_tolerances` — NOTE(review): confirm whether `None`
        // means "any section of the segment".
        sectname: Option<String>,
        /// First tolerated byte offset within the section data.
        start: usize,
        /// Last tolerated byte offset (inclusive).
        end_inclusive: usize,
    },
}
109
/// A helper artifact compiled from a C source in the case's `inputs/`
/// directory before the link runs.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ArtifactSpec {
    // C source file name under `inputs/` that builds this artifact.
    src_name: String,
    // File name the built artifact gets inside the work directory.
    out_name: String,
    // How to build it (dylib, static archive, re-exporting dylib).
    kind: ArtifactKind,
    // For `ReexportDylib`: the `out_name` of the artifact to re-export.
    dep_name: Option<String>,
}
117
/// Build flavor for an [`ArtifactSpec`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArtifactKind {
    // Plain `-dynamiclib` dylib.
    Dylib,
    // Static archive built with `libtool -static`.
    Archive,
    // Dylib that re-exports another artifact via `-Wl,-reexport_library`.
    ReexportDylib,
}
124
// Three symbol-name groups, as returned by `symbol_partition_names`.
type SymbolPartitions = (Vec<String>, Vec<String>, Vec<String>);
126
/// The two linked images produced for one case, kept both as raw bytes and
/// as on-disk paths (paths are needed to execute the binaries later).
pub struct LinkOutputs {
    /// Bytes of the afs-ld-linked output.
    pub ours: Vec<u8>,
    /// Bytes of the Apple-ld-linked output.
    pub theirs: Vec<u8>,
    pub our_path: PathBuf,
    pub their_path: PathBuf,
}
133
/// Classification of one byte-diff chunk between the two outputs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffCategory {
    /// A diff we expect: UUID bytes, code-signature hashes, etc.
    Tolerated(&'static str),
    /// Anything else. Fails the parity test.
    Critical,
}
141
/// One contiguous run of differing bytes, as produced by `diff_macho`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffChunk {
    /// Byte offset where the run starts.
    pub offset: usize,
    /// Length of the run in bytes.
    pub len: usize,
    /// Human-readable explanation of the diff.
    pub reason: String,
    /// Whether this run is tolerated or critical.
    pub category: DiffCategory,
}
149
/// Aggregated byte-diff result: tolerated chunks are informational, critical
/// chunks fail parity.
#[derive(Debug, Default)]
pub struct DiffReport {
    pub tolerated: Vec<DiffChunk>,
    pub critical: Vec<DiffChunk>,
}
155
impl DiffReport {
    /// True when no critical (non-tolerated) differences were recorded.
    pub fn is_clean(&self) -> bool {
        self.critical.is_empty()
    }
}
161
/// Captured result of executing a linked program.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProgramOutput {
    /// Exit code; `None` when the process was terminated by a signal.
    pub exit_code: Option<i32>,
    pub stdout: Vec<u8>,
    pub stderr: Vec<u8>,
}
168
// Normalized `LC_BUILD_VERSION` payload (three scalar fields plus a list)
// as produced by `normalized_build_version`; exact field meanings live there.
type NormalizedBuildVersion = (u32, u32, u32, Vec<u32>);
170
/// Returns true when `xcrun` can locate the `as` assembler, i.e. an Xcode
/// command-line toolchain is available on this machine.
pub fn have_xcrun() -> bool {
    let probe = Command::new("xcrun").args(["-f", "as"]).output();
    matches!(probe, Ok(out) if out.status.success())
}
179
/// Returns true when `xcrun` can locate the named tool.
pub fn have_xcrun_tool(tool: &str) -> bool {
    let probe = Command::new("xcrun").args(["-f", tool]).output();
    matches!(probe, Ok(out) if out.status.success())
}
188
/// Returns true when `tool` exists on PATH and reacts to `--version`
/// (a successful exit OR any stderr output counts as "present", since some
/// tools reject `--version` but still prove they exist).
pub fn have_tool(tool: &str) -> bool {
    match Command::new(tool).arg("--version").output() {
        Ok(out) => out.status.success() || !out.stderr.is_empty(),
        Err(_) => false,
    }
}
196
/// The macOS SDK root reported by `xcrun --show-sdk-path`, or `None` when
/// the query cannot run or fails.
pub fn sdk_path() -> Option<String> {
    let out = Command::new("xcrun")
        .args(["--sdk", "macosx", "--show-sdk-path"])
        .output()
        .ok()?;
    out.status
        .success()
        .then(|| String::from_utf8_lossy(&out.stdout).trim().to_string())
}
207
/// The macOS SDK version reported by `xcrun --show-sdk-version`, or `None`
/// when the query cannot run or fails.
pub fn sdk_version() -> Option<String> {
    let out = Command::new("xcrun")
        .args(["--sdk", "macosx", "--show-sdk-version"])
        .output()
        .ok()?;
    out.status
        .success()
        .then(|| String::from_utf8_lossy(&out.stdout).trim().to_string())
}
218
/// A per-process scratch path under the system temp dir; the pid keeps
/// concurrent test runs from colliding.
pub fn scratch(name: &str) -> PathBuf {
    let leaf = format!("afs-ld-parity-{}-{name}", std::process::id());
    std::env::temp_dir().join(leaf)
}
222
/// Assemble arm64 assembly source text into the Mach-O object at `out`
/// using the Xcode `as` driver.
///
/// Writes `src` to a sibling `.s` temp file, invokes
/// `xcrun --sdk macosx as -arch arm64`, and removes the temp file again.
/// Returns `Err` with the tool's stderr when assembly fails.
///
/// Takes `&Path` (not `&PathBuf`) so any path-like borrow works; existing
/// `&PathBuf` call sites keep compiling via deref coercion.
pub fn assemble(src: &str, out: &Path) -> Result<(), String> {
    let tmp = out.with_extension("s");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "as", "-arch", "arm64"])
        .arg(&tmp)
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun as: {e}"))?;
    // Best-effort cleanup; a leftover temp file is not fatal.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun as failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
242
/// Compile C source text into the arm64 Mach-O object at `out` with
/// `xcrun --sdk macosx clang -arch arm64 -c`.
///
/// Writes `src` to a sibling `.c` temp file, compiles it, and removes the
/// temp file again. Returns `Err` with clang's stderr on failure.
///
/// Takes `&Path` (not `&PathBuf`); `&PathBuf` callers coerce automatically.
pub fn compile_c(src: &str, out: &Path) -> Result<(), String> {
    let tmp = out.with_extension("c");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "clang", "-arch", "arm64", "-c"])
        .arg(&tmp)
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun clang: {e}"))?;
    // Best-effort cleanup; a leftover temp file is not fatal.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun clang failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
262
/// Compile C source text into an arm64 dylib at `out`, with its install
/// name set to the output path so linked binaries can locate it in place.
///
/// Takes `&Path` (not `&PathBuf`); `&PathBuf` callers coerce automatically.
fn compile_dylib_c(src: &str, out: &Path) -> Result<(), String> {
    let tmp = out.with_extension("c");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let install_name = out.to_string_lossy().to_string();
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "clang", "-arch", "arm64", "-dynamiclib"])
        .arg(&tmp)
        .arg(format!("-Wl,-install_name,{install_name}"))
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun clang dylib: {e}"))?;
    // Best-effort cleanup; a leftover temp file is not fatal.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun clang dylib failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
284
285 fn compile_archive_c(src: &str, out: &PathBuf) -> Result<(), String> {
286 let obj = out.with_extension("o");
287 compile_c(src, &obj)?;
288 let output = Command::new("libtool")
289 .args(["-static", "-o"])
290 .arg(out)
291 .arg(&obj)
292 .output()
293 .map_err(|e| format!("spawn libtool archive: {e}"))?;
294 let _ = fs::remove_file(&obj);
295 if !output.status.success() {
296 return Err(format!(
297 "libtool archive failed: {}",
298 String::from_utf8_lossy(&output.stderr)
299 ));
300 }
301 Ok(())
302 }
303
/// Compile C source text into an arm64 dylib at `out` that re-exports the
/// already-built dylib at `dep` (`-Wl,-reexport_library`).
///
/// Takes `&Path` for `out` (not `&PathBuf`), matching `dep`; `&PathBuf`
/// callers coerce automatically.
fn compile_reexport_dylib_c(src: &str, out: &Path, dep: &Path) -> Result<(), String> {
    let tmp = out.with_extension("c");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let install_name = out.to_string_lossy().to_string();
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "clang", "-arch", "arm64", "-dynamiclib"])
        .arg(&tmp)
        .arg(format!("-Wl,-install_name,{install_name}"))
        .arg(format!("-Wl,-reexport_library,{}", dep.display()))
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun clang reexport dylib: {e}"))?;
    // Best-effort cleanup; a leftover temp file is not fatal.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun clang reexport dylib failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
326
/// Scan `root` and build one [`LinkCase`] per subdirectory.
///
/// Each case directory must contain an `inputs/` folder with at least one
/// supported source (`.s`, `.c`, `.o`, `.a`, `.tbd`) plus the per-case
/// control files (`args.txt`, `sections.txt`, and the optional check lists
/// and `notes.md`). Cases are returned sorted by name so runs are
/// deterministic.
pub fn load_corpus(root: &Path) -> Result<Vec<LinkCase>, String> {
    let mut cases = Vec::new();
    let entries =
        fs::read_dir(root).map_err(|e| format!("read parity corpus {}: {e}", root.display()))?;
    for entry in entries {
        let entry = entry.map_err(|e| format!("read parity corpus entry: {e}"))?;
        let path = entry.path();
        // Only directories are cases; stray files at the corpus root are skipped.
        if !path.is_dir() {
            continue;
        }

        let name = path
            .file_name()
            .and_then(|s| s.to_str())
            .ok_or_else(|| format!("invalid UTF-8 case directory {}", path.display()))?
            .to_string();
        let inputs_dir = path.join("inputs");
        let mut inputs = Vec::new();
        let input_entries = fs::read_dir(&inputs_dir)
            .map_err(|e| format!("read inputs for {}: {e}", path.display()))?;
        for input in input_entries {
            let input = input.map_err(|e| format!("read input entry for {}: {e}", name))?;
            let input_path = input.path();
            // Only recognized link-input extensions participate.
            match input_path.extension().and_then(|s| s.to_str()) {
                Some("s") | Some("c") | Some("o") | Some("a") | Some("tbd") => {
                    inputs.push(input_path)
                }
                _ => {}
            }
        }
        // Sort so the link line is stable regardless of readdir order.
        inputs.sort();
        if inputs.is_empty() {
            return Err(format!(
                "parity corpus case {} has no supported source inputs",
                path.display()
            ));
        }

        let args = read_tokens(&path.join("args.txt"))?;
        let section_checks = read_sections(&path.join("sections.txt"))?;
        let absent_sections = read_sections_if_present(&path.join("absent_sections.txt"))?;
        let page_ref_checks = read_page_refs(&path.join("page_refs.txt"))?;
        let command_checks = read_command_checks(&path.join("command_checks.txt"))?;
        let artifacts = read_artifacts(&path.join("artifacts.txt"))?;
        let artifact_srcs: HashSet<&str> = artifacts
            .iter()
            .map(|artifact| artifact.src_name.as_str())
            .collect();
        // Inputs consumed as artifact sources are built separately and must
        // not also appear on the link line.
        inputs.retain(|input| {
            input
                .file_name()
                .and_then(|s| s.to_str())
                .map(|name| !artifact_srcs.contains(name))
                .unwrap_or(true)
        });
        let ignored_load_commands =
            read_load_command_names(&path.join("ignored_load_commands.txt"))?;
        let absent_load_commands = read_load_command_names(&path.join("absent_load_commands.txt"))?;
        let runtime_args = read_tokens_if_present(&path.join("runtime.txt"))?;
        // notes.md is optional; tolerances are parsed out of it when present.
        let notes = fs::read_to_string(path.join("notes.md")).ok();
        let case_tolerances = parse_case_tolerances(notes.as_deref())?;

        cases.push(LinkCase {
            name,
            dir: path,
            inputs,
            args,
            section_checks,
            absent_sections,
            page_ref_checks,
            command_checks,
            artifacts,
            ignored_load_commands,
            absent_load_commands,
            runtime_args,
            notes,
            case_tolerances,
        });
    }

    // Deterministic ordering for test output.
    cases.sort_by(|a, b| a.name.cmp(&b.name));
    Ok(cases)
}
410
/// Link one case with both linkers and return the two output images.
///
/// Builds a fresh scratch directory, compiles/copies every input into it,
/// materializes sidecar files and helper artifacts, expands the case's
/// argument template once per linker, then runs `afs-ld` and Apple's
/// `xcrun ld` with identical arguments. Fails if either link fails.
pub fn link_both(case: &LinkCase) -> Result<LinkOutputs, String> {
    let sdk = sdk_path().ok_or_else(|| "xcrun --show-sdk-path unavailable".to_string())?;
    let sdk_ver =
        sdk_version().ok_or_else(|| "xcrun --show-sdk-version unavailable".to_string())?;
    let work_dir = unique_temp_dir(&case.name)?;
    // Logical file name -> absolute path inside the work dir; these maps are
    // handed to `expand_args` to substitute tokens from args.txt.
    let mut compiled: BTreeMap<String, PathBuf> = BTreeMap::new();
    let mut sidecars: BTreeMap<String, PathBuf> = BTreeMap::new();
    let mut artifacts: BTreeMap<String, PathBuf> = BTreeMap::new();
    for input in &case.inputs {
        let stem = input
            .file_stem()
            .and_then(|s| s.to_str())
            .ok_or_else(|| format!("invalid input stem {}", input.display()))?;
        match input.extension().and_then(|s| s.to_str()) {
            // Assembly sources are assembled into objects in the work dir.
            Some("s") => {
                let src = fs::read_to_string(input)
                    .map_err(|e| format!("read parity input {}: {e}", input.display()))?;
                let obj = work_dir.join(format!("{stem}.o"));
                assemble(&src, &obj)?;
                compiled.insert(format!("{stem}.o"), obj);
            }
            // C sources are compiled into objects in the work dir.
            Some("c") => {
                let src = fs::read_to_string(input)
                    .map_err(|e| format!("read parity input {}: {e}", input.display()))?;
                let obj = work_dir.join(format!("{stem}.o"));
                compile_c(&src, &obj)?;
                compiled.insert(format!("{stem}.o"), obj);
            }
            // Prebuilt objects, archives, and .tbd stubs are copied verbatim.
            Some("o") | Some("a") | Some("tbd") => {
                let copied = work_dir.join(
                    input
                        .file_name()
                        .ok_or_else(|| format!("invalid input file name {}", input.display()))?,
                );
                fs::copy(input, &copied).map_err(|e| {
                    format!(
                        "copy parity input {} -> {}: {e}",
                        input.display(),
                        copied.display()
                    )
                })?;
                compiled.insert(
                    input
                        .file_name()
                        .and_then(|s| s.to_str())
                        .ok_or_else(|| format!("invalid UTF-8 input file {}", input.display()))?
                        .to_string(),
                    copied,
                );
            }
            other => {
                return Err(format!(
                    "unsupported parity input extension {:?} for {}",
                    other,
                    input.display()
                ));
            }
        }
    }
    // Optional sidecar files under `files/` are copied next to the inputs.
    let files_dir = case.dir.join("files");
    if files_dir.is_dir() {
        for entry in fs::read_dir(&files_dir)
            .map_err(|e| format!("read sidecar files for {}: {e}", case.name))?
        {
            let entry = entry.map_err(|e| format!("read sidecar entry for {}: {e}", case.name))?;
            let src = entry.path();
            if !src.is_file() {
                continue;
            }
            let name = src
                .file_name()
                .and_then(|s| s.to_str())
                .ok_or_else(|| format!("invalid sidecar file name {}", src.display()))?
                .to_string();
            let dst = work_dir.join(&name);
            fs::copy(&src, &dst)
                .map_err(|e| format!("copy sidecar {} -> {}: {e}", src.display(), dst.display()))?;
            sidecars.insert(name, dst);
        }
    }
    // Build helper artifacts in declaration order, so a re-exporting dylib
    // can look up an artifact built earlier in the list.
    for artifact in &case.artifacts {
        let src = case.dir.join("inputs").join(&artifact.src_name);
        let src_contents = fs::read_to_string(&src)
            .map_err(|e| format!("read artifact src {}: {e}", src.display()))?;
        let out = work_dir.join(&artifact.out_name);
        match artifact.kind {
            ArtifactKind::Dylib => compile_dylib_c(&src_contents, &out)?,
            ArtifactKind::Archive => compile_archive_c(&src_contents, &out)?,
            ArtifactKind::ReexportDylib => {
                let dep_name = artifact.dep_name.as_ref().ok_or_else(|| {
                    format!(
                        "missing reexport dependency for artifact {}",
                        artifact.out_name
                    )
                })?;
                let dep = artifacts
                    .get(dep_name)
                    .ok_or_else(|| format!("unknown reexport dependency `{dep_name}`"))?;
                compile_reexport_dylib_c(&src_contents, &out, dep)?;
            }
        }
        artifacts.insert(artifact.out_name.clone(), out);
    }

    // Pick the output extension from the link mode.
    let suffix = if case.args.iter().any(|arg| arg == "-dylib") {
        "dylib"
    } else {
        "out"
    };
    let our_path = work_dir.join(format!("ours.{suffix}"));
    let their_path = work_dir.join(format!("apple.{suffix}"));

    // Same argument template, expanded once per output path.
    let our_args = expand_args(
        &case.args, &compiled, &sidecars, &artifacts, &our_path, &sdk, &sdk_ver,
    )?;
    let their_args = expand_args(
        &case.args,
        &compiled,
        &sidecars,
        &artifacts,
        &their_path,
        &sdk,
        &sdk_ver,
    )?;

    let our_output = Command::new(env!("CARGO_BIN_EXE_afs-ld"))
        .args(&our_args)
        .output()
        .map_err(|e| format!("spawn afs-ld: {e}"))?;
    if !our_output.status.success() {
        return Err(format!(
            "afs-ld failed for {}:\n{}",
            case.name,
            String::from_utf8_lossy(&our_output.stderr)
        ));
    }

    let their_output = Command::new("xcrun")
        .arg("ld")
        .args(&their_args)
        .output()
        .map_err(|e| format!("spawn xcrun ld: {e}"))?;
    if !their_output.status.success() {
        return Err(format!(
            "Apple ld failed for {}:\n{}",
            case.name,
            String::from_utf8_lossy(&their_output.stderr)
        ));
    }

    let ours = fs::read(&our_path)
        .map_err(|e| format!("read afs-ld output {}: {e}", our_path.display()))?;
    let theirs = fs::read(&their_path)
        .map_err(|e| format!("read Apple ld output {}: {e}", their_path.display()))?;

    Ok(LinkOutputs {
        ours,
        theirs,
        our_path,
        their_path,
    })
}
573
574 pub fn command_ids(bytes: &[u8]) -> Result<Vec<u32>, String> {
575 let header = parse_header(bytes).map_err(|e| format!("parse header: {e}"))?;
576 let commands = parse_commands(&header, bytes).map_err(|e| format!("parse commands: {e}"))?;
577 Ok(commands
578 .into_iter()
579 .map(|cmd| match cmd {
580 LoadCommand::Segment64(_) => LC_SEGMENT_64,
581 LoadCommand::Symtab(_) => LC_SYMTAB,
582 LoadCommand::Dysymtab(_) => LC_DYSYMTAB,
583 LoadCommand::BuildVersion(_) => LC_BUILD_VERSION,
584 LoadCommand::DyldInfoOnly(_) => LC_DYLD_INFO_ONLY,
585 LoadCommand::DyldChainedFixups(_) => LC_DYLD_CHAINED_FIXUPS,
586 LoadCommand::DyldExportsTrie(_) => LC_DYLD_EXPORTS_TRIE,
587 LoadCommand::Dylib(d) => d.cmd,
588 LoadCommand::Raw { cmd, .. } => cmd,
589 other => panic!("unexpected load command in command_ids helper: {other:?}"),
590 })
591 .collect())
592 }
593
594 pub fn compare_command_ids(ours: &[u8], theirs: &[u8], ignored: &[u32]) -> Result<(), String> {
595 let our_ids: Vec<u32> = command_ids(ours)?
596 .into_iter()
597 .filter(|cmd| !ignored.contains(cmd))
598 .collect();
599 let their_ids: Vec<u32> = command_ids(theirs)?
600 .into_iter()
601 .filter(|cmd| !ignored.contains(cmd))
602 .collect();
603 if our_ids != their_ids {
604 return Err(format!(
605 "load-command ids differ:\nours: {our_ids:#x?}\ntheirs: {their_ids:#x?}"
606 ));
607 }
608 Ok(())
609 }
610
/// Run every configured [`CommandCheck`] against the two outputs.
///
/// Each check re-derives a normalized view of one load command (or linkedit
/// stream) from both binaries and fails with a descriptive error when the
/// views diverge. The `ours`/`theirs` byte slices are shadowed inside each
/// arm by the derived views.
pub fn compare_command_details(
    ours: &[u8],
    theirs: &[u8],
    checks: &[CommandCheck],
) -> Result<(), String> {
    for check in checks {
        match check {
            CommandCheck::BuildVersion => {
                let ours = normalized_build_version(ours)?;
                let theirs = normalized_build_version(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "LC_BUILD_VERSION diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::LoadDylibNames => {
                let ours = load_dylib_names(ours)?;
                let theirs = load_dylib_names(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "LC_LOAD_DYLIB names diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::ExportRecords => {
                let ours = canonical_export_records(ours)?;
                let theirs = canonical_export_records(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "canonical export records diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::SymbolRecordMap => {
                let ours = canonical_symbol_record_map(ours)?;
                let theirs = canonical_symbol_record_map(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "canonical symbol record map diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::IndirectSymbolIdentities => {
                let ours = indirect_symbol_identities(ours)?;
                let theirs = indirect_symbol_identities(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "indirect symbol identities diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::SymbolPartitionNames => {
                let ours = symbol_partition_names(ours)?;
                let theirs = symbol_partition_names(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "symbol partition names diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            // Not an exact match: lengths only need to fall within the
            // tolerance implemented by `string_table_within_five_percent`.
            CommandCheck::StringTableNearParity => {
                let our_len = effective_string_table_len(ours)?;
                let their_len = effective_string_table_len(theirs)?;
                if !string_table_within_five_percent(our_len, their_len) {
                    return Err(format!(
                        "string table length drifted too far from Apple ld: ours={} theirs={}",
                        our_len, their_len
                    ));
                }
            }
            CommandCheck::FunctionStarts => {
                let ours = decode_function_starts(ours)?;
                let theirs = decode_function_starts(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "function starts diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::NormalizedFunctionStarts => {
                let ours = normalize_function_start_offsets(&decode_function_starts(ours)?);
                let theirs = normalize_function_start_offsets(&decode_function_starts(theirs)?);
                if ours != theirs {
                    return Err(format!(
                        "normalized function starts diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::DataInCode => {
                let ours = canonical_data_in_code(ours)?;
                let theirs = canonical_data_in_code(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "canonical data-in-code records diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            // Comparison is skipped whenever either side has no records.
            CommandCheck::DataInCodeIfPresent => {
                let ours = canonical_data_in_code(ours)?;
                let theirs = canonical_data_in_code(theirs)?;
                if !ours.is_empty() && !theirs.is_empty() && ours != theirs {
                    return Err(format!(
                        "canonical data-in-code records diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::RebasedUnwindBytes => {
                let ours = rebased_unwind_bytes(ours)?;
                let theirs = rebased_unwind_bytes(theirs)?;
                if ours != theirs {
                    return Err("rebased unwind bytes diverged".to_string());
                }
            }
            CommandCheck::DyldInfoRebase => {
                let ours = dyld_info_stream(ours, DyldInfoStreamKind::Rebase)?;
                let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::Rebase)?;
                if ours != theirs {
                    return Err("rebase stream diverged".to_string());
                }
            }
            CommandCheck::DyldInfoBind => {
                let ours = canonical_bind_records(ours, DyldInfoStreamKind::Bind)?;
                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::Bind)?;
                if ours != theirs {
                    return Err(format!(
                        "bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::DyldInfoWeakBind => {
                let ours = canonical_bind_records(ours, DyldInfoStreamKind::WeakBind)?;
                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::WeakBind)?;
                if ours != theirs {
                    return Err(format!(
                        "weak-bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::DyldInfoLazyBind => {
                let ours = canonical_bind_records(ours, DyldInfoStreamKind::LazyBind)?;
                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::LazyBind)?;
                if ours != theirs {
                    return Err(format!(
                        "lazy-bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
        }
    }
    Ok(())
}
763
764 pub fn ensure_absent_load_commands(
765 bytes: &[u8],
766 commands: &[u32],
767 side: &str,
768 ) -> Result<(), String> {
769 let ids = command_ids(bytes)?;
770 for command in commands {
771 if ids.contains(command) {
772 return Err(format!(
773 "{side} unexpectedly emitted {}",
774 load_command_name(*command)
775 ));
776 }
777 }
778 Ok(())
779 }
780
781 pub fn ensure_absent_sections(
782 bytes: &[u8],
783 sections: &[(String, String)],
784 side: &str,
785 ) -> Result<(), String> {
786 for (segname, sectname) in sections {
787 if output_section(bytes, segname, sectname).is_some() {
788 return Err(format!(
789 "{side} unexpectedly emitted section {segname},{sectname}"
790 ));
791 }
792 }
793 Ok(())
794 }
795
796 pub fn output_section(bytes: &[u8], segname: &str, sectname: &str) -> Option<(u64, Vec<u8>)> {
797 let header = parse_header(bytes).ok()?;
798 let commands = parse_commands(&header, bytes).ok()?;
799 for cmd in commands {
800 if let LoadCommand::Segment64(seg) = cmd {
801 for section in seg.sections {
802 if section.segname_str() == segname && section.sectname_str() == sectname {
803 let data = if section.offset == 0 {
804 Vec::new()
805 } else {
806 let start = section.offset as usize;
807 let end = start + section.size as usize;
808 bytes.get(start..end)?.to_vec()
809 };
810 return Some((section.addr, data));
811 }
812 }
813 }
814 }
815 None
816 }
817
818 fn output_section_header(bytes: &[u8], segname: &str, sectname: &str) -> Option<Section64Header> {
819 let header = parse_header(bytes).ok()?;
820 let commands = parse_commands(&header, bytes).ok()?;
821 for cmd in commands {
822 if let LoadCommand::Segment64(seg) = cmd {
823 for section in seg.sections {
824 if section.segname_str() == segname && section.sectname_str() == sectname {
825 return Some(section);
826 }
827 }
828 }
829 }
830 None
831 }
832
833 fn segment_vmaddr(bytes: &[u8], segname: &str) -> Option<u64> {
834 let header = parse_header(bytes).ok()?;
835 let commands = parse_commands(&header, bytes).ok()?;
836 for cmd in commands {
837 if let LoadCommand::Segment64(seg) = cmd {
838 if seg.segname_str() == segname {
839 return Some(seg.vmaddr);
840 }
841 }
842 }
843 None
844 }
845
/// Compare the listed sections of the two outputs.
///
/// `__TEXT,__stubs` and `__TEXT,__stub_helper` are compared through
/// canonical summaries (`canonical_stub_targets` / `canonical_stub_helper`)
/// instead of raw bytes. Every other section is byte-diffed via
/// `diff_macho`, with per-case tolerances applied afterwards.
pub fn compare_sections(
    ours: &[u8],
    theirs: &[u8],
    sections: &[(String, String)],
    case_tolerances: &[CaseTolerance],
) -> Result<(), String> {
    for (segname, sectname) in sections {
        if segname == "__TEXT" && sectname == "__stubs" {
            let ours = canonical_stub_targets(ours)?;
            let theirs = canonical_stub_targets(theirs)?;
            if ours != theirs {
                return Err(format!(
                    "canonical stub targets diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                ));
            }
            continue;
        }
        if segname == "__TEXT" && sectname == "__stub_helper" {
            let ours = canonical_stub_helper(ours)?;
            let theirs = canonical_stub_helper(theirs)?;
            if ours != theirs {
                return Err(format!(
                    "canonical stub helper surface diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                ));
            }
            continue;
        }
        let (_, our_bytes) = output_section(ours, segname, sectname)
            .ok_or_else(|| format!("missing section {segname},{sectname} in afs-ld output"))?;
        let (_, their_bytes) = output_section(theirs, segname, sectname)
            .ok_or_else(|| format!("missing section {segname},{sectname} in Apple output"))?;
        // Byte diff first, then per-case tolerances (see `apply_section_tolerances`).
        let diff = apply_section_tolerances(
            diff_macho(&our_bytes, &their_bytes),
            segname,
            sectname,
            case_tolerances,
        );
        if !diff.is_clean() {
            return Err(format!(
                "section bytes differ for {segname},{sectname}: {:#?}",
                diff.critical
            ));
        }
    }
    Ok(())
}
892
/// Verify each configured page-reference site in both outputs.
///
/// For every check the reference is decoded from each binary's own section
/// bytes and required to equal that binary's own expected target (symbol
/// value or `@SECTION:` address) — so the two outputs may legitimately place
/// the target at different absolute addresses.
pub fn compare_page_refs(
    ours: &[u8],
    theirs: &[u8],
    checks: &[PageRefCheck],
) -> Result<(), String> {
    if checks.is_empty() {
        return Ok(());
    }
    let our_symbols = symbol_values(ours)?;
    let their_symbols = symbol_values(theirs)?;
    for check in checks {
        let (our_addr, our_bytes) = output_section(ours, &check.segname, &check.sectname)
            .ok_or_else(|| {
                format!(
                    "missing section {},{} in afs-ld output",
                    check.segname, check.sectname
                )
            })?;
        let (their_addr, their_bytes) = output_section(theirs, &check.segname, &check.sectname)
            .ok_or_else(|| {
                format!(
                    "missing section {},{} in Apple output",
                    check.segname, check.sectname
                )
            })?;
        // Actual target decoded from the instruction bytes at the site.
        let our_target =
            decode_page_reference(&our_bytes, our_addr, check.site_offset, check.kind)?;
        let their_target =
            decode_page_reference(&their_bytes, their_addr, check.site_offset, check.kind)?;
        // Expected target resolved per binary from its own symbols/sections.
        let expected_ours = resolve_page_ref_expectation(ours, &our_symbols, &check.symbol)?;
        let expected_theirs = resolve_page_ref_expectation(theirs, &their_symbols, &check.symbol)?;
        if our_target != expected_ours || their_target != expected_theirs {
            return Err(format!(
                "page ref {},{}+0x{:x} -> {} diverged: ours=0x{:x} expected=0x{:x}; theirs=0x{:x} expected=0x{:x}",
                check.segname,
                check.sectname,
                check.site_offset,
                check.symbol,
                our_target,
                expected_ours,
                their_target,
                expected_theirs,
            ));
        }
    }
    Ok(())
}
940
941 fn resolve_page_ref_expectation(
942 bytes: &[u8],
943 symbols: &BTreeMap<String, u64>,
944 reference: &str,
945 ) -> Result<u64, String> {
946 if let Some(spec) = reference.strip_prefix("@SECTION:") {
947 let (section_spec, addend) = if let Some((section_spec, addend)) = spec.rsplit_once('+') {
948 (section_spec, parse_u64(addend)?)
949 } else {
950 (spec, 0)
951 };
952 let (segname, sectname) = section_spec
953 .split_once(',')
954 .ok_or_else(|| format!("invalid @SECTION page-ref target `{reference}`"))?;
955 let (addr, data) = output_section(bytes, segname, sectname)
956 .ok_or_else(|| format!("missing section {segname},{sectname} in output"))?;
957 if addend > data.len() as u64 {
958 return Err(format!(
959 "@SECTION target `{reference}` exceeds section size {}",
960 data.len()
961 ));
962 }
963 return Ok(addr + addend);
964 }
965 symbols
966 .get(reference)
967 .copied()
968 .ok_or_else(|| format!("missing symbol {reference} in output"))
969 }
970
/// Run `path` with `args`, capturing stdout/stderr, under the runtime
/// timeout from `runtime_timeout()`.
///
/// The child is polled with `try_wait` every 5 ms; once the deadline passes
/// it is killed and an error carrying whatever output it produced is
/// returned.
pub fn run_program(path: &Path, args: &[String]) -> Result<ProgramOutput, String> {
    let runtime_timeout = runtime_timeout();

    let mut child = Command::new(path)
        .args(args)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| format!("run {}: {e}", path.display()))?;
    let started = Instant::now();
    loop {
        // Child finished: collect its buffered output and return it.
        if child
            .try_wait()
            .map_err(|e| format!("wait for {}: {e}", path.display()))?
            .is_some()
        {
            let output = child
                .wait_with_output()
                .map_err(|e| format!("collect output from {}: {e}", path.display()))?;
            return Ok(ProgramOutput {
                exit_code: output.status.code(),
                stdout: output.stdout,
                stderr: output.stderr,
            });
        }
        // Deadline exceeded: kill the child, then report its partial output.
        if started.elapsed() >= runtime_timeout {
            let _ = child.kill();
            let output = child
                .wait_with_output()
                .map_err(|e| format!("collect timed-out output from {}: {e}", path.display()))?;
            return Err(format!(
                "run {} timed out after {:?}: exit={:?} stdout={:?} stderr={:?}",
                path.display(),
                runtime_timeout,
                output.status.code(),
                String::from_utf8_lossy(&output.stdout),
                String::from_utf8_lossy(&output.stderr)
            ));
        }
        thread::sleep(Duration::from_millis(5));
    }
}
1013
/// Runs our linked binary and Apple's reference binary with the same `args`
/// and errors unless exit code, stdout, and stderr all match exactly.
pub fn compare_runtime(our_path: &Path, their_path: &Path, args: &[String]) -> Result<(), String> {
    // Owned copies so the scoped worker thread can borrow them for its whole lifetime.
    let our_path = our_path.to_path_buf();
    let their_path = their_path.to_path_buf();
    let their_args = args.to_vec();
    // Run both binaries concurrently: Apple's copy on a scoped worker thread,
    // ours on the current thread. `thread::scope` joins the worker before
    // returning, so no detached work escapes this function.
    let ours = thread::scope(|scope| {
        let theirs = scope.spawn(|| run_program(&their_path, &their_args));
        let ours = run_program(&our_path, args);
        let theirs = theirs
            .join()
            .map_err(|_| "Apple runtime worker panicked".to_string())?;
        Ok::<_, String>((ours, theirs))
    })?;
    let (ours, theirs) = ours;
    // Surface per-program launch/timeout errors only after both runs finished,
    // so one failure does not leave the other process unreaped.
    let ours = ours?;
    let theirs = theirs?;
    if ours != theirs {
        return Err(format!(
            "runtime differs:\nours: exit={:?} stdout={:?} stderr={:?}\ntheirs: exit={:?} stdout={:?} stderr={:?}",
            ours.exit_code,
            String::from_utf8_lossy(&ours.stdout),
            String::from_utf8_lossy(&ours.stderr),
            theirs.exit_code,
            String::from_utf8_lossy(&theirs.stdout),
            String::from_utf8_lossy(&theirs.stderr),
        ));
    }
    Ok(())
}
1042
/// Timeout applied to each runtime invocation. Overridable through the
/// `PARITY_RUNTIME_TIMEOUT_SECONDS` environment variable (whole seconds);
/// an unset or unparsable value falls back to the default.
fn runtime_timeout() -> Duration {
    const DEFAULT_RUNTIME_TIMEOUT_SECS: u64 = 120;

    let secs = match std::env::var("PARITY_RUNTIME_TIMEOUT_SECONDS") {
        Ok(raw) => raw.parse::<u64>().unwrap_or(DEFAULT_RUNTIME_TIMEOUT_SECS),
        Err(_) => DEFAULT_RUNTIME_TIMEOUT_SECS,
    };
    Duration::from_secs(secs)
}
1052
/// Byte-level diff between two Mach-O images or section byte slices.
///
/// Sprint 27 starts tolerating a very small allowlist: UUID bytes, dylib
/// timestamp fields, and code-signature command/blob bytes at matching
/// offsets. Unknown diffs remain critical.
///
/// Inputs of different lengths short-circuit into a single critical chunk;
/// equal-length inputs are scanned byte-by-byte, and runs of differing bytes
/// are grouped into chunks that share one tolerated/critical classification.
pub fn diff_macho(ours: &[u8], theirs: &[u8]) -> DiffReport {
    let mut report = DiffReport::default();

    // Different sizes make positional comparison meaningless: report one
    // critical chunk spanning the larger image and stop.
    if ours.len() != theirs.len() {
        report.critical.push(DiffChunk {
            offset: 0,
            len: ours.len().max(theirs.len()),
            reason: format!(
                "total size differs: ours = {}, theirs = {}",
                ours.len(),
                theirs.len()
            ),
            category: DiffCategory::Critical,
        });
        return report;
    }

    // Per-byte tolerance labels, computed independently for each image; a
    // byte is only tolerated when BOTH sides carry the same reason for it.
    let our_mask = tolerated_mask(ours);
    let their_mask = tolerated_mask(theirs);

    let mut i = 0;
    while i < ours.len() {
        if ours[i] == theirs[i] {
            i += 1;
            continue;
        }

        // The run's classification is fixed by its first differing byte.
        let tolerated_reason = match (our_mask[i], their_mask[i]) {
            (Some(left), Some(right)) if left == right => Some(left),
            _ => None,
        };
        let start = i;
        i += 1;
        // Extend the run while bytes keep differing AND keep the same
        // classification, so one chunk never mixes tolerated and critical
        // bytes (or two distinct tolerated reasons).
        while i < ours.len() && ours[i] != theirs[i] {
            let same_category = match tolerated_reason {
                Some(reason) => matches!(
                    (our_mask[i], their_mask[i]),
                    (Some(left), Some(right)) if left == reason && right == reason
                ),
                None => !matches!(
                    (our_mask[i], their_mask[i]),
                    (Some(left), Some(right)) if left == right
                ),
            };
            if !same_category {
                break;
            }
            i += 1;
        }

        let len = i - start;
        if let Some(reason) = tolerated_reason {
            report.tolerated.push(DiffChunk {
                offset: start,
                len,
                reason: reason.to_string(),
                category: DiffCategory::Tolerated(reason),
            });
        } else {
            report.critical.push(DiffChunk {
                offset: start,
                len,
                reason: format!("{} byte(s) differ starting at 0x{start:x}", len),
                category: DiffCategory::Critical,
            });
        }
    }

    report
}
1128
/// Extracts per-case tolerated-diff rules from a case's optional notes text.
///
/// Only lines inside a `tolerated:` block are considered; each rule line has
/// the shape `- region: SEG[,SECT] bytes START-END reason: "…"`. Returns an
/// empty list when `notes` is `None` or carries no such block; malformed rule
/// lines are hard errors.
pub fn parse_case_tolerances(notes: Option<&str>) -> Result<Vec<CaseTolerance>, String> {
    let Some(notes) = notes else {
        return Ok(Vec::new());
    };

    let mut tolerances = Vec::new();
    let mut in_block = false;
    for raw_line in notes.lines() {
        let line = raw_line.trim();
        if line.is_empty() {
            continue;
        }
        if line == "tolerated:" {
            in_block = true;
            continue;
        }
        if !in_block {
            continue;
        }
        if !line.starts_with("- region:") {
            // Stop once the simple tolerated block ends.
            // `#` comments and indented continuation lines are skipped; any
            // other unindented line terminates the block. Note the indentation
            // test uses `raw_line` — `line` is already trimmed.
            if !line.starts_with('#') && !raw_line.starts_with(' ') && !raw_line.starts_with('\t') {
                break;
            }
            continue;
        }
        tolerances.push(parse_case_tolerance_line(line)?);
    }
    Ok(tolerances)
}
1159
1160 pub fn apply_section_tolerances(
1161 mut diff: DiffReport,
1162 segname: &str,
1163 sectname: &str,
1164 case_tolerances: &[CaseTolerance],
1165 ) -> DiffReport {
1166 if diff.critical.is_empty() || case_tolerances.is_empty() {
1167 return diff;
1168 }
1169
1170 let mut remaining = Vec::new();
1171 for chunk in diff.critical.drain(..) {
1172 let tolerated = case_tolerances
1173 .iter()
1174 .find(|tol| tolerance_covers_chunk(tol, segname, sectname, chunk.offset, chunk.len));
1175 if let Some(tol) = tolerated {
1176 diff.tolerated.push(DiffChunk {
1177 offset: chunk.offset,
1178 len: chunk.len,
1179 reason: tol.reason.clone(),
1180 category: DiffCategory::Tolerated("case-note"),
1181 });
1182 } else {
1183 remaining.push(chunk);
1184 }
1185 }
1186 diff.critical = remaining;
1187 diff
1188 }
1189
/// Creates a fresh, uniquely named scratch directory for one parity case.
/// The pid plus a nanosecond stamp keeps concurrent test runs from colliding.
fn unique_temp_dir(case_name: &str) -> Result<PathBuf, String> {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_err(|e| format!("clock error: {e}"))?
        .as_nanos();
    // Slashes and spaces would break the single-component directory name.
    let sanitized = case_name.replace(['/', ' '], "-");
    let pid = std::process::id();
    let dir = std::env::temp_dir().join(format!("afs-ld-parity-{pid}-{sanitized}-{nanos}"));
    fs::create_dir_all(&dir).map_err(|e| format!("create {}: {e}", dir.display()))?;
    Ok(dir)
}
1203
/// Parses one `- region: SEG[,SECT] bytes START-END reason: "…"` rule line.
///
/// The `reason:` suffix is split off first so the remaining text can be
/// safely split on ` bytes ` without the reason string interfering.
fn parse_case_tolerance_line(line: &str) -> Result<CaseTolerance, String> {
    let rest = line
        .strip_prefix("- region:")
        .ok_or_else(|| format!("invalid tolerance line `{line}`"))?
        .trim();
    let (before_reason, reason_part) = rest
        .split_once(" reason:")
        .ok_or_else(|| format!("missing `reason:` in tolerance line `{line}`"))?;
    let (region_part, bytes_part) = before_reason
        .split_once(" bytes ")
        .ok_or_else(|| format!("missing `bytes` range in tolerance line `{line}`"))?;
    // Reasons may be quoted; the quotes are stripped, not required.
    let reason = reason_part.trim().trim_matches('"').to_string();
    if reason.is_empty() {
        return Err(format!("empty tolerance reason in `{line}`"));
    }
    let (start, end_inclusive) = parse_tolerance_range(bytes_part.trim())?;
    let region_token = region_part.trim();
    // `SEG,SECT` narrows to one section; a bare `SEG` covers the whole segment.
    let (segname, sectname) = match region_token.split_once(',') {
        Some((segname, sectname)) => (
            segname.trim().to_string(),
            Some(sectname.trim().to_string()),
        ),
        None => (region_token.to_string(), None),
    };
    if segname.is_empty() {
        return Err(format!("empty tolerance region in `{line}`"));
    }
    Ok(CaseTolerance {
        region: ToleranceRegion::SectionBytes {
            segname,
            sectname,
            start,
            end_inclusive,
        },
        reason,
    })
}
1241
1242 fn parse_tolerance_range(range: &str) -> Result<(usize, usize), String> {
1243 let (start, end) = range
1244 .split_once('-')
1245 .ok_or_else(|| format!("invalid tolerance range `{range}`"))?;
1246 let start = parse_usize(start.trim())?;
1247 let end = parse_usize(end.trim())?;
1248 if end < start {
1249 return Err(format!("tolerance range end before start in `{range}`"));
1250 }
1251 Ok((start, end))
1252 }
1253
/// Parses a decimal or `0x…`-prefixed hexadecimal `usize` token.
fn parse_usize(token: &str) -> Result<usize, String> {
    let parsed = match token.strip_prefix("0x") {
        Some(hex) => usize::from_str_radix(hex, 16),
        None => token.parse::<usize>(),
    };
    parsed.map_err(|e| format!("parse usize `{token}`: {e}"))
}
1263
1264 fn tolerance_covers_chunk(
1265 tolerance: &CaseTolerance,
1266 segname: &str,
1267 sectname: &str,
1268 offset: usize,
1269 len: usize,
1270 ) -> bool {
1271 match &tolerance.region {
1272 ToleranceRegion::SectionBytes {
1273 segname: expected_seg,
1274 sectname: expected_sect,
1275 start,
1276 end_inclusive,
1277 } => {
1278 if expected_seg != segname {
1279 return false;
1280 }
1281 if let Some(expected_sect) = expected_sect {
1282 if expected_sect != sectname {
1283 return false;
1284 }
1285 }
1286 let end = offset.saturating_add(len.saturating_sub(1));
1287 offset >= *start && end <= *end_inclusive
1288 }
1289 }
1290 }
1291
/// Reads a sidecar token file: one trimmed token per line, with blank lines
/// and `#` comment lines dropped.
fn read_tokens(path: &Path) -> Result<Vec<String>, String> {
    let contents = fs::read_to_string(path).map_err(|e| format!("read {}: {e}", path.display()))?;
    let mut tokens = Vec::new();
    for raw in contents.lines() {
        let token = raw.trim();
        if token.is_empty() || token.starts_with('#') {
            continue;
        }
        tokens.push(token.to_owned());
    }
    Ok(tokens)
}
1301
1302 fn read_tokens_if_present(path: &Path) -> Result<Vec<String>, String> {
1303 if path.exists() {
1304 read_tokens(path)
1305 } else {
1306 Ok(Vec::new())
1307 }
1308 }
1309
1310 fn read_sections(path: &Path) -> Result<Vec<(String, String)>, String> {
1311 let mut sections = Vec::new();
1312 for line in read_tokens(path)? {
1313 let mut parts = line.split_whitespace();
1314 let segname = parts
1315 .next()
1316 .ok_or_else(|| format!("missing segment name in {}", path.display()))?;
1317 let sectname = parts
1318 .next()
1319 .ok_or_else(|| format!("missing section name in {}", path.display()))?;
1320 if parts.next().is_some() {
1321 return Err(format!(
1322 "too many fields in section spec `{line}` from {}",
1323 path.display()
1324 ));
1325 }
1326 sections.push((segname.to_string(), sectname.to_string()));
1327 }
1328 Ok(sections)
1329 }
1330
1331 fn read_sections_if_present(path: &Path) -> Result<Vec<(String, String)>, String> {
1332 if path.exists() {
1333 read_sections(path)
1334 } else {
1335 Ok(Vec::new())
1336 }
1337 }
1338
1339 fn read_load_command_names(path: &Path) -> Result<Vec<u32>, String> {
1340 if !path.exists() {
1341 return Ok(Vec::new());
1342 }
1343 let mut commands = Vec::new();
1344 for line in read_tokens(path)? {
1345 commands.push(parse_load_command_name(&line)?);
1346 }
1347 Ok(commands)
1348 }
1349
1350 fn read_command_checks(path: &Path) -> Result<Vec<CommandCheck>, String> {
1351 if !path.exists() {
1352 return Ok(Vec::new());
1353 }
1354 let mut checks = Vec::new();
1355 for line in read_tokens(path)? {
1356 checks.push(parse_command_check(&line)?);
1357 }
1358 Ok(checks)
1359 }
1360
1361 fn read_page_refs(path: &Path) -> Result<Vec<PageRefCheck>, String> {
1362 if !path.exists() {
1363 return Ok(Vec::new());
1364 }
1365 let mut checks = Vec::new();
1366 for line in read_tokens(path)? {
1367 let mut parts = line.split_whitespace();
1368 let segname = parts
1369 .next()
1370 .ok_or_else(|| format!("missing segment name in {}", path.display()))?;
1371 let sectname = parts
1372 .next()
1373 .ok_or_else(|| format!("missing section name in {}", path.display()))?;
1374 let site_offset = parts
1375 .next()
1376 .ok_or_else(|| format!("missing site offset in {}", path.display()))?;
1377 let kind = parts
1378 .next()
1379 .ok_or_else(|| format!("missing page-ref kind in {}", path.display()))?;
1380 let symbol = parts
1381 .next()
1382 .ok_or_else(|| format!("missing symbol name in {}", path.display()))?;
1383 if parts.next().is_some() {
1384 return Err(format!(
1385 "too many fields in page-ref spec `{line}` from {}",
1386 path.display()
1387 ));
1388 }
1389 checks.push(PageRefCheck {
1390 segname: segname.to_string(),
1391 sectname: sectname.to_string(),
1392 site_offset: parse_u64(site_offset)?,
1393 kind: parse_page_ref_kind(kind)?,
1394 symbol: symbol.to_string(),
1395 });
1396 }
1397 Ok(checks)
1398 }
1399
1400 fn read_artifacts(path: &Path) -> Result<Vec<ArtifactSpec>, String> {
1401 if !path.exists() {
1402 return Ok(Vec::new());
1403 }
1404 let mut specs = Vec::new();
1405 for line in read_tokens(path)? {
1406 let mut parts = line.split_whitespace();
1407 let kind = parts
1408 .next()
1409 .ok_or_else(|| format!("missing artifact kind in {}", path.display()))?;
1410 let src_name = parts
1411 .next()
1412 .ok_or_else(|| format!("missing artifact src in {}", path.display()))?;
1413 let out_name = parts
1414 .next()
1415 .ok_or_else(|| format!("missing artifact output in {}", path.display()))?;
1416 let dep_name = parts.next().map(str::to_string);
1417 if parts.next().is_some() {
1418 return Err(format!(
1419 "too many fields in artifact spec `{line}` from {}",
1420 path.display()
1421 ));
1422 }
1423 let (kind, dep_name) = match kind {
1424 "clang_dylib" => {
1425 if dep_name.is_some() {
1426 return Err(format!(
1427 "clang_dylib takes exactly 3 fields in {}",
1428 path.display()
1429 ));
1430 }
1431 (ArtifactKind::Dylib, None)
1432 }
1433 "clang_archive" => {
1434 if dep_name.is_some() {
1435 return Err(format!(
1436 "clang_archive takes exactly 3 fields in {}",
1437 path.display()
1438 ));
1439 }
1440 (ArtifactKind::Archive, None)
1441 }
1442 "clang_reexport_dylib" => {
1443 let dep_name = dep_name.ok_or_else(|| {
1444 format!(
1445 "clang_reexport_dylib needs a dependency artifact in {}",
1446 path.display()
1447 )
1448 })?;
1449 (ArtifactKind::ReexportDylib, Some(dep_name))
1450 }
1451 other => return Err(format!("unknown artifact kind `{other}`")),
1452 };
1453 specs.push(ArtifactSpec {
1454 src_name: src_name.to_string(),
1455 out_name: out_name.to_string(),
1456 kind,
1457 dep_name,
1458 });
1459 }
1460 Ok(specs)
1461 }
1462
1463 fn parse_command_check(name: &str) -> Result<CommandCheck, String> {
1464 match name {
1465 "build_version" => Ok(CommandCheck::BuildVersion),
1466 "load_dylib_names" => Ok(CommandCheck::LoadDylibNames),
1467 "export_records" => Ok(CommandCheck::ExportRecords),
1468 "symbol_record_map" => Ok(CommandCheck::SymbolRecordMap),
1469 "indirect_symbol_identities" => Ok(CommandCheck::IndirectSymbolIdentities),
1470 "symbol_partition_names" => Ok(CommandCheck::SymbolPartitionNames),
1471 "string_table_near_parity" => Ok(CommandCheck::StringTableNearParity),
1472 "function_starts" => Ok(CommandCheck::FunctionStarts),
1473 "normalized_function_starts" => Ok(CommandCheck::NormalizedFunctionStarts),
1474 "data_in_code" => Ok(CommandCheck::DataInCode),
1475 "data_in_code_if_present" => Ok(CommandCheck::DataInCodeIfPresent),
1476 "rebased_unwind_bytes" => Ok(CommandCheck::RebasedUnwindBytes),
1477 "dyld_info_rebase" => Ok(CommandCheck::DyldInfoRebase),
1478 "dyld_info_bind" => Ok(CommandCheck::DyldInfoBind),
1479 "dyld_info_weak_bind" => Ok(CommandCheck::DyldInfoWeakBind),
1480 "dyld_info_lazy_bind" => Ok(CommandCheck::DyldInfoLazyBind),
1481 other => Err(format!("unknown command check `{other}`")),
1482 }
1483 }
1484
1485 fn parse_page_ref_kind(kind: &str) -> Result<PageRefKind, String> {
1486 match kind {
1487 "add" => Ok(PageRefKind::Add),
1488 "load" => Ok(PageRefKind::Load),
1489 other => Err(format!("unknown page-ref kind `{other}`")),
1490 }
1491 }
1492
1493 fn parse_load_command_name(name: &str) -> Result<u32, String> {
1494 match name {
1495 "LC_SEGMENT_64" => Ok(LC_SEGMENT_64),
1496 "LC_LOAD_DYLIB" => Ok(LC_LOAD_DYLIB),
1497 "LC_UUID" => Ok(LC_UUID),
1498 "LC_CODE_SIGNATURE" => Ok(LC_CODE_SIGNATURE),
1499 "LC_LINKER_OPTIMIZATION_HINT" => Ok(afs_ld::macho::constants::LC_LINKER_OPTIMIZATION_HINT),
1500 other => Err(format!("unknown load command name `{other}`")),
1501 }
1502 }
1503
1504 fn load_command_name(cmd: u32) -> &'static str {
1505 match cmd {
1506 LC_SEGMENT_64 => "LC_SEGMENT_64",
1507 LC_LOAD_DYLIB => "LC_LOAD_DYLIB",
1508 LC_UUID => "LC_UUID",
1509 LC_CODE_SIGNATURE => "LC_CODE_SIGNATURE",
1510 afs_ld::macho::constants::LC_LINKER_OPTIMIZATION_HINT => "LC_LINKER_OPTIMIZATION_HINT",
1511 _ => "unknown load command",
1512 }
1513 }
1514
/// Parses a decimal or `0x…`-prefixed hexadecimal `u64` token.
fn parse_u64(value: &str) -> Result<u64, String> {
    match value.strip_prefix("0x") {
        Some(hex) => u64::from_str_radix(hex, 16).map_err(|e| format!("parse hex `{value}`: {e}")),
        None => value
            .parse::<u64>()
            .map_err(|e| format!("parse integer `{value}`: {e}")),
    }
}
1524
/// Expands `@…@` placeholders in a case's argument list:
/// `@OUT@`, `@SDK_PATH@`, `@SDK_VERSION@` are direct substitutions;
/// `@SDK_TBD:rel@` joins `rel` onto the SDK path; `@INPUT:name@`,
/// `@FILE:name@`, and `@ARTIFACT:name@` look `name` up in the corresponding
/// table. Everything else passes through untouched.
fn expand_args(
    args: &[String],
    compiled: &BTreeMap<String, PathBuf>,
    sidecars: &BTreeMap<String, PathBuf>,
    artifacts: &BTreeMap<String, PathBuf>,
    out: &Path,
    sdk: &str,
    sdk_ver: &str,
) -> Result<Vec<String>, String> {
    // Extracts `inner` from an `@PREFIX:inner@` placeholder.
    fn tagged<'a>(arg: &'a str, prefix: &str) -> Option<&'a str> {
        arg.strip_prefix(prefix)?.strip_suffix('@')
    }
    // Shared table lookup with the harness's standard error shape.
    fn lookup(table: &BTreeMap<String, PathBuf>, name: &str, what: &str) -> Result<String, String> {
        table
            .get(name)
            .map(|path| path.to_string_lossy().to_string())
            .ok_or_else(|| format!("unknown parity {what} placeholder `{name}`"))
    }

    let mut expanded = Vec::with_capacity(args.len());
    for arg in args {
        let value = if arg == "@OUT@" {
            out.to_string_lossy().to_string()
        } else if arg == "@SDK_PATH@" {
            sdk.to_string()
        } else if arg == "@SDK_VERSION@" {
            sdk_ver.to_string()
        } else if let Some(rel) = tagged(arg, "@SDK_TBD:") {
            Path::new(sdk).join(rel).to_string_lossy().to_string()
        } else if let Some(name) = tagged(arg, "@INPUT:") {
            lookup(compiled, name, "input")?
        } else if let Some(name) = tagged(arg, "@FILE:") {
            lookup(sidecars, name, "sidecar")?
        } else if let Some(name) = tagged(arg, "@ARTIFACT:") {
            lookup(artifacts, name, "artifact")?
        } else {
            arg.clone()
        };
        expanded.push(value);
    }
    Ok(expanded)
}
1589
1590 fn tolerated_mask(bytes: &[u8]) -> Vec<Option<&'static str>> {
1591 let mut mask = vec![None; bytes.len()];
1592 let Ok(header) = parse_header(bytes) else {
1593 return mask;
1594 };
1595 let cmd_base = 32usize;
1596 let Ok(cmd_limit) = cmd_base.checked_add(header.sizeofcmds as usize).ok_or(()) else {
1597 return mask;
1598 };
1599 if cmd_limit > bytes.len() {
1600 return mask;
1601 }
1602
1603 let mut cursor = cmd_base;
1604 for _ in 0..header.ncmds {
1605 if cursor + 8 > cmd_limit {
1606 break;
1607 }
1608 let cmd = u32_le(&bytes[cursor..cursor + 4]);
1609 let cmdsize = u32_le(&bytes[cursor + 4..cursor + 8]) as usize;
1610 if cmdsize < 8 || cursor + cmdsize > cmd_limit {
1611 break;
1612 }
1613 match cmd {
1614 LC_UUID => mark_range(&mut mask, cursor, cursor + cmdsize, "UUID bytes"),
1615 LC_CODE_SIGNATURE => {
1616 mark_range(
1617 &mut mask,
1618 cursor,
1619 cursor + cmdsize,
1620 "code-signature load command",
1621 );
1622 if cmdsize >= 16 {
1623 let dataoff = u32_le(&bytes[cursor + 8..cursor + 12]) as usize;
1624 let datasize = u32_le(&bytes[cursor + 12..cursor + 16]) as usize;
1625 if let Some(end) = dataoff.checked_add(datasize) {
1626 if end <= bytes.len() {
1627 mark_range(&mut mask, dataoff, end, "code-signature hashes");
1628 }
1629 }
1630 }
1631 }
1632 LC_ID_DYLIB | LC_LOAD_DYLIB | LC_LOAD_WEAK_DYLIB | LC_REEXPORT_DYLIB
1633 | LC_LOAD_UPWARD_DYLIB
1634 if cmdsize >= 16 =>
1635 {
1636 mark_range(&mut mask, cursor + 12, cursor + 16, "dylib timestamp");
1637 }
1638 _ => {}
1639 }
1640 cursor += cmdsize;
1641 }
1642
1643 mask
1644 }
1645
/// Marks every mask slot in `[start, end)` with `reason`, clamping both
/// bounds to the mask length.
fn mark_range(mask: &mut [Option<&'static str>], start: usize, end: usize, reason: &'static str) {
    let lo = start.min(mask.len());
    let hi = end.min(mask.len());
    mask[lo..hi].iter_mut().for_each(|slot| *slot = Some(reason));
}
1653
1654 fn build_version_command(bytes: &[u8]) -> Result<Option<BuildVersionCmd>, String> {
1655 let header = parse_header(bytes).map_err(|e| e.to_string())?;
1656 let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
1657 Ok(commands.into_iter().find_map(|cmd| match cmd {
1658 LoadCommand::BuildVersion(cmd) => Some(cmd),
1659 _ => None,
1660 }))
1661 }
1662
1663 fn normalized_build_version(bytes: &[u8]) -> Result<Option<NormalizedBuildVersion>, String> {
1664 Ok(build_version_command(bytes)?.map(|cmd| {
1665 (
1666 cmd.platform,
1667 cmd.minos,
1668 cmd.sdk,
1669 cmd.tools.into_iter().map(|tool| tool.tool).collect(),
1670 )
1671 }))
1672 }
1673
1674 fn load_dylib_names(bytes: &[u8]) -> Result<Vec<String>, String> {
1675 let header = parse_header(bytes).map_err(|e| e.to_string())?;
1676 let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
1677 Ok(commands
1678 .into_iter()
1679 .filter_map(|cmd| match cmd {
1680 LoadCommand::Dylib(cmd) if cmd.cmd == LC_LOAD_DYLIB => Some(cmd.name),
1681 _ => None,
1682 })
1683 .collect())
1684 }
1685
/// Layout-independent view of one nlist symbol-table entry, used so two
/// images laid out at different addresses still compare equal.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CanonicalSymbolRecord {
    name: String,
    // Raw n_type byte as read from the nlist entry.
    n_type: u8,
    // `(segname, sectname)` for section symbols; `None` otherwise.
    section: Option<(String, String)>,
    n_desc: u16,
    // Section-relative for section symbols, raw n_value otherwise
    // (see `canonical_symbol_records`).
    value: u64,
}
1694
/// Canonical form of one export-trie entry's payload.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CanonicalExportKind {
    // The u64 payloads below are canonical symbol values taken from
    // `canonical_symbol_records`, not raw trie addresses.
    Regular(u64),
    ThreadLocal(u64),
    Absolute(u64),
    // Re-export from another dylib: source ordinal plus the imported name.
    Reexport { ordinal: u32, imported_name: String },
    // Raw stub/resolver offsets as stored in the trie.
    StubAndResolver { stub: u64, resolver: u64 },
}
1703
/// One export-trie entry in comparison-friendly form: name, raw trie flags,
/// and the canonicalized payload.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CanonicalExportRecord {
    name: String,
    flags: u64,
    kind: CanonicalExportKind,
}
1710
1711 fn canonical_symbol_record_map(
1712 bytes: &[u8],
1713 ) -> Result<BTreeMap<String, CanonicalSymbolRecord>, String> {
1714 Ok(canonical_symbol_records(bytes)?
1715 .into_iter()
1716 .map(|record| (record.name.clone(), record))
1717 .collect())
1718 }
1719
/// Builds layout-independent symbol records: section symbols are tagged with
/// their `(segname, sectname)` and rebased to section-relative values, so two
/// images with different base addresses still compare equal. The optional
/// undefined `dyld_stub_binder` import is filtered out.
fn canonical_symbol_records(bytes: &[u8]) -> Result<Vec<CanonicalSymbolRecord>, String> {
    let (symtab, _) = symtab_and_dysymtab(bytes)?;
    let symbols =
        parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?;
    let strings =
        StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?;
    let sections = section_regions(bytes)?;
    Ok(symbols
        .iter()
        .map(|symbol| {
            // NOTE(review): assumes sect_idx is a valid 1-based section index
            // for Sect symbols — an out-of-range index would panic here
            // rather than return Err.
            let (section, value) = if symbol.kind() == SymKind::Sect && symbol.sect_idx() != 0 {
                let section = &sections[symbol.sect_idx() as usize - 1];
                // Rebase to a section-relative value; a symbol below its
                // section's base address is kept raw rather than underflowing.
                let value = if symbol.value() >= section.addr {
                    symbol.value() - section.addr
                } else {
                    symbol.value()
                };
                (
                    Some((section.segname.clone(), section.sectname.clone())),
                    value,
                )
            } else {
                (None, symbol.value())
            };
            CanonicalSymbolRecord {
                // NOTE(review): unwrap assumes every n_strx points into the
                // string table; corrupt input would panic.
                name: strings.get(symbol.strx()).unwrap().to_string(),
                n_type: symbol.raw.n_type,
                section,
                n_desc: symbol.raw.n_desc,
                value,
            }
        })
        .filter(|record| !is_optional_dyld_stub_binder_record(record))
        .collect())
}
1755
1756 fn is_optional_dyld_stub_binder_record(record: &CanonicalSymbolRecord) -> bool {
1757 record.name == "dyld_stub_binder"
1758 && (record.n_type & N_TYPE) == N_UNDF
1759 && record.section.is_none()
1760 }
1761
/// Decodes the export trie into canonical records sorted by name. Value-
/// bearing export kinds are replaced with the symbol's canonical
/// (section-relative) value so differing image bases do not show up as diffs.
fn canonical_export_records(bytes: &[u8]) -> Result<Vec<CanonicalExportRecord>, String> {
    // NOTE(review): the path argument appears to be only a diagnostic label
    // for the in-memory parse — confirm DylibFile::parse never touches disk.
    let dylib = DylibFile::parse("/tmp/canonical.dylib", bytes).map_err(|e| e.to_string())?;
    let symbol_values: BTreeMap<String, u64> = canonical_symbol_records(bytes)?
        .into_iter()
        .map(|record| (record.name, record.value))
        .collect();
    let mut out = dylib
        .exports
        .entries()
        .map_err(|e| e.to_string())?
        .into_iter()
        .map(|entry| {
            // NOTE(review): the unwraps assume every value-bearing export also
            // has a symbol-table record of the same name; an export-only name
            // would panic here.
            let kind = match entry.kind {
                ExportKind::Regular { .. } => {
                    CanonicalExportKind::Regular(*symbol_values.get(&entry.name).unwrap())
                }
                ExportKind::ThreadLocal { .. } => {
                    CanonicalExportKind::ThreadLocal(*symbol_values.get(&entry.name).unwrap())
                }
                ExportKind::Absolute { .. } => {
                    CanonicalExportKind::Absolute(*symbol_values.get(&entry.name).unwrap())
                }
                ExportKind::Reexport {
                    ordinal,
                    imported_name,
                } => CanonicalExportKind::Reexport {
                    ordinal,
                    imported_name,
                },
                ExportKind::StubAndResolver { stub, resolver } => {
                    CanonicalExportKind::StubAndResolver { stub, resolver }
                }
            };
            CanonicalExportRecord {
                name: entry.name,
                flags: entry.flags,
                kind,
            }
        })
        .collect::<Vec<_>>();
    // Sorted by name so trie traversal order does not affect comparisons.
    out.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
    Ok(out)
}
1805
/// Splits the symbol table into the dysymtab's (local, external-defined,
/// undefined) name partitions. The optional `dyld_stub_binder` is dropped
/// from the undefined list so both linkers' outputs compare equal.
fn symbol_partition_names(bytes: &[u8]) -> Result<SymbolPartitions, String> {
    let (symtab, dysymtab) = symtab_and_dysymtab(bytes)?;
    let symbols =
        parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?;
    let strings =
        StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?;
    // NOTE(review): the slice below assumes each dysymtab range lies inside
    // the symbol table and that every strx resolves; corrupt indices would
    // panic here rather than return Err.
    let names_for = |start: u32, count: u32| -> Vec<String> {
        symbols[start as usize..(start + count) as usize]
            .iter()
            .map(|symbol| strings.get(symbol.strx()).unwrap().to_string())
            .collect()
    };
    Ok((
        names_for(dysymtab.ilocalsym, dysymtab.nlocalsym),
        names_for(dysymtab.iextdefsym, dysymtab.nextdefsym),
        names_for(dysymtab.iundefsym, dysymtab.nundefsym)
            .into_iter()
            .filter(|name| name != "dyld_stub_binder")
            .collect(),
    ))
}
1827
1828 fn has_optional_dyld_stub_binder(bytes: &[u8]) -> Result<bool, String> {
1829 let (symtab, _) = symtab_and_dysymtab(bytes)?;
1830 let symbols =
1831 parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?;
1832 let strings =
1833 StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?;
1834 Ok(symbols.iter().any(|symbol| {
1835 strings
1836 .get(symbol.strx())
1837 .map(|name| {
1838 name == "dyld_stub_binder"
1839 && (symbol.raw.n_type & N_TYPE) == N_UNDF
1840 && symbol.raw.n_sect == 0
1841 })
1842 .unwrap_or(false)
1843 }))
1844 }
1845
1846 fn raw_string_table(bytes: &[u8]) -> Result<Vec<u8>, String> {
1847 let (symtab, _) = symtab_and_dysymtab(bytes)?;
1848 let start = symtab.stroff as usize;
1849 let end = start + symtab.strsize as usize;
1850 Ok(bytes[start..end].to_vec())
1851 }
1852
1853 fn effective_string_table_len(bytes: &[u8]) -> Result<usize, String> {
1854 let mut len = raw_string_table(bytes)?.len();
1855 if has_optional_dyld_stub_binder(bytes)? {
1856 len = len.saturating_sub("dyld_stub_binder".len() + 1);
1857 }
1858 Ok(len)
1859 }
1860
/// True when `ours` is within 5% of `theirs` (measured relative to `theirs`).
///
/// `delta * 20 <= theirs` is the integer form of `delta / theirs <= 0.05`.
/// `saturating_mul` keeps a pathologically large delta from wrapping in
/// release builds (where the old `delta * 20` could wrap around and wrongly
/// accept the comparison) or panicking in debug builds.
pub fn string_table_within_five_percent(ours: usize, theirs: usize) -> bool {
    let delta = ours.abs_diff(theirs);
    delta.saturating_mul(20) <= theirs
}
1865
1866 fn indirect_symbol_table(bytes: &[u8]) -> Result<Vec<u32>, String> {
1867 let (_, dysymtab) = symtab_and_dysymtab(bytes)?;
1868 if dysymtab.nindirectsyms == 0 {
1869 return Ok(Vec::new());
1870 }
1871 let start = dysymtab.indirectsymoff as usize;
1872 let end = start + dysymtab.nindirectsyms as usize * 4;
1873 Ok(bytes[start..end]
1874 .chunks_exact(4)
1875 .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap()))
1876 .collect())
1877 }
1878
/// Renders each indirect-symbol slot as a comparable identity string: the
/// symbol name for ordinary slots, or a `<LOCAL>`/`<ABS>`/`<LOCAL|ABS>`
/// sentinel for slots carrying the special marker bits instead of an index.
fn indirect_symbol_identities(bytes: &[u8]) -> Result<Vec<String>, String> {
    let (symtab, _) = symtab_and_dysymtab(bytes)?;
    let symbols =
        parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?;
    let strings =
        StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?;
    Ok(indirect_symbol_table(bytes)?
        .into_iter()
        .map(|index| {
            // Sentinel bits take precedence over interpreting the value as a
            // symbol-table index.
            if index & INDIRECT_SYMBOL_LOCAL != 0 {
                if index & INDIRECT_SYMBOL_ABS != 0 {
                    "<LOCAL|ABS>".to_string()
                } else {
                    "<LOCAL>".to_string()
                }
            } else if index & INDIRECT_SYMBOL_ABS != 0 {
                "<ABS>".to_string()
            } else {
                // NOTE(review): assumes the slot holds a valid symbol index
                // and a resolvable strx; corrupt input would panic here.
                let symbol = &symbols[index as usize];
                strings.get(symbol.strx()).unwrap().to_string()
            }
        })
        .collect())
}
1903
/// Finds load command `expected_cmd` and returns its `(dataoff, datasize)`
/// linkedit-data fields.
///
/// Most linkedit commands come back from the reader as `LoadCommand::Raw`,
/// whose `data` payload starts right after cmd/cmdsize — so bytes 0..8 hold
/// dataoff and datasize. LC_LINKER_OPTIMIZATION_HINT is parsed into a typed
/// variant by the reader and is matched separately.
fn raw_linkedit_data_cmd(bytes: &[u8], expected_cmd: u32) -> Result<(u32, u32), String> {
    let header = parse_header(bytes).map_err(|e| e.to_string())?;
    let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
    for cmd in commands {
        match cmd {
            // NOTE(review): assumes the raw payload carries at least 8 bytes;
            // a truncated command would panic on these slices.
            LoadCommand::Raw { cmd, data, .. } if cmd == expected_cmd => {
                return Ok((u32_le(&data[0..4]), u32_le(&data[4..8])));
            }
            LoadCommand::LinkerOptimizationHint(linkedit)
                if expected_cmd == afs_ld::macho::constants::LC_LINKER_OPTIMIZATION_HINT =>
            {
                return Ok((linkedit.dataoff, linkedit.datasize));
            }
            _ => {}
        }
    }
    Err(format!("missing raw linkedit command 0x{expected_cmd:x}"))
}
1922
1923 fn linkedit_payload(bytes: &[u8], cmd: u32) -> Result<Vec<u8>, String> {
1924 let (dataoff, datasize) = raw_linkedit_data_cmd(bytes, cmd)?;
1925 if datasize == 0 {
1926 return Ok(Vec::new());
1927 }
1928 Ok(bytes[dataoff as usize..(dataoff + datasize) as usize].to_vec())
1929 }
1930
1931 fn decode_function_starts(bytes: &[u8]) -> Result<Vec<u64>, String> {
1932 let payload = linkedit_payload(bytes, LC_FUNCTION_STARTS)?;
1933 let mut offsets = Vec::new();
1934 let mut cursor = 0usize;
1935 let mut current = 0u64;
1936 while cursor < payload.len() {
1937 let (delta, used) = read_uleb(&payload[cursor..]).map_err(|e| e.to_string())?;
1938 cursor += used;
1939 if delta == 0 {
1940 break;
1941 }
1942 current += delta;
1943 offsets.push(current);
1944 }
1945 Ok(offsets)
1946 }
1947
/// Rebases function-start offsets so the first entry becomes zero; an empty
/// input stays empty.
fn normalize_function_start_offsets(starts: &[u64]) -> Vec<u64> {
    match starts.first() {
        Some(&base) => starts.iter().map(|&offset| offset - base).collect(),
        None => Vec::new(),
    }
}
1954
/// One LC_DATA_IN_CODE entry as decoded from the 8-byte on-disk records
/// (u32 offset, u16 length, u16 kind — all little-endian).
#[derive(Debug, Clone, PartialEq, Eq)]
struct DataInCodeRecord {
    offset: u32,
    length: u16,
    kind: u16,
}
1961
1962 fn decode_data_in_code(bytes: &[u8]) -> Result<Vec<DataInCodeRecord>, String> {
1963 let payload = linkedit_payload(bytes, LC_DATA_IN_CODE)?;
1964 Ok(payload
1965 .chunks_exact(8)
1966 .map(|chunk| DataInCodeRecord {
1967 offset: u32::from_le_bytes(chunk[0..4].try_into().unwrap()),
1968 length: u16::from_le_bytes(chunk[4..6].try_into().unwrap()),
1969 kind: u16::from_le_bytes(chunk[6..8].try_into().unwrap()),
1970 })
1971 .collect())
1972 }
1973
1974 fn canonical_data_in_code(bytes: &[u8]) -> Result<Vec<DataInCodeRecord>, String> {
1975 let text = output_section_header(bytes, "__TEXT", "__text")
1976 .ok_or_else(|| "missing __TEXT,__text section".to_string())?;
1977 Ok(decode_data_in_code(bytes)?
1978 .into_iter()
1979 .map(|record| DataInCodeRecord {
1980 offset: record.offset - text.offset,
1981 length: record.length,
1982 kind: record.kind,
1983 })
1984 .collect())
1985 }
1986
/// Where a bind record points: resolved to a named section with a
/// section-relative offset when possible, otherwise kept as the raw segment
/// index plus segment offset from the opcode stream.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum CanonicalBindLocation {
    Section {
        segname: String,
        sectname: String,
        offset: u64,
    },
    Segment {
        segment_index: u8,
        segment_offset: u64,
    },
}
1999
/// One decoded bind-opcode record in comparison-friendly form. Derives `Ord`
/// so record lists can be sorted into a stable order before diffing.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct CanonicalBindRecord {
    location: CanonicalBindLocation,
    // Dylib ordinal; zero/negative values come from the special-immediate
    // opcode (see `canonical_bind_records`).
    ordinal: i32,
    symbol: String,
    weak_import: bool,
    bind_type: u8,
    addend: i64,
}
2009
/// Interprets one dyld-info bind opcode stream (bind / weak-bind / lazy-bind,
/// chosen by `kind`) into canonical records, normalized and sorted so two
/// encodings of the same bindings compare equal.
///
/// The interpreter mirrors dyld's state machine: SET_* opcodes update
/// persistent state (ordinal, symbol, type, addend, segment+offset) and each
/// DO_BIND variant emits a record at the current location, then advances the
/// offset past the just-bound 64-bit pointer plus any opcode-specified skip.
fn canonical_bind_records(
    bytes: &[u8],
    kind: DyldInfoStreamKind,
) -> Result<Vec<CanonicalBindRecord>, String> {
    let stream = dyld_info_stream(bytes, kind)?;
    let mut cursor = 0usize;
    // Persistent interpreter state, carried across opcodes until overwritten.
    let mut segment_index = 0u8;
    let mut segment_offset = 0u64;
    let mut ordinal = 0i32;
    let mut symbol = String::new();
    let mut weak_import = false;
    let mut bind_type = BIND_TYPE_POINTER;
    let mut addend = 0i64;
    let mut out = Vec::new();

    while cursor < stream.len() {
        let byte = stream[cursor];
        cursor += 1;
        // Each stream byte packs an opcode in the high bits and an immediate
        // operand in the low bits.
        let opcode = byte & BIND_OPCODE_MASK;
        let imm = byte & BIND_IMMEDIATE_MASK;
        match opcode {
            BIND_OPCODE_DONE => break,
            BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => ordinal = imm as i32,
            BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                ordinal = value as i32;
            }
            BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => {
                // Sign-extend the 4-bit immediate to recover the negative
                // special ordinals; imm == 0 stays 0.
                ordinal = if imm == 0 {
                    0
                } else {
                    (((imm as i8) << 4) >> 4) as i32
                };
            }
            BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
                // The symbol name is a NUL-terminated string following the
                // opcode byte; the immediate carries the symbol flags.
                let (value, used) = read_c_string(&stream[cursor..])?;
                cursor += used;
                symbol = value;
                weak_import = (imm & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0;
            }
            BIND_OPCODE_SET_TYPE_IMM => bind_type = imm,
            BIND_OPCODE_SET_ADDEND_SLEB => {
                let (value, used) =
                    read_sleb(&stream[cursor..]).map_err(|e| format!("bind sleb: {e}"))?;
                cursor += used;
                addend = value;
            }
            BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                segment_index = imm;
                segment_offset = value;
            }
            BIND_OPCODE_ADD_ADDR_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                segment_offset += value;
            }
            BIND_OPCODE_DO_BIND => {
                out.push(CanonicalBindRecord {
                    location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                    ordinal,
                    symbol: symbol.clone(),
                    weak_import,
                    bind_type,
                    addend,
                });
                // Advance past the just-bound 64-bit pointer.
                segment_offset += 8;
            }
            BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                out.push(CanonicalBindRecord {
                    location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                    ordinal,
                    symbol: symbol.clone(),
                    weak_import,
                    bind_type,
                    addend,
                });
                segment_offset += 8 + value;
            }
            BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => {
                out.push(CanonicalBindRecord {
                    location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                    ordinal,
                    symbol: symbol.clone(),
                    weak_import,
                    bind_type,
                    addend,
                });
                // The immediate is a pointer-count scale: skip imm pointers.
                segment_offset += 8 + (imm as u64) * 8;
            }
            BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => {
                // Run-length form: `count` binds, each followed by `skip`
                // extra bytes between slots.
                let (count, count_used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += count_used;
                let (skip, skip_used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += skip_used;
                for _ in 0..count {
                    out.push(CanonicalBindRecord {
                        location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                        ordinal,
                        symbol: symbol.clone(),
                        weak_import,
                        bind_type,
                        addend,
                    });
                    segment_offset += 8 + skip;
                }
            }
            other => return Err(format!("unsupported bind opcode 0x{other:02x}")),
        }
    }

    normalize_bind_section_offsets(&mut out);
    out.sort();
    Ok(out)
}
2135
2136 fn normalize_bind_section_offsets(records: &mut [CanonicalBindRecord]) {
2137 let mut next_offsets: BTreeMap<(String, String), u64> = BTreeMap::new();
2138 records.sort();
2139 for record in records.iter_mut() {
2140 let CanonicalBindLocation::Section {
2141 segname,
2142 sectname,
2143 offset,
2144 } = &mut record.location
2145 else {
2146 continue;
2147 };
2148 let next = next_offsets
2149 .entry((segname.clone(), sectname.clone()))
2150 .or_insert(0);
2151 *offset = *next;
2152 *next += 8;
2153 }
2154 }
2155
/// Return the `__TEXT,__unwind_info` payload with absolute addresses
/// rewritten to stable section-relative offsets, so two outputs with
/// different base layouts can be compared byte-for-byte.
///
/// Rebased fields:
/// - personality pointers (GOT-start-relative when they land inside
///   `__DATA_CONST,__got`, else `__text`-relative),
/// - first-level index entries' function offsets (`__text`-relative),
/// - LSDA directory entries (function offsets `__text`-relative, LSDA
///   offsets `__gcc_except_tab`-relative).
fn rebased_unwind_bytes(bytes: &[u8]) -> Result<Vec<u8>, String> {
    // All bases are taken relative to __TEXT's vmaddr so the image base
    // cancels out of every rebased value.
    let header_base = segment_vmaddr(bytes, "__TEXT").unwrap_or(0);
    let text_base = output_section(bytes, "__TEXT", "__text")
        .ok_or_else(|| "missing __TEXT,__text section".to_string())?
        .0
        - header_base;
    let got_range = output_section(bytes, "__DATA_CONST", "__got")
        .map(|(addr, data)| (addr - header_base, addr - header_base + data.len() as u64));
    let lsda_base =
        output_section(bytes, "__TEXT", "__gcc_except_tab").map(|(addr, _)| addr - header_base);
    let (_, unwind) = output_section(bytes, "__TEXT", "__unwind_info")
        .ok_or_else(|| "missing __TEXT,__unwind_info section".to_string())?;
    let mut out = unwind;
    // Too small to hold the fixed unwind_info header; nothing to rebase.
    if out.len() < 28 {
        return Ok(out);
    }

    // Header words at byte offsets 12..28 locate the personality array
    // and the first-level index array (offset/count pairs, LE u32).
    let personalities_offset = u32_le(&out[12..16]) as usize;
    let personalities_count = u32_le(&out[16..20]) as usize;
    let indices_offset = u32_le(&out[20..24]) as usize;
    let indices_count = u32_le(&out[24..28]) as usize;

    for idx in 0..personalities_count {
        let off = personalities_offset + idx * 4;
        let value = u32_le(&out[off..off + 4]) as u64;
        // Prefer expressing the personality pointer relative to the GOT
        // start when it points into __got; fall back to the __text base.
        // Values below both bases are left untouched.
        let rebased = if let Some((got_start, got_end)) = got_range {
            if got_start <= value && value < got_end {
                value - got_start
            } else if value >= text_base {
                value - text_base
            } else {
                value
            }
        } else if value >= text_base {
            value - text_base
        } else {
            value
        };
        out[off..off + 4].copy_from_slice(&(rebased as u32).to_le_bytes());
    }

    // First-level index entries are 12 bytes (function offset, page
    // offset, LSDA index offset). Rebase the function offset and record
    // the LSDA index offsets to locate the LSDA directory below.
    let mut lsda_offsets = Vec::with_capacity(indices_count);
    for idx in 0..indices_count {
        let entry_off = indices_offset + idx * 12;
        let function_offset = u32_le(&out[entry_off..entry_off + 4]) as u64;
        let rebased = function_offset.saturating_sub(text_base);
        out[entry_off..entry_off + 4].copy_from_slice(&(rebased as u32).to_le_bytes());
        lsda_offsets.push(u32_le(&out[entry_off + 8..entry_off + 12]) as usize);
    }

    // The LSDA directory spans from the first index entry's LSDA offset
    // to the last (sentinel) entry's; each 8-byte record pairs a function
    // offset with an LSDA offset.
    if let (Some(lsda_base), Some(&start), Some(&end)) =
        (lsda_base, lsda_offsets.first(), lsda_offsets.last())
    {
        let mut entry_off = start;
        while entry_off < end {
            let function_offset = u32_le(&out[entry_off..entry_off + 4]) as u64;
            let lsda_offset = u32_le(&out[entry_off + 4..entry_off + 8]) as u64;
            out[entry_off..entry_off + 4]
                .copy_from_slice(&(function_offset.saturating_sub(text_base) as u32).to_le_bytes());
            out[entry_off + 4..entry_off + 8]
                .copy_from_slice(&(lsda_offset.saturating_sub(lsda_base) as u32).to_le_bytes());
            entry_off += 8;
        }
    }

    // Sanity-check that the rewritten blob still parses as unwind info.
    let _ = decode_unwind_info(&out).map_err(|e| format!("decode unwind info: {e}"))?;
    Ok(out)
}
2224
2225 fn symtab_and_dysymtab(
2226 bytes: &[u8],
2227 ) -> Result<
2228 (
2229 afs_ld::macho::reader::SymtabCmd,
2230 afs_ld::macho::reader::DysymtabCmd,
2231 ),
2232 String,
2233 > {
2234 let header = parse_header(bytes).map_err(|e| e.to_string())?;
2235 let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
2236 let mut symtab = None;
2237 let mut dysymtab = None;
2238 for cmd in commands {
2239 match cmd {
2240 LoadCommand::Symtab(cmd) => symtab = Some(cmd),
2241 LoadCommand::Dysymtab(cmd) => dysymtab = Some(cmd),
2242 _ => {}
2243 }
2244 }
2245 Ok((
2246 symtab.ok_or_else(|| "missing LC_SYMTAB".to_string())?,
2247 dysymtab.ok_or_else(|| "missing LC_DYSYMTAB".to_string())?,
2248 ))
2249 }
2250
2251 fn section_addrs(bytes: &[u8]) -> Result<Vec<u64>, String> {
2252 Ok(section_regions(bytes)?
2253 .into_iter()
2254 .map(|section| section.addr)
2255 .collect())
2256 }
2257
/// An `LC_SEGMENT_64` summary: ordinal position plus VM placement.
#[derive(Debug, Clone)]
struct SegmentRegion {
    // Zero-based position among LC_SEGMENT_64 commands; bind opcodes
    // address segments by this index.
    index: u8,
    segname: String,
    vmaddr: u64,
    vmsize: u64,
}
2265
/// A section's identity and VM extent, tagged with the ordinal index of
/// its owning segment (matching `SegmentRegion::index`).
#[derive(Debug, Clone)]
struct SectionRegion {
    segment_index: u8,
    segname: String,
    sectname: String,
    addr: u64,
    size: u64,
}
2274
/// A layout-independent address: a section identity plus the offset into
/// that section. Ordering is derived lexicographically over the fields.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct CanonicalSectionLocation {
    segname: String,
    sectname: String,
    offset: u64,
}
2281
2282 fn segment_regions(bytes: &[u8]) -> Result<Vec<SegmentRegion>, String> {
2283 let header = parse_header(bytes).map_err(|e| e.to_string())?;
2284 let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
2285 let mut out = Vec::new();
2286 let mut index = 0u8;
2287 for cmd in commands {
2288 if let LoadCommand::Segment64(seg) = cmd {
2289 out.push(SegmentRegion {
2290 index,
2291 segname: seg.segname_str().to_string(),
2292 vmaddr: seg.vmaddr,
2293 vmsize: seg.vmsize,
2294 });
2295 index = index.saturating_add(1);
2296 }
2297 }
2298 Ok(out)
2299 }
2300
2301 fn section_regions(bytes: &[u8]) -> Result<Vec<SectionRegion>, String> {
2302 let header = parse_header(bytes).map_err(|e| e.to_string())?;
2303 let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
2304 let mut out = Vec::new();
2305 let mut segment_index = 0u8;
2306 for cmd in commands {
2307 if let LoadCommand::Segment64(seg) = cmd {
2308 for section in seg.sections {
2309 out.push(SectionRegion {
2310 segment_index,
2311 segname: section.segname_str().to_string(),
2312 sectname: section.sectname_str().to_string(),
2313 addr: section.addr,
2314 size: section.size,
2315 });
2316 }
2317 segment_index = segment_index.saturating_add(1);
2318 }
2319 }
2320 Ok(out)
2321 }
2322
2323 fn canonical_bind_location(
2324 bytes: &[u8],
2325 segment_index: u8,
2326 segment_offset: u64,
2327 ) -> Result<CanonicalBindLocation, String> {
2328 let segments = segment_regions(bytes)?;
2329 let sections = section_regions(bytes)?;
2330 let Some(segment) = segments
2331 .iter()
2332 .find(|segment| segment.index == segment_index)
2333 else {
2334 return Ok(CanonicalBindLocation::Segment {
2335 segment_index,
2336 segment_offset,
2337 });
2338 };
2339 if segment_offset >= segment.vmsize {
2340 return Ok(CanonicalBindLocation::Segment {
2341 segment_index,
2342 segment_offset,
2343 });
2344 }
2345 let addr = segment.vmaddr + segment_offset;
2346 if let Some(section) = sections.iter().find(|section| {
2347 section.segment_index == segment_index
2348 && section.addr <= addr
2349 && addr < section.addr + section.size
2350 }) {
2351 return Ok(CanonicalBindLocation::Section {
2352 segname: section.segname.clone(),
2353 sectname: section.sectname.clone(),
2354 offset: addr - section.addr,
2355 });
2356 }
2357 Ok(CanonicalBindLocation::Segment {
2358 segment_index,
2359 segment_offset,
2360 })
2361 }
2362
2363 fn canonical_section_location(bytes: &[u8], addr: u64) -> Result<CanonicalSectionLocation, String> {
2364 let sections = section_regions(bytes)?;
2365 let section = sections
2366 .into_iter()
2367 .find(|section| section.addr <= addr && addr < section.addr + section.size)
2368 .ok_or_else(|| format!("address 0x{addr:x} is not inside any output section"))?;
2369 Ok(CanonicalSectionLocation {
2370 segname: section.segname,
2371 sectname: section.sectname,
2372 offset: addr - section.addr,
2373 })
2374 }
2375
/// Selects which `LC_DYLD_INFO_ONLY` opcode stream to extract.
#[derive(Clone, Copy)]
enum DyldInfoStreamKind {
    Rebase,
    Bind,
    WeakBind,
    LazyBind,
}
2383
2384 fn dyld_info_command(bytes: &[u8]) -> Result<DyldInfoCmd, String> {
2385 let header = parse_header(bytes).map_err(|e| e.to_string())?;
2386 let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
2387 commands
2388 .into_iter()
2389 .find_map(|cmd| match cmd {
2390 LoadCommand::DyldInfoOnly(cmd) => Some(cmd),
2391 _ => None,
2392 })
2393 .ok_or_else(|| "missing LC_DYLD_INFO_ONLY".to_string())
2394 }
2395
2396 fn dyld_info_stream(bytes: &[u8], kind: DyldInfoStreamKind) -> Result<Vec<u8>, String> {
2397 let dyld_info = dyld_info_command(bytes)?;
2398 let (off, size) = match kind {
2399 DyldInfoStreamKind::Rebase => (dyld_info.rebase_off, dyld_info.rebase_size),
2400 DyldInfoStreamKind::Bind => (dyld_info.bind_off, dyld_info.bind_size),
2401 DyldInfoStreamKind::WeakBind => (dyld_info.weak_bind_off, dyld_info.weak_bind_size),
2402 DyldInfoStreamKind::LazyBind => (dyld_info.lazy_bind_off, dyld_info.lazy_bind_size),
2403 };
2404 if size == 0 {
2405 return Ok(Vec::new());
2406 }
2407 let start = off as usize;
2408 let end = start + size as usize;
2409 bytes
2410 .get(start..end)
2411 .map(|slice| slice.to_vec())
2412 .ok_or_else(|| "dyld-info stream out of bounds".to_string())
2413 }
2414
/// Read a NUL-terminated UTF-8 string from the front of `bytes`.
///
/// Returns the decoded string and the number of bytes consumed,
/// including the terminator. Errors if no NUL is found or the payload is
/// not valid UTF-8.
fn read_c_string(bytes: &[u8]) -> Result<(String, usize), String> {
    for (idx, byte) in bytes.iter().enumerate() {
        if *byte == 0 {
            let text = std::str::from_utf8(&bytes[..idx])
                .map_err(|e| format!("utf-8 in C string: {e}"))?;
            return Ok((text.to_string(), idx + 1));
        }
    }
    Err("unterminated C string".to_string())
}
2425
2426 fn canonical_stub_targets(bytes: &[u8]) -> Result<Vec<u64>, String> {
2427 let header = output_section_header(bytes, "__TEXT", "__stubs")
2428 .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?;
2429 let (section_addr, section_bytes) = output_section(bytes, "__TEXT", "__stubs")
2430 .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?;
2431 if section_bytes.is_empty() {
2432 return Ok(Vec::new());
2433 }
2434 let stub_size = usize::try_from(header.reserved2)
2435 .ok()
2436 .filter(|size| *size > 0)
2437 .unwrap_or(12);
2438 if section_bytes.len() % stub_size != 0 {
2439 return Err(format!(
2440 "__TEXT,__stubs size {} is not a multiple of stub size {}",
2441 section_bytes.len(),
2442 stub_size
2443 ));
2444 }
2445 let mut out = Vec::new();
2446 for (idx, chunk) in section_bytes.chunks_exact(stub_size).enumerate() {
2447 out.push(decode_stub_target(
2448 chunk,
2449 section_addr + (idx * stub_size) as u64,
2450 )?);
2451 }
2452 Ok(out)
2453 }
2454
/// Canonical view of `__TEXT,__stub_helper`: where the header's two
/// page references resolve, plus each entry's lazy-bind stream offset.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CanonicalStubHelper {
    dyld_private: CanonicalSectionLocation,
    binder_got: CanonicalSectionLocation,
    lazy_bind_offsets: Vec<u32>,
}
2461
/// Decode `__TEXT,__stub_helper` into a canonical description, validating
/// the instruction shapes along the way.
///
/// Layout expected: a shared header (ADRP+ADD materializing the dyld
/// private pointer at offset 0, ADRP+LDR loading the binder from its GOT
/// slot at offset 12), followed by fixed-size entries that each load a
/// lazy-bind stream offset and branch back to the header.
fn canonical_stub_helper(bytes: &[u8]) -> Result<CanonicalStubHelper, String> {
    let (section_addr, section_bytes) = output_section(bytes, "__TEXT", "__stub_helper")
        .ok_or_else(|| "missing __TEXT,__stub_helper section".to_string())?;
    if section_bytes.len() < STUB_HELPER_HEADER_SIZE as usize {
        return Err(format!(
            "__TEXT,__stub_helper is too small for header: {} < {}",
            section_bytes.len(),
            STUB_HELPER_HEADER_SIZE
        ));
    }
    let dyld_private_target =
        decode_page_reference(&section_bytes, section_addr, 0, PageRefKind::Add)?;
    let binder_got_target =
        decode_page_reference(&section_bytes, section_addr, 12, PageRefKind::Load)?;
    let dyld_private = canonical_section_location(bytes, dyld_private_target)?;
    let binder_got = canonical_section_location(bytes, binder_got_target)?;

    let entry_bytes = &section_bytes[STUB_HELPER_HEADER_SIZE as usize..];
    if entry_bytes.len() % STUB_HELPER_ENTRY_SIZE as usize != 0 {
        return Err(format!(
            "__TEXT,__stub_helper entries {} are not a multiple of {}",
            entry_bytes.len(),
            STUB_HELPER_ENTRY_SIZE
        ));
    }

    let mut lazy_bind_offsets = Vec::new();
    for (idx, chunk) in entry_bytes
        .chunks_exact(STUB_HELPER_ENTRY_SIZE as usize)
        .enumerate()
    {
        let entry_addr = section_addr
            + STUB_HELPER_HEADER_SIZE as u64
            + (idx as u64) * STUB_HELPER_ENTRY_SIZE as u64;
        let ldr = read_insn(chunk, 0)?;
        // 0x1800_0050 is the exact LDR-literal encoding required here
        // (loads the 32-bit word 8 bytes ahead, i.e. this entry's trailing
        // lazy-bind offset); any other encoding is rejected.
        if ldr != 0x1800_0050 {
            return Err(format!(
                "stub helper entry at 0x{entry_addr:x} does not start with LDR literal"
            ));
        }
        // The second instruction must branch back to the shared header at
        // the start of the section.
        let branch = read_insn(chunk, 4)?;
        let branch_target = decode_branch26_target(branch, entry_addr + 4)?;
        if branch_target != section_addr {
            return Err(format!(
                "stub helper entry at 0x{entry_addr:x} branches to 0x{branch_target:x}, expected header 0x{section_addr:x}"
            ));
        }
        // Trailing 32-bit word: offset into the lazy-bind opcode stream.
        lazy_bind_offsets.push(u32_le(&chunk[8..12]));
    }

    Ok(CanonicalStubHelper {
        dyld_private,
        binder_got,
        lazy_bind_offsets,
    })
}
2518
/// Decode one AArch64 stub (`ADRP xN; LDR xN, [xN, #imm]; BR xN`) and
/// return the absolute address of the pointer slot it loads through.
fn decode_stub_target(bytes: &[u8], stub_addr: u64) -> Result<u64, String> {
    let adrp = read_insn(bytes, 0)?;
    let ldr = read_insn(bytes, 4)?;
    let br = read_insn(bytes, 8)?;
    // Opcode-pattern checks: ADRP, 64-bit LDR (unsigned immediate), BR.
    if (adrp & 0x9f00_0000) != 0x9000_0000 {
        return Err(format!("stub at 0x{stub_addr:x} does not start with ADRP"));
    }
    if (ldr & 0xffc0_0000) != 0xf940_0000 {
        return Err(format!(
            "stub at 0x{stub_addr:x} does not use LDR (unsigned)"
        ));
    }
    if (br & 0xffff_fc1f) != 0xd61f_0000 {
        return Err(format!("stub at 0x{stub_addr:x} does not end with BR"));
    }
    // All three instructions must use the same scratch register.
    let adrp_reg = (adrp & 0x1f) as u8;
    let ldr_base = ((ldr >> 5) & 0x1f) as u8;
    let ldr_reg = (ldr & 0x1f) as u8;
    let br_reg = ((br >> 5) & 0x1f) as u8;
    if adrp_reg != ldr_base || adrp_reg != ldr_reg || adrp_reg != br_reg {
        return Err(format!(
            "stub at 0x{stub_addr:x} uses inconsistent scratch regs: adrp=x{adrp_reg}, ldr base=x{ldr_base}, ldr rt=x{ldr_reg}, br=x{br_reg}"
        ));
    }
    // ADRP target page = stub's own page + sign-extended 21-bit page
    // delta (immlo in bits 30:29, immhi in bits 23:5).
    let adrp_immlo = ((adrp >> 29) & 0x3) as i64;
    let adrp_immhi = ((adrp >> 5) & 0x7ffff) as i64;
    let adrp_pages = sign_extend_21((adrp_immhi << 2) | adrp_immlo);
    let adrp_base = ((stub_addr as i64) & !0xfff) + (adrp_pages << 12);
    // The LDR imm12 is scaled by the 8-byte access size of 64-bit loads.
    let scaled = ((ldr >> 10) & 0xfff) as u64;
    Ok((adrp_base as u64) + scaled * 8)
}
2550
/// Resolve the target of an unconditional branch26 instruction located
/// at address `place`. Errors when `insn` does not match the branch26
/// encoding class.
fn decode_branch26_target(insn: u32, place: u64) -> Result<u64, String> {
    let is_branch26 = (insn & 0xfc00_0000) == 0x1400_0000;
    if !is_branch26 {
        return Err(format!(
            "instruction 0x{insn:08x} at 0x{place:x} is not a B/BL branch26"
        ));
    }
    // Sign-extend the 26-bit immediate, then scale by the 4-byte
    // instruction size.
    let shift = 64 - 26;
    let offset = ((((insn & 0x03ff_ffff) as i64) << shift) >> shift) << 2;
    Ok(((place as i64) + offset) as u64)
}
2560
/// Sign-extend the low 26 bits of `value`; bits above 26 are discarded.
fn sign_extend_26(value: i64) -> i64 {
    const SIGN_BIT: i64 = 1 << 25;
    const FIELD: i64 = 1 << 26;
    let low = value & (FIELD - 1);
    if low & SIGN_BIT != 0 {
        low - FIELD
    } else {
        low
    }
}
2565
2566 fn symbol_values(bytes: &[u8]) -> Result<BTreeMap<String, u64>, String> {
2567 let header = parse_header(bytes).map_err(|e| e.to_string())?;
2568 let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
2569 let symtab = commands
2570 .iter()
2571 .find_map(|cmd| match cmd {
2572 LoadCommand::Symtab(cmd) => Some(*cmd),
2573 _ => None,
2574 })
2575 .ok_or_else(|| "missing LC_SYMTAB".to_string())?;
2576 let symbols =
2577 parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?;
2578 let strings =
2579 StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?;
2580 let mut out = BTreeMap::new();
2581 for symbol in symbols {
2582 let Ok(name) = strings.get(symbol.strx()) else {
2583 continue;
2584 };
2585 out.insert(name.to_string(), symbol.value());
2586 }
2587 Ok(out)
2588 }
2589
/// Decode an ADRP-plus-successor instruction pair at `site_offset` inside
/// a section and return the absolute address it materializes.
///
/// `kind` selects how the low 12 bits are encoded in the second
/// instruction: `Add` reads a plain ADD immediate; `Load` reads an LDR
/// unsigned-immediate offset scaled by the access size.
fn decode_page_reference(
    bytes: &[u8],
    section_addr: u64,
    site_offset: u64,
    kind: PageRefKind,
) -> Result<u64, String> {
    let start = site_offset as usize;
    let adrp = read_insn(bytes, start)?;
    let second = read_insn(bytes, start + 4)?;
    // ADRP is relative to the 4 KiB page of the referencing site.
    let place = section_addr + site_offset;
    // Page delta: immlo in bits 30:29, immhi in bits 23:5, sign-extended
    // to 21 bits and scaled by the page size.
    let adrp_immlo = ((adrp >> 29) & 0x3) as i64;
    let adrp_immhi = ((adrp >> 5) & 0x7ffff) as i64;
    let adrp_pages = sign_extend_21((adrp_immhi << 2) | adrp_immlo);
    let adrp_base = ((place as i64) & !0xfff) + (adrp_pages << 12);
    let low = match kind {
        PageRefKind::Add => ((second >> 10) & 0xfff) as u64,
        PageRefKind::Load => {
            // LDR scales imm12 by the access size (size field, bits 31:30).
            let shift = ((second >> 30) & 0b11) as u64;
            (((second >> 10) & 0xfff) as u64) << shift
        }
    };
    Ok((adrp_base as u64) + low)
}
2613
/// Read one 32-bit little-endian instruction word at byte offset `start`,
/// erroring when fewer than four bytes remain.
fn read_insn(bytes: &[u8], start: usize) -> Result<u32, String> {
    match bytes.get(start..start + 4) {
        Some(word) => Ok(u32::from_le_bytes([word[0], word[1], word[2], word[3]])),
        None => Err(format!("instruction read OOB at 0x{start:x}")),
    }
}
2621
/// Sign-extend a 21-bit immediate held in the low bits of `value`.
///
/// Note: when bit 20 is clear, `value` passes through untouched (bits at
/// or above 21 are NOT masked off); callers only ever pass 21-bit fields.
fn sign_extend_21(value: i64) -> i64 {
    match value & (1 << 20) {
        0 => value,
        _ => value | !0x1f_ffff,
    }
}
2629