| 1 | //! Differential harness shared by parity-oriented integration tests. |
| 2 | //! |
| 3 | //! The early scaffold only diffed arbitrary byte slices. Sprint 27 starts |
| 4 | //! turning it into a real Apple-`ld` matrix harness with a tiny corpus, basic |
| 5 | //! tolerated-diff rules, and reusable link/runtime helpers. |
| 6 | |
| 7 | #![allow(dead_code)] |
| 8 | |
| 9 | use std::collections::{BTreeMap, HashSet}; |
| 10 | use std::fs; |
| 11 | use std::path::{Path, PathBuf}; |
| 12 | use std::process::{Command, Stdio}; |
| 13 | use std::thread; |
| 14 | use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; |
| 15 | |
| 16 | use afs_ld::leb::{read_sleb, read_uleb}; |
| 17 | use afs_ld::macho::constants::{ |
| 18 | BIND_IMMEDIATE_MASK, BIND_OPCODE_ADD_ADDR_ULEB, BIND_OPCODE_DONE, BIND_OPCODE_DO_BIND, |
| 19 | BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB, |
| 20 | BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, BIND_OPCODE_MASK, BIND_OPCODE_SET_ADDEND_SLEB, |
| 21 | BIND_OPCODE_SET_DYLIB_ORDINAL_IMM, BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, |
| 22 | BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, |
| 23 | BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, BIND_OPCODE_SET_TYPE_IMM, |
| 24 | BIND_SYMBOL_FLAGS_WEAK_IMPORT, BIND_TYPE_POINTER, INDIRECT_SYMBOL_ABS, INDIRECT_SYMBOL_LOCAL, |
| 25 | LC_BUILD_VERSION, LC_CODE_SIGNATURE, LC_DATA_IN_CODE, LC_DYLD_CHAINED_FIXUPS, |
| 26 | LC_DYLD_EXPORTS_TRIE, LC_DYLD_INFO_ONLY, LC_DYSYMTAB, LC_FUNCTION_STARTS, LC_ID_DYLIB, |
| 27 | LC_LOAD_DYLIB, LC_LOAD_UPWARD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB, LC_SEGMENT_64, |
| 28 | LC_SYMTAB, LC_UUID, N_TYPE, N_UNDF, |
| 29 | }; |
| 30 | use afs_ld::macho::dylib::DylibFile; |
| 31 | use afs_ld::macho::exports::ExportKind; |
| 32 | use afs_ld::macho::reader::{ |
| 33 | parse_commands, parse_header, u32_le, BuildVersionCmd, DyldInfoCmd, LoadCommand, |
| 34 | Section64Header, |
| 35 | }; |
| 36 | use afs_ld::string_table::StringTable; |
| 37 | use afs_ld::symbol::{parse_nlist_table, SymKind}; |
| 38 | use afs_ld::synth::stubs::{STUB_HELPER_ENTRY_SIZE, STUB_HELPER_HEADER_SIZE}; |
| 39 | use afs_ld::synth::unwind::decode_unwind_info; |
| 40 | |
/// One parity test case, loaded from a corpus subdirectory by `load_corpus`.
#[derive(Debug, Clone)]
pub struct LinkCase {
    /// Case name, taken from the corpus subdirectory's file name.
    pub name: String,
    /// Path of the case directory itself.
    pub dir: PathBuf,
    /// Link inputs (`.s`/`.c`/`.o`/`.a`/`.tbd`) found under `inputs/`, sorted,
    /// minus any files that are consumed as artifact sources instead.
    pub inputs: Vec<PathBuf>,
    /// Linker argument tokens read from `args.txt`.
    pub args: Vec<String>,
    /// `(segname, sectname)` pairs whose contents are compared (`sections.txt`).
    pub section_checks: Vec<(String, String)>,
    /// Sections that must NOT be emitted (`absent_sections.txt`).
    pub absent_sections: Vec<(String, String)>,
    /// Page-reference site checks read from `page_refs.txt`.
    pub page_ref_checks: Vec<PageRefCheck>,
    /// Load-command-level semantic checks read from `command_checks.txt`.
    pub command_checks: Vec<CommandCheck>,
    // Prebuilt helper inputs (dylibs/archives) described by `artifacts.txt`;
    // built on demand by `link_both`, not linked as raw sources.
    artifacts: Vec<ArtifactSpec>,
    /// Load-command ids excluded from the id comparison
    /// (`ignored_load_commands.txt`).
    pub ignored_load_commands: Vec<u32>,
    /// Load-command ids that must be absent (`absent_load_commands.txt`).
    pub absent_load_commands: Vec<u32>,
    /// Argument tokens for running the linked output (`runtime.txt`), if any.
    pub runtime_args: Vec<String>,
    /// Free-form contents of `notes.md`, when present.
    pub notes: Option<String>,
    /// Tolerated-diff regions parsed out of the notes.
    pub case_tolerances: Vec<CaseTolerance>,
}
| 58 | |
/// A load-command-level semantic comparison performed by
/// `compare_command_details` on both linker outputs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommandCheck {
    /// Normalized `LC_BUILD_VERSION` payloads must match.
    BuildVersion,
    /// Ordered dylib-name lists must match.
    LoadDylibNames,
    /// Canonicalized export records must match.
    ExportRecords,
    /// Canonicalized symbol-record maps must match.
    SymbolRecordMap,
    /// Indirect-symbol identities must match.
    IndirectSymbolIdentities,
    /// Symbol names, compared per partition, must match.
    SymbolPartitionNames,
    /// String-table lengths must be within 5% of each other (not byte-equal).
    StringTableNearParity,
    /// Decoded function-start offsets must match exactly.
    FunctionStarts,
    /// Function-start offsets must match after normalization.
    NormalizedFunctionStarts,
    /// Canonical data-in-code records must match.
    DataInCode,
    /// Like `DataInCode`, but only enforced when BOTH sides emit records.
    DataInCodeIfPresent,
    /// Unwind-info bytes must match after rebasing.
    RebasedUnwindBytes,
    /// Raw dyld-info rebase streams must be byte-identical.
    DyldInfoRebase,
    /// Canonicalized bind records must match.
    DyldInfoBind,
    /// Canonicalized weak-bind records must match.
    DyldInfoWeakBind,
    /// Canonicalized lazy-bind records must match.
    DyldInfoLazyBind,
}
| 78 | |
/// A single page-reference check: decode the instruction at `site_offset`
/// inside `segname,sectname` (in each output) and verify the decoded target
/// matches the address resolved for `symbol` in that same output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageRefCheck {
    /// Segment containing the reference site.
    pub segname: String,
    /// Section containing the reference site.
    pub sectname: String,
    /// Byte offset of the site within the section.
    pub site_offset: u64,
    /// Which reference shape to decode at the site.
    pub kind: PageRefKind,
    /// Symbol whose resolved address the site must reference.
    pub symbol: String,
}
| 87 | |
/// Shape of a page reference at a check site.
/// NOTE(review): presumably distinguishes ADRP+ADD vs ADRP+LDR arm64
/// sequences — confirm against `decode_page_reference` (not in this chunk).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PageRefKind {
    Add,
    Load,
}
| 93 | |
/// A case-specific tolerated-diff rule, parsed out of the case's `notes.md`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaseTolerance {
    /// Where the tolerated bytes live.
    pub region: ToleranceRegion,
    /// Human-readable justification for tolerating the diff.
    pub reason: String,
}
| 99 | |
/// A region of bytes whose differences are tolerated during section
/// comparison (consumed by `apply_section_tolerances`, not in this chunk).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ToleranceRegion {
    /// An inclusive byte range within a section's compared bytes.
    /// `sectname: None` presumably matches any section of the segment —
    /// TODO confirm against `apply_section_tolerances`.
    SectionBytes {
        segname: String,
        sectname: Option<String>,
        start: usize,
        end_inclusive: usize,
    },
}
| 109 | |
/// Recipe for a prebuilt input (dylib/archive) built before linking; parsed
/// from `artifacts.txt` and realized by `link_both`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ArtifactSpec {
    // Source file name under `inputs/` to compile into the artifact.
    src_name: String,
    // File name of the produced artifact in the work dir.
    out_name: String,
    // How the source is built (dylib, static archive, reexporting dylib).
    kind: ArtifactKind,
    // For `ReexportDylib`: out_name of the artifact it reexports; that
    // artifact must be built earlier in the list.
    dep_name: Option<String>,
}
| 117 | |
/// How an artifact source is compiled (see the build dispatch in `link_both`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArtifactKind {
    // Built with `compile_dylib_c`.
    Dylib,
    // Built with `compile_archive_c` (clang object + `libtool -static`).
    Archive,
    // Built with `compile_reexport_dylib_c`; requires `dep_name`.
    ReexportDylib,
}
| 124 | |
| 125 | type SymbolPartitions = (Vec<String>, Vec<String>, Vec<String>); |
| 126 | |
/// The pair of linked images produced by `link_both` for one case.
pub struct LinkOutputs {
    /// Bytes of the afs-ld output.
    pub ours: Vec<u8>,
    /// Bytes of the Apple `ld` output.
    pub theirs: Vec<u8>,
    /// On-disk path of the afs-ld output (kept for runtime execution).
    pub our_path: PathBuf,
    /// On-disk path of the Apple `ld` output.
    pub their_path: PathBuf,
}
| 133 | |
/// Classification of a byte-range difference between the two outputs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffCategory {
    /// A diff we expect: UUID bytes, code-signature hashes, etc.
    /// Carries a static label naming the tolerated region.
    Tolerated(&'static str),
    /// Anything else. Fails the parity test.
    Critical,
}
| 141 | |
/// One contiguous run of differing bytes found while diffing two buffers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffChunk {
    /// Start offset of the differing run.
    pub offset: usize,
    /// Length of the differing run in bytes.
    pub len: usize,
    /// Human-readable description of the diff.
    pub reason: String,
    /// Whether the diff is tolerated or fails the test.
    pub category: DiffCategory,
}
| 149 | |
/// Result of diffing two byte buffers, split by severity.
#[derive(Debug, Default)]
pub struct DiffReport {
    /// Diffs matched by a tolerance rule; reported but not fatal.
    pub tolerated: Vec<DiffChunk>,
    /// Unexplained diffs; any entry here fails the parity test.
    pub critical: Vec<DiffChunk>,
}
| 155 | |
| 156 | impl DiffReport { |
| 157 | pub fn is_clean(&self) -> bool { |
| 158 | self.critical.is_empty() |
| 159 | } |
| 160 | } |
| 161 | |
/// Captured result of executing a linked program during runtime checks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProgramOutput {
    /// Process exit code; `None` when the process was terminated by a signal.
    pub exit_code: Option<i32>,
    /// Raw bytes captured from stdout.
    pub stdout: Vec<u8>,
    /// Raw bytes captured from stderr.
    pub stderr: Vec<u8>,
}
| 168 | |
| 169 | type NormalizedBuildVersion = (u32, u32, u32, Vec<u32>); |
| 170 | |
/// Reports whether `xcrun` is available and can locate the `as` assembler.
/// Any spawn failure is treated as "not available".
pub fn have_xcrun() -> bool {
    match Command::new("xcrun").args(["-f", "as"]).output() {
        Ok(out) => out.status.success(),
        Err(_) => false,
    }
}
| 179 | |
/// Reports whether `xcrun` can locate `tool`. Spawn failures count as
/// "not available".
pub fn have_xcrun_tool(tool: &str) -> bool {
    let lookup = Command::new("xcrun").args(["-f", tool]).output();
    lookup.map_or(false, |o| o.status.success())
}
| 188 | |
/// Reports whether `tool` is on PATH by probing `tool --version`. Tools that
/// reject `--version` but print to stderr still count as present.
pub fn have_tool(tool: &str) -> bool {
    match Command::new(tool).arg("--version").output() {
        Ok(o) => o.status.success() || !o.stderr.is_empty(),
        Err(_) => false,
    }
}
| 196 | |
/// Returns the macOS SDK path via `xcrun --show-sdk-path`, trimmed, or
/// `None` if the query cannot be made or fails.
pub fn sdk_path() -> Option<String> {
    let out = Command::new("xcrun")
        .args(["--sdk", "macosx", "--show-sdk-path"])
        .output()
        .ok()?;
    out.status
        .success()
        .then(|| String::from_utf8_lossy(&out.stdout).trim().to_string())
}
| 207 | |
/// Returns the macOS SDK version via `xcrun --show-sdk-version`, trimmed,
/// or `None` if the query cannot be made or fails.
pub fn sdk_version() -> Option<String> {
    let out = Command::new("xcrun")
        .args(["--sdk", "macosx", "--show-sdk-version"])
        .output()
        .ok()?;
    out.status
        .success()
        .then(|| String::from_utf8_lossy(&out.stdout).trim().to_string())
}
| 218 | |
/// Builds a per-process scratch path under the system temp dir, namespaced
/// by the current process id so concurrent test runs do not collide.
pub fn scratch(name: &str) -> PathBuf {
    let leaf = format!("afs-ld-parity-{}-{name}", std::process::id());
    std::env::temp_dir().join(leaf)
}
| 222 | |
/// Assembles arm64 assembly `src` into the object file `out` via `xcrun as`.
///
/// The source is staged as a sibling `.s` file which is removed afterwards
/// regardless of the outcome. Returns a descriptive error string on any
/// write, spawn, or assembler failure.
///
/// Takes `&Path` instead of `&PathBuf` (idiomatic borrow; existing
/// `&PathBuf` callers still work through deref coercion).
pub fn assemble(src: &str, out: &Path) -> Result<(), String> {
    let tmp = out.with_extension("s");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "as", "-arch", "arm64"])
        .arg(&tmp)
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun as: {e}"))?;
    // Best-effort cleanup; failure to remove the temp file is not an error.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun as failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
| 242 | |
/// Compiles C source `src` into the arm64 object file `out` via
/// `xcrun clang -c`.
///
/// The source is staged as a sibling `.c` file which is removed afterwards
/// regardless of the outcome. Returns a descriptive error string on any
/// write, spawn, or compiler failure.
///
/// Takes `&Path` instead of `&PathBuf` (idiomatic borrow; existing
/// `&PathBuf` callers still work through deref coercion).
pub fn compile_c(src: &str, out: &Path) -> Result<(), String> {
    let tmp = out.with_extension("c");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "clang", "-arch", "arm64", "-c"])
        .arg(&tmp)
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun clang: {e}"))?;
    // Best-effort cleanup; failure to remove the temp file is not an error.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun clang failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
| 262 | |
/// Compiles C source `src` into an arm64 dylib at `out` whose install name
/// is `out` itself (so dependents record an absolute path).
///
/// The source is staged as a sibling `.c` file which is removed afterwards.
/// Returns a descriptive error string on any write, spawn, or build failure.
///
/// Takes `&Path` instead of `&PathBuf`, matching `compile_reexport_dylib_c`'s
/// `dep: &Path`; existing `&PathBuf` callers still work via deref coercion.
fn compile_dylib_c(src: &str, out: &Path) -> Result<(), String> {
    let tmp = out.with_extension("c");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let install_name = out.to_string_lossy().to_string();
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "clang", "-arch", "arm64", "-dynamiclib"])
        .arg(&tmp)
        .arg(format!("-Wl,-install_name,{install_name}"))
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun clang dylib: {e}"))?;
    // Best-effort cleanup; failure to remove the temp file is not an error.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun clang dylib failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
| 284 | |
| 285 | fn compile_archive_c(src: &str, out: &PathBuf) -> Result<(), String> { |
| 286 | let obj = out.with_extension("o"); |
| 287 | compile_c(src, &obj)?; |
| 288 | let output = Command::new("libtool") |
| 289 | .args(["-static", "-o"]) |
| 290 | .arg(out) |
| 291 | .arg(&obj) |
| 292 | .output() |
| 293 | .map_err(|e| format!("spawn libtool archive: {e}"))?; |
| 294 | let _ = fs::remove_file(&obj); |
| 295 | if !output.status.success() { |
| 296 | return Err(format!( |
| 297 | "libtool archive failed: {}", |
| 298 | String::from_utf8_lossy(&output.stderr) |
| 299 | )); |
| 300 | } |
| 301 | Ok(()) |
| 302 | } |
| 303 | |
/// Compiles C source `src` into an arm64 dylib at `out` that reexports the
/// already-built dylib `dep` (via `-Wl,-reexport_library`), with `out` as
/// its install name.
///
/// The source is staged as a sibling `.c` file which is removed afterwards.
/// Returns a descriptive error string on any write, spawn, or build failure.
///
/// `out` is now `&Path` for consistency with `dep`; existing `&PathBuf`
/// callers still work through deref coercion.
fn compile_reexport_dylib_c(src: &str, out: &Path, dep: &Path) -> Result<(), String> {
    let tmp = out.with_extension("c");
    fs::write(&tmp, src).map_err(|e| format!("write {}: {e}", tmp.display()))?;
    let install_name = out.to_string_lossy().to_string();
    let output = Command::new("xcrun")
        .args(["--sdk", "macosx", "clang", "-arch", "arm64", "-dynamiclib"])
        .arg(&tmp)
        .arg(format!("-Wl,-install_name,{install_name}"))
        .arg(format!("-Wl,-reexport_library,{}", dep.display()))
        .arg("-o")
        .arg(out)
        .output()
        .map_err(|e| format!("spawn xcrun clang reexport dylib: {e}"))?;
    // Best-effort cleanup; failure to remove the temp file is not an error.
    let _ = fs::remove_file(&tmp);
    if !output.status.success() {
        return Err(format!(
            "xcrun clang reexport dylib failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}
| 326 | |
/// Loads every parity case from the corpus directory `root`.
///
/// Each immediate subdirectory is one case: `inputs/` supplies the link
/// inputs (`.s`, `.c`, `.o`, `.a`, `.tbd`; other extensions are ignored),
/// and side-car text files (`args.txt`, `sections.txt`, `artifacts.txt`,
/// `notes.md`, ...) configure the checks. Returns the cases sorted by name,
/// or an error for the first unreadable/invalid case (including a case with
/// no supported inputs at all).
pub fn load_corpus(root: &Path) -> Result<Vec<LinkCase>, String> {
    let mut cases = Vec::new();
    let entries =
        fs::read_dir(root).map_err(|e| format!("read parity corpus {}: {e}", root.display()))?;
    for entry in entries {
        let entry = entry.map_err(|e| format!("read parity corpus entry: {e}"))?;
        let path = entry.path();
        // Stray files at the corpus root are not cases.
        if !path.is_dir() {
            continue;
        }

        let name = path
            .file_name()
            .and_then(|s| s.to_str())
            .ok_or_else(|| format!("invalid UTF-8 case directory {}", path.display()))?
            .to_string();
        let inputs_dir = path.join("inputs");
        let mut inputs = Vec::new();
        let input_entries = fs::read_dir(&inputs_dir)
            .map_err(|e| format!("read inputs for {}: {e}", path.display()))?;
        for input in input_entries {
            let input = input.map_err(|e| format!("read input entry for {}: {e}", name))?;
            let input_path = input.path();
            // Only known input kinds participate; anything else is ignored.
            match input_path.extension().and_then(|s| s.to_str()) {
                Some("s") | Some("c") | Some("o") | Some("a") | Some("tbd") => {
                    inputs.push(input_path)
                }
                _ => {}
            }
        }
        // Sort for a deterministic link order independent of readdir order.
        inputs.sort();
        if inputs.is_empty() {
            return Err(format!(
                "parity corpus case {} has no supported source inputs",
                path.display()
            ));
        }

        let args = read_tokens(&path.join("args.txt"))?;
        let section_checks = read_sections(&path.join("sections.txt"))?;
        let absent_sections = read_sections_if_present(&path.join("absent_sections.txt"))?;
        let page_ref_checks = read_page_refs(&path.join("page_refs.txt"))?;
        let command_checks = read_command_checks(&path.join("command_checks.txt"))?;
        let artifacts = read_artifacts(&path.join("artifacts.txt"))?;
        // Files consumed as artifact sources are built separately by
        // `link_both`, so drop them from the direct link inputs.
        let artifact_srcs: HashSet<&str> = artifacts
            .iter()
            .map(|artifact| artifact.src_name.as_str())
            .collect();
        inputs.retain(|input| {
            input
                .file_name()
                .and_then(|s| s.to_str())
                .map(|name| !artifact_srcs.contains(name))
                .unwrap_or(true)
        });
        let ignored_load_commands =
            read_load_command_names(&path.join("ignored_load_commands.txt"))?;
        let absent_load_commands = read_load_command_names(&path.join("absent_load_commands.txt"))?;
        let runtime_args = read_tokens_if_present(&path.join("runtime.txt"))?;
        // notes.md is optional; the tolerance rules are parsed out of it.
        let notes = fs::read_to_string(path.join("notes.md")).ok();
        let case_tolerances = parse_case_tolerances(notes.as_deref())?;

        cases.push(LinkCase {
            name,
            dir: path,
            inputs,
            args,
            section_checks,
            absent_sections,
            page_ref_checks,
            command_checks,
            artifacts,
            ignored_load_commands,
            absent_load_commands,
            runtime_args,
            notes,
            case_tolerances,
        });
    }

    // Deterministic case order regardless of directory iteration order.
    cases.sort_by(|a, b| a.name.cmp(&b.name));
    Ok(cases)
}
| 410 | |
/// Runs one corpus case through both linkers and returns both output images.
///
/// Pipeline: resolve the SDK, create a unique scratch dir, build every
/// declared input (`.s` via `as`, `.c` via `clang`, binary inputs copied
/// verbatim), copy side-car files from `files/`, build prebuilt artifacts,
/// then invoke afs-ld and Apple `ld` with argument lists expanded from the
/// same template — differing only in the output path — and read back both
/// outputs.
pub fn link_both(case: &LinkCase) -> Result<LinkOutputs, String> {
    let sdk = sdk_path().ok_or_else(|| "xcrun --show-sdk-path unavailable".to_string())?;
    let sdk_ver =
        sdk_version().ok_or_else(|| "xcrun --show-sdk-version unavailable".to_string())?;
    let work_dir = unique_temp_dir(&case.name)?;
    // Logical file name -> on-disk path maps, consumed by `expand_args`.
    let mut compiled: BTreeMap<String, PathBuf> = BTreeMap::new();
    let mut sidecars: BTreeMap<String, PathBuf> = BTreeMap::new();
    let mut artifacts: BTreeMap<String, PathBuf> = BTreeMap::new();
    // Stage 1: compile or copy each declared input into the scratch dir.
    for input in &case.inputs {
        let stem = input
            .file_stem()
            .and_then(|s| s.to_str())
            .ok_or_else(|| format!("invalid input stem {}", input.display()))?;
        match input.extension().and_then(|s| s.to_str()) {
            Some("s") => {
                let src = fs::read_to_string(input)
                    .map_err(|e| format!("read parity input {}: {e}", input.display()))?;
                let obj = work_dir.join(format!("{stem}.o"));
                assemble(&src, &obj)?;
                compiled.insert(format!("{stem}.o"), obj);
            }
            Some("c") => {
                let src = fs::read_to_string(input)
                    .map_err(|e| format!("read parity input {}: {e}", input.display()))?;
                let obj = work_dir.join(format!("{stem}.o"));
                compile_c(&src, &obj)?;
                compiled.insert(format!("{stem}.o"), obj);
            }
            // Prebuilt inputs are used as-is, keyed by their full file name.
            Some("o") | Some("a") | Some("tbd") => {
                let copied = work_dir.join(
                    input
                        .file_name()
                        .ok_or_else(|| format!("invalid input file name {}", input.display()))?,
                );
                fs::copy(input, &copied).map_err(|e| {
                    format!(
                        "copy parity input {} -> {}: {e}",
                        input.display(),
                        copied.display()
                    )
                })?;
                compiled.insert(
                    input
                        .file_name()
                        .and_then(|s| s.to_str())
                        .ok_or_else(|| format!("invalid UTF-8 input file {}", input.display()))?
                        .to_string(),
                    copied,
                );
            }
            other => {
                return Err(format!(
                    "unsupported parity input extension {:?} for {}",
                    other,
                    input.display()
                ));
            }
        }
    }
    // Stage 2: copy optional side-car files from `files/` into the work dir.
    let files_dir = case.dir.join("files");
    if files_dir.is_dir() {
        for entry in fs::read_dir(&files_dir)
            .map_err(|e| format!("read sidecar files for {}: {e}", case.name))?
        {
            let entry = entry.map_err(|e| format!("read sidecar entry for {}: {e}", case.name))?;
            let src = entry.path();
            if !src.is_file() {
                continue;
            }
            let name = src
                .file_name()
                .and_then(|s| s.to_str())
                .ok_or_else(|| format!("invalid sidecar file name {}", src.display()))?
                .to_string();
            let dst = work_dir.join(&name);
            fs::copy(&src, &dst)
                .map_err(|e| format!("copy sidecar {} -> {}: {e}", src.display(), dst.display()))?;
            sidecars.insert(name, dst);
        }
    }
    // Stage 3: build prebuilt artifacts in declaration order; a reexporting
    // dylib looks up its dependency among artifacts built earlier.
    for artifact in &case.artifacts {
        let src = case.dir.join("inputs").join(&artifact.src_name);
        let src_contents = fs::read_to_string(&src)
            .map_err(|e| format!("read artifact src {}: {e}", src.display()))?;
        let out = work_dir.join(&artifact.out_name);
        match artifact.kind {
            ArtifactKind::Dylib => compile_dylib_c(&src_contents, &out)?,
            ArtifactKind::Archive => compile_archive_c(&src_contents, &out)?,
            ArtifactKind::ReexportDylib => {
                let dep_name = artifact.dep_name.as_ref().ok_or_else(|| {
                    format!(
                        "missing reexport dependency for artifact {}",
                        artifact.out_name
                    )
                })?;
                let dep = artifacts
                    .get(dep_name)
                    .ok_or_else(|| format!("unknown reexport dependency `{dep_name}`"))?;
                compile_reexport_dylib_c(&src_contents, &out, dep)?;
            }
        }
        artifacts.insert(artifact.out_name.clone(), out);
    }

    // The output extension mirrors the link mode declared in args.txt.
    let suffix = if case.args.iter().any(|arg| arg == "-dylib") {
        "dylib"
    } else {
        "out"
    };
    let our_path = work_dir.join(format!("ours.{suffix}"));
    let their_path = work_dir.join(format!("apple.{suffix}"));

    // Same argument template for both linkers; only the output path differs.
    let our_args = expand_args(
        &case.args, &compiled, &sidecars, &artifacts, &our_path, &sdk, &sdk_ver,
    )?;
    let their_args = expand_args(
        &case.args,
        &compiled,
        &sidecars,
        &artifacts,
        &their_path,
        &sdk,
        &sdk_ver,
    )?;

    // Run our linker (the binary built by this crate).
    let our_output = Command::new(env!("CARGO_BIN_EXE_afs-ld"))
        .args(&our_args)
        .output()
        .map_err(|e| format!("spawn afs-ld: {e}"))?;
    if !our_output.status.success() {
        return Err(format!(
            "afs-ld failed for {}:\n{}",
            case.name,
            String::from_utf8_lossy(&our_output.stderr)
        ));
    }

    // Run Apple's linker on the identical inputs.
    let their_output = Command::new("xcrun")
        .arg("ld")
        .args(&their_args)
        .output()
        .map_err(|e| format!("spawn xcrun ld: {e}"))?;
    if !their_output.status.success() {
        return Err(format!(
            "Apple ld failed for {}:\n{}",
            case.name,
            String::from_utf8_lossy(&their_output.stderr)
        ));
    }

    let ours = fs::read(&our_path)
        .map_err(|e| format!("read afs-ld output {}: {e}", our_path.display()))?;
    let theirs = fs::read(&their_path)
        .map_err(|e| format!("read Apple ld output {}: {e}", their_path.display()))?;

    Ok(LinkOutputs {
        ours,
        theirs,
        our_path,
        their_path,
    })
}
| 573 | |
| 574 | pub fn command_ids(bytes: &[u8]) -> Result<Vec<u32>, String> { |
| 575 | let header = parse_header(bytes).map_err(|e| format!("parse header: {e}"))?; |
| 576 | let commands = parse_commands(&header, bytes).map_err(|e| format!("parse commands: {e}"))?; |
| 577 | Ok(commands |
| 578 | .into_iter() |
| 579 | .map(|cmd| match cmd { |
| 580 | LoadCommand::Segment64(_) => LC_SEGMENT_64, |
| 581 | LoadCommand::Symtab(_) => LC_SYMTAB, |
| 582 | LoadCommand::Dysymtab(_) => LC_DYSYMTAB, |
| 583 | LoadCommand::BuildVersion(_) => LC_BUILD_VERSION, |
| 584 | LoadCommand::DyldInfoOnly(_) => LC_DYLD_INFO_ONLY, |
| 585 | LoadCommand::DyldChainedFixups(_) => LC_DYLD_CHAINED_FIXUPS, |
| 586 | LoadCommand::DyldExportsTrie(_) => LC_DYLD_EXPORTS_TRIE, |
| 587 | LoadCommand::Dylib(d) => d.cmd, |
| 588 | LoadCommand::Raw { cmd, .. } => cmd, |
| 589 | other => panic!("unexpected load command in command_ids helper: {other:?}"), |
| 590 | }) |
| 591 | .collect()) |
| 592 | } |
| 593 | |
| 594 | pub fn compare_command_ids(ours: &[u8], theirs: &[u8], ignored: &[u32]) -> Result<(), String> { |
| 595 | let our_ids: Vec<u32> = command_ids(ours)? |
| 596 | .into_iter() |
| 597 | .filter(|cmd| !ignored.contains(cmd)) |
| 598 | .collect(); |
| 599 | let their_ids: Vec<u32> = command_ids(theirs)? |
| 600 | .into_iter() |
| 601 | .filter(|cmd| !ignored.contains(cmd)) |
| 602 | .collect(); |
| 603 | if our_ids != their_ids { |
| 604 | return Err(format!( |
| 605 | "load-command ids differ:\nours: {our_ids:#x?}\ntheirs: {their_ids:#x?}" |
| 606 | )); |
| 607 | } |
| 608 | Ok(()) |
| 609 | } |
| 610 | |
/// Runs every requested load-command-level semantic check against the two
/// outputs, failing fast with a descriptive error on the first divergence.
///
/// Each check canonicalizes or normalizes the relevant structure on both
/// sides before comparing, so tolerated layout differences (ordering,
/// encoding) do not cause spurious failures.
pub fn compare_command_details(
    ours: &[u8],
    theirs: &[u8],
    checks: &[CommandCheck],
) -> Result<(), String> {
    for check in checks {
        match check {
            CommandCheck::BuildVersion => {
                let ours = normalized_build_version(ours)?;
                let theirs = normalized_build_version(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "LC_BUILD_VERSION diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::LoadDylibNames => {
                let ours = load_dylib_names(ours)?;
                let theirs = load_dylib_names(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "LC_LOAD_DYLIB names diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::ExportRecords => {
                let ours = canonical_export_records(ours)?;
                let theirs = canonical_export_records(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "canonical export records diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::SymbolRecordMap => {
                let ours = canonical_symbol_record_map(ours)?;
                let theirs = canonical_symbol_record_map(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "canonical symbol record map diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::IndirectSymbolIdentities => {
                let ours = indirect_symbol_identities(ours)?;
                let theirs = indirect_symbol_identities(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "indirect symbol identities diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::SymbolPartitionNames => {
                let ours = symbol_partition_names(ours)?;
                let theirs = symbol_partition_names(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "symbol partition names diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            // Deliberately loose: only the effective lengths are compared,
            // and they merely have to be within 5% of each other.
            CommandCheck::StringTableNearParity => {
                let our_len = effective_string_table_len(ours)?;
                let their_len = effective_string_table_len(theirs)?;
                if !string_table_within_five_percent(our_len, their_len) {
                    return Err(format!(
                        "string table length drifted too far from Apple ld: ours={} theirs={}",
                        our_len, their_len
                    ));
                }
            }
            CommandCheck::FunctionStarts => {
                let ours = decode_function_starts(ours)?;
                let theirs = decode_function_starts(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "function starts diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            // Like FunctionStarts, but offsets are normalized first so a
            // layout shift alone does not fail the check.
            CommandCheck::NormalizedFunctionStarts => {
                let ours = normalize_function_start_offsets(&decode_function_starts(ours)?);
                let theirs = normalize_function_start_offsets(&decode_function_starts(theirs)?);
                if ours != theirs {
                    return Err(format!(
                        "normalized function starts diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::DataInCode => {
                let ours = canonical_data_in_code(ours)?;
                let theirs = canonical_data_in_code(theirs)?;
                if ours != theirs {
                    return Err(format!(
                        "canonical data-in-code records diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            // Only enforced when BOTH sides emit records; one side emitting
            // an empty table is tolerated.
            CommandCheck::DataInCodeIfPresent => {
                let ours = canonical_data_in_code(ours)?;
                let theirs = canonical_data_in_code(theirs)?;
                if !ours.is_empty() && !theirs.is_empty() && ours != theirs {
                    return Err(format!(
                        "canonical data-in-code records diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::RebasedUnwindBytes => {
                let ours = rebased_unwind_bytes(ours)?;
                let theirs = rebased_unwind_bytes(theirs)?;
                if ours != theirs {
                    return Err("rebased unwind bytes diverged".to_string());
                }
            }
            // Raw stream equality: rebase opcodes must match byte-for-byte.
            CommandCheck::DyldInfoRebase => {
                let ours = dyld_info_stream(ours, DyldInfoStreamKind::Rebase)?;
                let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::Rebase)?;
                if ours != theirs {
                    return Err("rebase stream diverged".to_string());
                }
            }
            // Bind streams are compared as decoded canonical records, not
            // raw opcode bytes, so equivalent encodings compare equal.
            CommandCheck::DyldInfoBind => {
                let ours = canonical_bind_records(ours, DyldInfoStreamKind::Bind)?;
                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::Bind)?;
                if ours != theirs {
                    return Err(format!(
                        "bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::DyldInfoWeakBind => {
                let ours = canonical_bind_records(ours, DyldInfoStreamKind::WeakBind)?;
                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::WeakBind)?;
                if ours != theirs {
                    return Err(format!(
                        "weak-bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
            CommandCheck::DyldInfoLazyBind => {
                let ours = canonical_bind_records(ours, DyldInfoStreamKind::LazyBind)?;
                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::LazyBind)?;
                if ours != theirs {
                    return Err(format!(
                        "lazy-bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                    ));
                }
            }
        }
    }
    Ok(())
}
| 763 | |
| 764 | pub fn ensure_absent_load_commands( |
| 765 | bytes: &[u8], |
| 766 | commands: &[u32], |
| 767 | side: &str, |
| 768 | ) -> Result<(), String> { |
| 769 | let ids = command_ids(bytes)?; |
| 770 | for command in commands { |
| 771 | if ids.contains(command) { |
| 772 | return Err(format!( |
| 773 | "{side} unexpectedly emitted {}", |
| 774 | load_command_name(*command) |
| 775 | )); |
| 776 | } |
| 777 | } |
| 778 | Ok(()) |
| 779 | } |
| 780 | |
| 781 | pub fn ensure_absent_sections( |
| 782 | bytes: &[u8], |
| 783 | sections: &[(String, String)], |
| 784 | side: &str, |
| 785 | ) -> Result<(), String> { |
| 786 | for (segname, sectname) in sections { |
| 787 | if output_section(bytes, segname, sectname).is_some() { |
| 788 | return Err(format!( |
| 789 | "{side} unexpectedly emitted section {segname},{sectname}" |
| 790 | )); |
| 791 | } |
| 792 | } |
| 793 | Ok(()) |
| 794 | } |
| 795 | |
| 796 | pub fn output_section(bytes: &[u8], segname: &str, sectname: &str) -> Option<(u64, Vec<u8>)> { |
| 797 | let header = parse_header(bytes).ok()?; |
| 798 | let commands = parse_commands(&header, bytes).ok()?; |
| 799 | for cmd in commands { |
| 800 | if let LoadCommand::Segment64(seg) = cmd { |
| 801 | for section in seg.sections { |
| 802 | if section.segname_str() == segname && section.sectname_str() == sectname { |
| 803 | let data = if section.offset == 0 { |
| 804 | Vec::new() |
| 805 | } else { |
| 806 | let start = section.offset as usize; |
| 807 | let end = start + section.size as usize; |
| 808 | bytes.get(start..end)?.to_vec() |
| 809 | }; |
| 810 | return Some((section.addr, data)); |
| 811 | } |
| 812 | } |
| 813 | } |
| 814 | } |
| 815 | None |
| 816 | } |
| 817 | |
| 818 | fn output_section_header(bytes: &[u8], segname: &str, sectname: &str) -> Option<Section64Header> { |
| 819 | let header = parse_header(bytes).ok()?; |
| 820 | let commands = parse_commands(&header, bytes).ok()?; |
| 821 | for cmd in commands { |
| 822 | if let LoadCommand::Segment64(seg) = cmd { |
| 823 | for section in seg.sections { |
| 824 | if section.segname_str() == segname && section.sectname_str() == sectname { |
| 825 | return Some(section); |
| 826 | } |
| 827 | } |
| 828 | } |
| 829 | } |
| 830 | None |
| 831 | } |
| 832 | |
| 833 | fn segment_vmaddr(bytes: &[u8], segname: &str) -> Option<u64> { |
| 834 | let header = parse_header(bytes).ok()?; |
| 835 | let commands = parse_commands(&header, bytes).ok()?; |
| 836 | for cmd in commands { |
| 837 | if let LoadCommand::Segment64(seg) = cmd { |
| 838 | if seg.segname_str() == segname { |
| 839 | return Some(seg.vmaddr); |
| 840 | } |
| 841 | } |
| 842 | } |
| 843 | None |
| 844 | } |
| 845 | |
/// Compares the configured sections of the two outputs.
///
/// `__TEXT,__stubs` and `__TEXT,__stub_helper` are compared through
/// canonical decoded forms (their raw bytes can legitimately differ); every
/// other section is compared byte-for-byte, with the case's tolerance rules
/// applied to downgrade expected diffs before the verdict.
pub fn compare_sections(
    ours: &[u8],
    theirs: &[u8],
    sections: &[(String, String)],
    case_tolerances: &[CaseTolerance],
) -> Result<(), String> {
    for (segname, sectname) in sections {
        // Stub section: compare decoded stub targets, not raw bytes.
        if segname == "__TEXT" && sectname == "__stubs" {
            let ours = canonical_stub_targets(ours)?;
            let theirs = canonical_stub_targets(theirs)?;
            if ours != theirs {
                return Err(format!(
                    "canonical stub targets diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                ));
            }
            continue;
        }
        // Stub helper: likewise compared via its canonical decoded surface.
        if segname == "__TEXT" && sectname == "__stub_helper" {
            let ours = canonical_stub_helper(ours)?;
            let theirs = canonical_stub_helper(theirs)?;
            if ours != theirs {
                return Err(format!(
                    "canonical stub helper surface diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}"
                ));
            }
            continue;
        }
        // Default path: raw byte diff with case tolerances applied.
        let (_, our_bytes) = output_section(ours, segname, sectname)
            .ok_or_else(|| format!("missing section {segname},{sectname} in afs-ld output"))?;
        let (_, their_bytes) = output_section(theirs, segname, sectname)
            .ok_or_else(|| format!("missing section {segname},{sectname} in Apple output"))?;
        let diff = apply_section_tolerances(
            diff_macho(&our_bytes, &their_bytes),
            segname,
            sectname,
            case_tolerances,
        );
        if !diff.is_clean() {
            return Err(format!(
                "section bytes differ for {segname},{sectname}: {:#?}",
                diff.critical
            ));
        }
    }
    Ok(())
}
| 892 | |
/// Verifies decoded page references in both outputs against their expected
/// targets.
///
/// For each check, the reference encoded at `site_offset` inside the named
/// section is decoded (per `check.kind`) and compared against the address of
/// `check.symbol` — either a symbol-table entry or an `@SECTION:` spec —
/// resolved independently within each image, so layout differences between
/// the two linkers do not cause false failures.
pub fn compare_page_refs(
    ours: &[u8],
    theirs: &[u8],
    checks: &[PageRefCheck],
) -> Result<(), String> {
    if checks.is_empty() {
        return Ok(());
    }
    let our_symbols = symbol_values(ours)?;
    let their_symbols = symbol_values(theirs)?;
    for check in checks {
        let (our_addr, our_bytes) = output_section(ours, &check.segname, &check.sectname)
            .ok_or_else(|| {
                format!(
                    "missing section {},{} in afs-ld output",
                    check.segname, check.sectname
                )
            })?;
        let (their_addr, their_bytes) = output_section(theirs, &check.segname, &check.sectname)
            .ok_or_else(|| {
                format!(
                    "missing section {},{} in Apple output",
                    check.segname, check.sectname
                )
            })?;
        // What each image's reference site actually points at.
        let our_target =
            decode_page_reference(&our_bytes, our_addr, check.site_offset, check.kind)?;
        let their_target =
            decode_page_reference(&their_bytes, their_addr, check.site_offset, check.kind)?;
        // Each image is checked against its OWN resolved expectation.
        let expected_ours = resolve_page_ref_expectation(ours, &our_symbols, &check.symbol)?;
        let expected_theirs = resolve_page_ref_expectation(theirs, &their_symbols, &check.symbol)?;
        if our_target != expected_ours || their_target != expected_theirs {
            return Err(format!(
                "page ref {},{}+0x{:x} -> {} diverged: ours=0x{:x} expected=0x{:x}; theirs=0x{:x} expected=0x{:x}",
                check.segname,
                check.sectname,
                check.site_offset,
                check.symbol,
                our_target,
                expected_ours,
                their_target,
                expected_theirs,
            ));
        }
    }
    Ok(())
}
| 940 | |
| 941 | fn resolve_page_ref_expectation( |
| 942 | bytes: &[u8], |
| 943 | symbols: &BTreeMap<String, u64>, |
| 944 | reference: &str, |
| 945 | ) -> Result<u64, String> { |
| 946 | if let Some(spec) = reference.strip_prefix("@SECTION:") { |
| 947 | let (section_spec, addend) = if let Some((section_spec, addend)) = spec.rsplit_once('+') { |
| 948 | (section_spec, parse_u64(addend)?) |
| 949 | } else { |
| 950 | (spec, 0) |
| 951 | }; |
| 952 | let (segname, sectname) = section_spec |
| 953 | .split_once(',') |
| 954 | .ok_or_else(|| format!("invalid @SECTION page-ref target `{reference}`"))?; |
| 955 | let (addr, data) = output_section(bytes, segname, sectname) |
| 956 | .ok_or_else(|| format!("missing section {segname},{sectname} in output"))?; |
| 957 | if addend > data.len() as u64 { |
| 958 | return Err(format!( |
| 959 | "@SECTION target `{reference}` exceeds section size {}", |
| 960 | data.len() |
| 961 | )); |
| 962 | } |
| 963 | return Ok(addr + addend); |
| 964 | } |
| 965 | symbols |
| 966 | .get(reference) |
| 967 | .copied() |
| 968 | .ok_or_else(|| format!("missing symbol {reference} in output")) |
| 969 | } |
| 970 | |
/// Runs `path` with `args`, capturing exit code, stdout, and stderr.
///
/// The child is polled with `try_wait` every 5 ms rather than blocking so a
/// hung binary can be killed once `runtime_timeout()` elapses. A timeout is
/// reported as `Err`, carrying whatever output the child produced before
/// being killed.
pub fn run_program(path: &Path, args: &[String]) -> Result<ProgramOutput, String> {
    let runtime_timeout = runtime_timeout();

    let mut child = Command::new(path)
        .args(args)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| format!("run {}: {e}", path.display()))?;
    let started = Instant::now();
    loop {
        // Non-blocking check; `wait_with_output` below drains the pipes.
        if child
            .try_wait()
            .map_err(|e| format!("wait for {}: {e}", path.display()))?
            .is_some()
        {
            let output = child
                .wait_with_output()
                .map_err(|e| format!("collect output from {}: {e}", path.display()))?;
            return Ok(ProgramOutput {
                exit_code: output.status.code(),
                stdout: output.stdout,
                stderr: output.stderr,
            });
        }
        if started.elapsed() >= runtime_timeout {
            // Best-effort kill; the child may have exited in the meantime.
            let _ = child.kill();
            let output = child
                .wait_with_output()
                .map_err(|e| format!("collect timed-out output from {}: {e}", path.display()))?;
            return Err(format!(
                "run {} timed out after {:?}: exit={:?} stdout={:?} stderr={:?}",
                path.display(),
                runtime_timeout,
                output.status.code(),
                String::from_utf8_lossy(&output.stdout),
                String::from_utf8_lossy(&output.stderr)
            ));
        }
        thread::sleep(Duration::from_millis(5));
    }
}
| 1013 | |
/// Runs both linked binaries with the same arguments and fails unless their
/// exit codes, stdout, and stderr all match exactly.
///
/// The Apple-linked binary runs on a scoped worker thread so both programs
/// (and their timeouts) run concurrently instead of back to back.
pub fn compare_runtime(our_path: &Path, their_path: &Path, args: &[String]) -> Result<(), String> {
    let our_path = our_path.to_path_buf();
    let their_path = their_path.to_path_buf();
    // Owned copies so the spawned closure can borrow them for the scope.
    let their_args = args.to_vec();
    let ours = thread::scope(|scope| {
        let theirs = scope.spawn(|| run_program(&their_path, &their_args));
        let ours = run_program(&our_path, args);
        let theirs = theirs
            .join()
            .map_err(|_| "Apple runtime worker panicked".to_string())?;
        Ok::<_, String>((ours, theirs))
    })?;
    // Unpack the two run results; either may itself be an error.
    let (ours, theirs) = ours;
    let ours = ours?;
    let theirs = theirs?;
    if ours != theirs {
        return Err(format!(
            "runtime differs:\nours: exit={:?} stdout={:?} stderr={:?}\ntheirs: exit={:?} stdout={:?} stderr={:?}",
            ours.exit_code,
            String::from_utf8_lossy(&ours.stdout),
            String::from_utf8_lossy(&ours.stderr),
            theirs.exit_code,
            String::from_utf8_lossy(&theirs.stdout),
            String::from_utf8_lossy(&theirs.stderr),
        ));
    }
    Ok(())
}
| 1042 | |
/// Timeout applied to each launched test binary.
///
/// Reads `PARITY_RUNTIME_TIMEOUT_SECONDS` from the environment when it
/// parses as a `u64`; otherwise falls back to 120 seconds.
fn runtime_timeout() -> Duration {
    const DEFAULT_RUNTIME_TIMEOUT_SECS: u64 = 120;

    let configured = std::env::var("PARITY_RUNTIME_TIMEOUT_SECONDS")
        .ok()
        .and_then(|value| value.parse::<u64>().ok());
    Duration::from_secs(configured.unwrap_or(DEFAULT_RUNTIME_TIMEOUT_SECS))
}
| 1052 | |
/// Byte-level diff between two Mach-O images or section byte slices.
///
/// Sprint 27 starts tolerating a very small allowlist: UUID bytes, dylib
/// timestamp fields, and code-signature command/blob bytes at matching
/// offsets. Unknown diffs remain critical.
pub fn diff_macho(ours: &[u8], theirs: &[u8]) -> DiffReport {
    let mut report = DiffReport::default();

    // Different sizes make offset-wise comparison meaningless: emit one
    // critical chunk spanning the larger image and stop.
    if ours.len() != theirs.len() {
        report.critical.push(DiffChunk {
            offset: 0,
            len: ours.len().max(theirs.len()),
            reason: format!(
                "total size differs: ours = {}, theirs = {}",
                ours.len(),
                theirs.len()
            ),
            category: DiffCategory::Critical,
        });
        return report;
    }

    // Per-byte tolerance labels for each image (None = not tolerated).
    let our_mask = tolerated_mask(ours);
    let their_mask = tolerated_mask(theirs);

    let mut i = 0;
    while i < ours.len() {
        if ours[i] == theirs[i] {
            i += 1;
            continue;
        }

        // A differing byte is tolerated only when BOTH masks label it with
        // the same reason at this offset.
        let tolerated_reason = match (our_mask[i], their_mask[i]) {
            (Some(left), Some(right)) if left == right => Some(left),
            _ => None,
        };
        let start = i;
        i += 1;
        // Grow the chunk while bytes keep differing AND stay in the same
        // category: same tolerance reason for a tolerated run, or
        // consistently untolerated for a critical run.
        while i < ours.len() && ours[i] != theirs[i] {
            let same_category = match tolerated_reason {
                Some(reason) => matches!(
                    (our_mask[i], their_mask[i]),
                    (Some(left), Some(right)) if left == reason && right == reason
                ),
                None => !matches!(
                    (our_mask[i], their_mask[i]),
                    (Some(left), Some(right)) if left == right
                ),
            };
            if !same_category {
                break;
            }
            i += 1;
        }

        let len = i - start;
        if let Some(reason) = tolerated_reason {
            report.tolerated.push(DiffChunk {
                offset: start,
                len,
                reason: reason.to_string(),
                category: DiffCategory::Tolerated(reason),
            });
        } else {
            report.critical.push(DiffChunk {
                offset: start,
                len,
                reason: format!("{} byte(s) differ starting at 0x{start:x}", len),
                category: DiffCategory::Critical,
            });
        }
    }

    report
}
| 1128 | |
| 1129 | pub fn parse_case_tolerances(notes: Option<&str>) -> Result<Vec<CaseTolerance>, String> { |
| 1130 | let Some(notes) = notes else { |
| 1131 | return Ok(Vec::new()); |
| 1132 | }; |
| 1133 | |
| 1134 | let mut tolerances = Vec::new(); |
| 1135 | let mut in_block = false; |
| 1136 | for raw_line in notes.lines() { |
| 1137 | let line = raw_line.trim(); |
| 1138 | if line.is_empty() { |
| 1139 | continue; |
| 1140 | } |
| 1141 | if line == "tolerated:" { |
| 1142 | in_block = true; |
| 1143 | continue; |
| 1144 | } |
| 1145 | if !in_block { |
| 1146 | continue; |
| 1147 | } |
| 1148 | if !line.starts_with("- region:") { |
| 1149 | // Stop once the simple tolerated block ends. |
| 1150 | if !line.starts_with('#') && !raw_line.starts_with(' ') && !raw_line.starts_with('\t') { |
| 1151 | break; |
| 1152 | } |
| 1153 | continue; |
| 1154 | } |
| 1155 | tolerances.push(parse_case_tolerance_line(line)?); |
| 1156 | } |
| 1157 | Ok(tolerances) |
| 1158 | } |
| 1159 | |
| 1160 | pub fn apply_section_tolerances( |
| 1161 | mut diff: DiffReport, |
| 1162 | segname: &str, |
| 1163 | sectname: &str, |
| 1164 | case_tolerances: &[CaseTolerance], |
| 1165 | ) -> DiffReport { |
| 1166 | if diff.critical.is_empty() || case_tolerances.is_empty() { |
| 1167 | return diff; |
| 1168 | } |
| 1169 | |
| 1170 | let mut remaining = Vec::new(); |
| 1171 | for chunk in diff.critical.drain(..) { |
| 1172 | let tolerated = case_tolerances |
| 1173 | .iter() |
| 1174 | .find(|tol| tolerance_covers_chunk(tol, segname, sectname, chunk.offset, chunk.len)); |
| 1175 | if let Some(tol) = tolerated { |
| 1176 | diff.tolerated.push(DiffChunk { |
| 1177 | offset: chunk.offset, |
| 1178 | len: chunk.len, |
| 1179 | reason: tol.reason.clone(), |
| 1180 | category: DiffCategory::Tolerated("case-note"), |
| 1181 | }); |
| 1182 | } else { |
| 1183 | remaining.push(chunk); |
| 1184 | } |
| 1185 | } |
| 1186 | diff.critical = remaining; |
| 1187 | diff |
| 1188 | } |
| 1189 | |
/// Creates a fresh per-case scratch directory under the system temp dir.
///
/// The name combines the process id, a sanitized case name (slashes and
/// spaces become dashes), and a nanosecond timestamp for uniqueness.
fn unique_temp_dir(case_name: &str) -> Result<PathBuf, String> {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_err(|e| format!("clock error: {e}"))?
        .as_nanos();
    let sanitized = case_name.replace(['/', ' '], "-");
    let leaf = format!("afs-ld-parity-{}-{sanitized}-{nanos}", std::process::id());
    let dir = std::env::temp_dir().join(leaf);
    fs::create_dir_all(&dir).map_err(|e| format!("create {}: {e}", dir.display()))?;
    Ok(dir)
}
| 1203 | |
| 1204 | fn parse_case_tolerance_line(line: &str) -> Result<CaseTolerance, String> { |
| 1205 | let rest = line |
| 1206 | .strip_prefix("- region:") |
| 1207 | .ok_or_else(|| format!("invalid tolerance line `{line}`"))? |
| 1208 | .trim(); |
| 1209 | let (before_reason, reason_part) = rest |
| 1210 | .split_once(" reason:") |
| 1211 | .ok_or_else(|| format!("missing `reason:` in tolerance line `{line}`"))?; |
| 1212 | let (region_part, bytes_part) = before_reason |
| 1213 | .split_once(" bytes ") |
| 1214 | .ok_or_else(|| format!("missing `bytes` range in tolerance line `{line}`"))?; |
| 1215 | let reason = reason_part.trim().trim_matches('"').to_string(); |
| 1216 | if reason.is_empty() { |
| 1217 | return Err(format!("empty tolerance reason in `{line}`")); |
| 1218 | } |
| 1219 | let (start, end_inclusive) = parse_tolerance_range(bytes_part.trim())?; |
| 1220 | let region_token = region_part.trim(); |
| 1221 | let (segname, sectname) = match region_token.split_once(',') { |
| 1222 | Some((segname, sectname)) => ( |
| 1223 | segname.trim().to_string(), |
| 1224 | Some(sectname.trim().to_string()), |
| 1225 | ), |
| 1226 | None => (region_token.to_string(), None), |
| 1227 | }; |
| 1228 | if segname.is_empty() { |
| 1229 | return Err(format!("empty tolerance region in `{line}`")); |
| 1230 | } |
| 1231 | Ok(CaseTolerance { |
| 1232 | region: ToleranceRegion::SectionBytes { |
| 1233 | segname, |
| 1234 | sectname, |
| 1235 | start, |
| 1236 | end_inclusive, |
| 1237 | }, |
| 1238 | reason, |
| 1239 | }) |
| 1240 | } |
| 1241 | |
| 1242 | fn parse_tolerance_range(range: &str) -> Result<(usize, usize), String> { |
| 1243 | let (start, end) = range |
| 1244 | .split_once('-') |
| 1245 | .ok_or_else(|| format!("invalid tolerance range `{range}`"))?; |
| 1246 | let start = parse_usize(start.trim())?; |
| 1247 | let end = parse_usize(end.trim())?; |
| 1248 | if end < start { |
| 1249 | return Err(format!("tolerance range end before start in `{range}`")); |
| 1250 | } |
| 1251 | Ok((start, end)) |
| 1252 | } |
| 1253 | |
/// Parses a `usize` token, accepting decimal or `0x`-prefixed hex.
fn parse_usize(token: &str) -> Result<usize, String> {
    match token.strip_prefix("0x") {
        Some(hex) => usize::from_str_radix(hex, 16),
        None => token.parse::<usize>(),
    }
    .map_err(|e| format!("parse usize `{token}`: {e}"))
}
| 1263 | |
| 1264 | fn tolerance_covers_chunk( |
| 1265 | tolerance: &CaseTolerance, |
| 1266 | segname: &str, |
| 1267 | sectname: &str, |
| 1268 | offset: usize, |
| 1269 | len: usize, |
| 1270 | ) -> bool { |
| 1271 | match &tolerance.region { |
| 1272 | ToleranceRegion::SectionBytes { |
| 1273 | segname: expected_seg, |
| 1274 | sectname: expected_sect, |
| 1275 | start, |
| 1276 | end_inclusive, |
| 1277 | } => { |
| 1278 | if expected_seg != segname { |
| 1279 | return false; |
| 1280 | } |
| 1281 | if let Some(expected_sect) = expected_sect { |
| 1282 | if expected_sect != sectname { |
| 1283 | return false; |
| 1284 | } |
| 1285 | } |
| 1286 | let end = offset.saturating_add(len.saturating_sub(1)); |
| 1287 | offset >= *start && end <= *end_inclusive |
| 1288 | } |
| 1289 | } |
| 1290 | } |
| 1291 | |
/// Reads a sidecar file into trimmed, non-empty, non-`#`-comment lines.
fn read_tokens(path: &Path) -> Result<Vec<String>, String> {
    let text = fs::read_to_string(path).map_err(|e| format!("read {}: {e}", path.display()))?;
    let tokens = text
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .filter(|line| !line.starts_with('#'))
        .map(str::to_owned)
        .collect();
    Ok(tokens)
}
| 1301 | |
| 1302 | fn read_tokens_if_present(path: &Path) -> Result<Vec<String>, String> { |
| 1303 | if path.exists() { |
| 1304 | read_tokens(path) |
| 1305 | } else { |
| 1306 | Ok(Vec::new()) |
| 1307 | } |
| 1308 | } |
| 1309 | |
| 1310 | fn read_sections(path: &Path) -> Result<Vec<(String, String)>, String> { |
| 1311 | let mut sections = Vec::new(); |
| 1312 | for line in read_tokens(path)? { |
| 1313 | let mut parts = line.split_whitespace(); |
| 1314 | let segname = parts |
| 1315 | .next() |
| 1316 | .ok_or_else(|| format!("missing segment name in {}", path.display()))?; |
| 1317 | let sectname = parts |
| 1318 | .next() |
| 1319 | .ok_or_else(|| format!("missing section name in {}", path.display()))?; |
| 1320 | if parts.next().is_some() { |
| 1321 | return Err(format!( |
| 1322 | "too many fields in section spec `{line}` from {}", |
| 1323 | path.display() |
| 1324 | )); |
| 1325 | } |
| 1326 | sections.push((segname.to_string(), sectname.to_string())); |
| 1327 | } |
| 1328 | Ok(sections) |
| 1329 | } |
| 1330 | |
| 1331 | fn read_sections_if_present(path: &Path) -> Result<Vec<(String, String)>, String> { |
| 1332 | if path.exists() { |
| 1333 | read_sections(path) |
| 1334 | } else { |
| 1335 | Ok(Vec::new()) |
| 1336 | } |
| 1337 | } |
| 1338 | |
| 1339 | fn read_load_command_names(path: &Path) -> Result<Vec<u32>, String> { |
| 1340 | if !path.exists() { |
| 1341 | return Ok(Vec::new()); |
| 1342 | } |
| 1343 | let mut commands = Vec::new(); |
| 1344 | for line in read_tokens(path)? { |
| 1345 | commands.push(parse_load_command_name(&line)?); |
| 1346 | } |
| 1347 | Ok(commands) |
| 1348 | } |
| 1349 | |
| 1350 | fn read_command_checks(path: &Path) -> Result<Vec<CommandCheck>, String> { |
| 1351 | if !path.exists() { |
| 1352 | return Ok(Vec::new()); |
| 1353 | } |
| 1354 | let mut checks = Vec::new(); |
| 1355 | for line in read_tokens(path)? { |
| 1356 | checks.push(parse_command_check(&line)?); |
| 1357 | } |
| 1358 | Ok(checks) |
| 1359 | } |
| 1360 | |
| 1361 | fn read_page_refs(path: &Path) -> Result<Vec<PageRefCheck>, String> { |
| 1362 | if !path.exists() { |
| 1363 | return Ok(Vec::new()); |
| 1364 | } |
| 1365 | let mut checks = Vec::new(); |
| 1366 | for line in read_tokens(path)? { |
| 1367 | let mut parts = line.split_whitespace(); |
| 1368 | let segname = parts |
| 1369 | .next() |
| 1370 | .ok_or_else(|| format!("missing segment name in {}", path.display()))?; |
| 1371 | let sectname = parts |
| 1372 | .next() |
| 1373 | .ok_or_else(|| format!("missing section name in {}", path.display()))?; |
| 1374 | let site_offset = parts |
| 1375 | .next() |
| 1376 | .ok_or_else(|| format!("missing site offset in {}", path.display()))?; |
| 1377 | let kind = parts |
| 1378 | .next() |
| 1379 | .ok_or_else(|| format!("missing page-ref kind in {}", path.display()))?; |
| 1380 | let symbol = parts |
| 1381 | .next() |
| 1382 | .ok_or_else(|| format!("missing symbol name in {}", path.display()))?; |
| 1383 | if parts.next().is_some() { |
| 1384 | return Err(format!( |
| 1385 | "too many fields in page-ref spec `{line}` from {}", |
| 1386 | path.display() |
| 1387 | )); |
| 1388 | } |
| 1389 | checks.push(PageRefCheck { |
| 1390 | segname: segname.to_string(), |
| 1391 | sectname: sectname.to_string(), |
| 1392 | site_offset: parse_u64(site_offset)?, |
| 1393 | kind: parse_page_ref_kind(kind)?, |
| 1394 | symbol: symbol.to_string(), |
| 1395 | }); |
| 1396 | } |
| 1397 | Ok(checks) |
| 1398 | } |
| 1399 | |
/// Reads prebuilt-artifact specs from a sidecar file.
///
/// Each line is `<kind> <src> <out> [dep]`. `clang_dylib` and
/// `clang_archive` take exactly three fields; `clang_reexport_dylib`
/// requires a fourth field naming the artifact it re-exports. A missing
/// file means the case has no artifacts.
fn read_artifacts(path: &Path) -> Result<Vec<ArtifactSpec>, String> {
    if !path.exists() {
        return Ok(Vec::new());
    }
    let mut specs = Vec::new();
    for line in read_tokens(path)? {
        let mut parts = line.split_whitespace();
        let kind = parts
            .next()
            .ok_or_else(|| format!("missing artifact kind in {}", path.display()))?;
        let src_name = parts
            .next()
            .ok_or_else(|| format!("missing artifact src in {}", path.display()))?;
        let out_name = parts
            .next()
            .ok_or_else(|| format!("missing artifact output in {}", path.display()))?;
        // Optional fourth field; validated per kind below.
        let dep_name = parts.next().map(str::to_string);
        if parts.next().is_some() {
            return Err(format!(
                "too many fields in artifact spec `{line}` from {}",
                path.display()
            ));
        }
        let (kind, dep_name) = match kind {
            "clang_dylib" => {
                if dep_name.is_some() {
                    return Err(format!(
                        "clang_dylib takes exactly 3 fields in {}",
                        path.display()
                    ));
                }
                (ArtifactKind::Dylib, None)
            }
            "clang_archive" => {
                if dep_name.is_some() {
                    return Err(format!(
                        "clang_archive takes exactly 3 fields in {}",
                        path.display()
                    ));
                }
                (ArtifactKind::Archive, None)
            }
            "clang_reexport_dylib" => {
                let dep_name = dep_name.ok_or_else(|| {
                    format!(
                        "clang_reexport_dylib needs a dependency artifact in {}",
                        path.display()
                    )
                })?;
                (ArtifactKind::ReexportDylib, Some(dep_name))
            }
            other => return Err(format!("unknown artifact kind `{other}`")),
        };
        specs.push(ArtifactSpec {
            src_name: src_name.to_string(),
            out_name: out_name.to_string(),
            kind,
            dep_name,
        });
    }
    Ok(specs)
}
| 1462 | |
| 1463 | fn parse_command_check(name: &str) -> Result<CommandCheck, String> { |
| 1464 | match name { |
| 1465 | "build_version" => Ok(CommandCheck::BuildVersion), |
| 1466 | "load_dylib_names" => Ok(CommandCheck::LoadDylibNames), |
| 1467 | "export_records" => Ok(CommandCheck::ExportRecords), |
| 1468 | "symbol_record_map" => Ok(CommandCheck::SymbolRecordMap), |
| 1469 | "indirect_symbol_identities" => Ok(CommandCheck::IndirectSymbolIdentities), |
| 1470 | "symbol_partition_names" => Ok(CommandCheck::SymbolPartitionNames), |
| 1471 | "string_table_near_parity" => Ok(CommandCheck::StringTableNearParity), |
| 1472 | "function_starts" => Ok(CommandCheck::FunctionStarts), |
| 1473 | "normalized_function_starts" => Ok(CommandCheck::NormalizedFunctionStarts), |
| 1474 | "data_in_code" => Ok(CommandCheck::DataInCode), |
| 1475 | "data_in_code_if_present" => Ok(CommandCheck::DataInCodeIfPresent), |
| 1476 | "rebased_unwind_bytes" => Ok(CommandCheck::RebasedUnwindBytes), |
| 1477 | "dyld_info_rebase" => Ok(CommandCheck::DyldInfoRebase), |
| 1478 | "dyld_info_bind" => Ok(CommandCheck::DyldInfoBind), |
| 1479 | "dyld_info_weak_bind" => Ok(CommandCheck::DyldInfoWeakBind), |
| 1480 | "dyld_info_lazy_bind" => Ok(CommandCheck::DyldInfoLazyBind), |
| 1481 | other => Err(format!("unknown command check `{other}`")), |
| 1482 | } |
| 1483 | } |
| 1484 | |
| 1485 | fn parse_page_ref_kind(kind: &str) -> Result<PageRefKind, String> { |
| 1486 | match kind { |
| 1487 | "add" => Ok(PageRefKind::Add), |
| 1488 | "load" => Ok(PageRefKind::Load), |
| 1489 | other => Err(format!("unknown page-ref kind `{other}`")), |
| 1490 | } |
| 1491 | } |
| 1492 | |
| 1493 | fn parse_load_command_name(name: &str) -> Result<u32, String> { |
| 1494 | match name { |
| 1495 | "LC_SEGMENT_64" => Ok(LC_SEGMENT_64), |
| 1496 | "LC_LOAD_DYLIB" => Ok(LC_LOAD_DYLIB), |
| 1497 | "LC_UUID" => Ok(LC_UUID), |
| 1498 | "LC_CODE_SIGNATURE" => Ok(LC_CODE_SIGNATURE), |
| 1499 | "LC_LINKER_OPTIMIZATION_HINT" => Ok(afs_ld::macho::constants::LC_LINKER_OPTIMIZATION_HINT), |
| 1500 | other => Err(format!("unknown load command name `{other}`")), |
| 1501 | } |
| 1502 | } |
| 1503 | |
| 1504 | fn load_command_name(cmd: u32) -> &'static str { |
| 1505 | match cmd { |
| 1506 | LC_SEGMENT_64 => "LC_SEGMENT_64", |
| 1507 | LC_LOAD_DYLIB => "LC_LOAD_DYLIB", |
| 1508 | LC_UUID => "LC_UUID", |
| 1509 | LC_CODE_SIGNATURE => "LC_CODE_SIGNATURE", |
| 1510 | afs_ld::macho::constants::LC_LINKER_OPTIMIZATION_HINT => "LC_LINKER_OPTIMIZATION_HINT", |
| 1511 | _ => "unknown load command", |
| 1512 | } |
| 1513 | } |
| 1514 | |
/// Parses a `u64` token, accepting decimal or `0x`-prefixed hex.
fn parse_u64(value: &str) -> Result<u64, String> {
    match value.strip_prefix("0x") {
        Some(hex) => {
            u64::from_str_radix(hex, 16).map_err(|e| format!("parse hex `{value}`: {e}"))
        }
        None => value
            .parse::<u64>()
            .map_err(|e| format!("parse integer `{value}`: {e}")),
    }
}
| 1524 | |
/// Expands corpus placeholder arguments into concrete strings.
///
/// Recognized forms: `@OUT@`, `@SDK_PATH@`, `@SDK_VERSION@`,
/// `@SDK_TBD:rel/path@`, `@INPUT:name@`, `@FILE:name@`, `@ARTIFACT:name@`.
/// Anything else passes through unchanged; an unknown name in a keyed
/// placeholder is an error.
fn expand_args(
    args: &[String],
    compiled: &BTreeMap<String, PathBuf>,
    sidecars: &BTreeMap<String, PathBuf>,
    artifacts: &BTreeMap<String, PathBuf>,
    out: &Path,
    sdk: &str,
    sdk_ver: &str,
) -> Result<Vec<String>, String> {
    // `@PREFIX:name@` -> `name`, or `None` when `arg` is not that form.
    fn keyed<'a>(arg: &'a str, prefix: &str) -> Option<&'a str> {
        arg.strip_prefix(prefix)?.strip_suffix('@')
    }
    // Resolves a named path placeholder against one of the maps.
    let lookup = |map: &BTreeMap<String, PathBuf>, name: &str, kind: &str| {
        map.get(name)
            .map(|path| path.to_string_lossy().into_owned())
            .ok_or_else(|| format!("unknown parity {kind} placeholder `{name}`"))
    };
    args.iter()
        .map(|arg| {
            let expanded = match arg.as_str() {
                "@OUT@" => out.to_string_lossy().into_owned(),
                "@SDK_PATH@" => sdk.to_string(),
                "@SDK_VERSION@" => sdk_ver.to_string(),
                other => {
                    if let Some(rel) = keyed(other, "@SDK_TBD:") {
                        Path::new(sdk).join(rel).to_string_lossy().into_owned()
                    } else if let Some(name) = keyed(other, "@INPUT:") {
                        lookup(compiled, name, "input")?
                    } else if let Some(name) = keyed(other, "@FILE:") {
                        lookup(sidecars, name, "sidecar")?
                    } else if let Some(name) = keyed(other, "@ARTIFACT:") {
                        lookup(artifacts, name, "artifact")?
                    } else {
                        other.to_string()
                    }
                }
            };
            Ok(expanded)
        })
        .collect()
}
| 1589 | |
| 1590 | fn tolerated_mask(bytes: &[u8]) -> Vec<Option<&'static str>> { |
| 1591 | let mut mask = vec![None; bytes.len()]; |
| 1592 | let Ok(header) = parse_header(bytes) else { |
| 1593 | return mask; |
| 1594 | }; |
| 1595 | let cmd_base = 32usize; |
| 1596 | let Ok(cmd_limit) = cmd_base.checked_add(header.sizeofcmds as usize).ok_or(()) else { |
| 1597 | return mask; |
| 1598 | }; |
| 1599 | if cmd_limit > bytes.len() { |
| 1600 | return mask; |
| 1601 | } |
| 1602 | |
| 1603 | let mut cursor = cmd_base; |
| 1604 | for _ in 0..header.ncmds { |
| 1605 | if cursor + 8 > cmd_limit { |
| 1606 | break; |
| 1607 | } |
| 1608 | let cmd = u32_le(&bytes[cursor..cursor + 4]); |
| 1609 | let cmdsize = u32_le(&bytes[cursor + 4..cursor + 8]) as usize; |
| 1610 | if cmdsize < 8 || cursor + cmdsize > cmd_limit { |
| 1611 | break; |
| 1612 | } |
| 1613 | match cmd { |
| 1614 | LC_UUID => mark_range(&mut mask, cursor, cursor + cmdsize, "UUID bytes"), |
| 1615 | LC_CODE_SIGNATURE => { |
| 1616 | mark_range( |
| 1617 | &mut mask, |
| 1618 | cursor, |
| 1619 | cursor + cmdsize, |
| 1620 | "code-signature load command", |
| 1621 | ); |
| 1622 | if cmdsize >= 16 { |
| 1623 | let dataoff = u32_le(&bytes[cursor + 8..cursor + 12]) as usize; |
| 1624 | let datasize = u32_le(&bytes[cursor + 12..cursor + 16]) as usize; |
| 1625 | if let Some(end) = dataoff.checked_add(datasize) { |
| 1626 | if end <= bytes.len() { |
| 1627 | mark_range(&mut mask, dataoff, end, "code-signature hashes"); |
| 1628 | } |
| 1629 | } |
| 1630 | } |
| 1631 | } |
| 1632 | LC_ID_DYLIB | LC_LOAD_DYLIB | LC_LOAD_WEAK_DYLIB | LC_REEXPORT_DYLIB |
| 1633 | | LC_LOAD_UPWARD_DYLIB |
| 1634 | if cmdsize >= 16 => |
| 1635 | { |
| 1636 | mark_range(&mut mask, cursor + 12, cursor + 16, "dylib timestamp"); |
| 1637 | } |
| 1638 | _ => {} |
| 1639 | } |
| 1640 | cursor += cmdsize; |
| 1641 | } |
| 1642 | |
| 1643 | mask |
| 1644 | } |
| 1645 | |
/// Marks `mask[start..end)` with `reason`, clamping the range to the mask.
///
/// `end` is clamped to the mask length and `start` to `end`, so
/// out-of-bounds and inverted ranges become no-ops instead of panicking
/// (the original clamped both bounds independently, which panicked on
/// `start > end`).
fn mark_range(mask: &mut [Option<&'static str>], start: usize, end: usize, reason: &'static str) {
    let end = end.min(mask.len());
    let start = start.min(end);
    for slot in &mut mask[start..end] {
        *slot = Some(reason);
    }
}
| 1653 | |
| 1654 | fn build_version_command(bytes: &[u8]) -> Result<Option<BuildVersionCmd>, String> { |
| 1655 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 1656 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 1657 | Ok(commands.into_iter().find_map(|cmd| match cmd { |
| 1658 | LoadCommand::BuildVersion(cmd) => Some(cmd), |
| 1659 | _ => None, |
| 1660 | })) |
| 1661 | } |
| 1662 | |
| 1663 | fn normalized_build_version(bytes: &[u8]) -> Result<Option<NormalizedBuildVersion>, String> { |
| 1664 | Ok(build_version_command(bytes)?.map(|cmd| { |
| 1665 | ( |
| 1666 | cmd.platform, |
| 1667 | cmd.minos, |
| 1668 | cmd.sdk, |
| 1669 | cmd.tools.into_iter().map(|tool| tool.tool).collect(), |
| 1670 | ) |
| 1671 | })) |
| 1672 | } |
| 1673 | |
| 1674 | fn load_dylib_names(bytes: &[u8]) -> Result<Vec<String>, String> { |
| 1675 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 1676 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 1677 | Ok(commands |
| 1678 | .into_iter() |
| 1679 | .filter_map(|cmd| match cmd { |
| 1680 | LoadCommand::Dylib(cmd) if cmd.cmd == LC_LOAD_DYLIB => Some(cmd.name), |
| 1681 | _ => None, |
| 1682 | }) |
| 1683 | .collect()) |
| 1684 | } |
| 1685 | |
/// A symbol-table entry normalized for cross-linker comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CanonicalSymbolRecord {
    // Symbol name resolved from the string table.
    name: String,
    // Raw nlist `n_type` byte.
    n_type: u8,
    // Owning `(segment, section)` names for section symbols, else `None`.
    section: Option<(String, String)>,
    // Raw nlist `n_desc` flags.
    n_desc: u16,
    // Section-relative value for section symbols (when the raw value lies
    // at or above the section base); raw value otherwise.
    value: u64,
}
| 1694 | |
/// Export-trie entry payload normalized for cross-linker comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CanonicalExportKind {
    // Regular export carrying its canonical symbol value.
    Regular(u64),
    // Thread-local export carrying its canonical symbol value.
    ThreadLocal(u64),
    // Absolute export carrying its canonical symbol value.
    Absolute(u64),
    // Re-export from another dylib, by ordinal and imported name.
    Reexport { ordinal: u32, imported_name: String },
    // Stub-and-resolver export with both values taken from the trie entry.
    StubAndResolver { stub: u64, resolver: u64 },
}
| 1703 | |
/// One normalized export-trie record: name, raw flags, and payload.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CanonicalExportRecord {
    // Exported symbol name.
    name: String,
    // Raw export flags from the trie entry.
    flags: u64,
    // Normalized payload describing the export kind.
    kind: CanonicalExportKind,
}
| 1710 | |
| 1711 | fn canonical_symbol_record_map( |
| 1712 | bytes: &[u8], |
| 1713 | ) -> Result<BTreeMap<String, CanonicalSymbolRecord>, String> { |
| 1714 | Ok(canonical_symbol_records(bytes)? |
| 1715 | .into_iter() |
| 1716 | .map(|record| (record.name.clone(), record)) |
| 1717 | .collect()) |
| 1718 | } |
| 1719 | |
/// Builds a linker-independent view of the symbol table: each nlist entry
/// with its name, raw type/desc bytes, owning section names (if any), and a
/// section-relative value so absolute layout differences do not produce
/// false diffs. Entries matching `is_optional_dyld_stub_binder_record` are
/// dropped.
fn canonical_symbol_records(bytes: &[u8]) -> Result<Vec<CanonicalSymbolRecord>, String> {
    let (symtab, _) = symtab_and_dysymtab(bytes)?;
    let symbols =
        parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?;
    let strings =
        StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?;
    let sections = section_regions(bytes)?;
    Ok(symbols
        .iter()
        .map(|symbol| {
            // NOTE(review): the indexing below assumes every nlist section
            // index is within `sections`; a malformed image would panic here
            // — confirm inputs are always linker-produced outputs.
            let (section, value) = if symbol.kind() == SymKind::Sect && symbol.sect_idx() != 0 {
                let section = &sections[symbol.sect_idx() as usize - 1];
                // Store section-relative values where possible so differing
                // base addresses between linkers still compare equal.
                let value = if symbol.value() >= section.addr {
                    symbol.value() - section.addr
                } else {
                    symbol.value()
                };
                (
                    Some((section.segname.clone(), section.sectname.clone())),
                    value,
                )
            } else {
                (None, symbol.value())
            };
            CanonicalSymbolRecord {
                // NOTE(review): `unwrap` assumes every strx is valid in the
                // string table; a panic here indicates a corrupt image.
                name: strings.get(symbol.strx()).unwrap().to_string(),
                n_type: symbol.raw.n_type,
                section,
                n_desc: symbol.raw.n_desc,
                value,
            }
        })
        .filter(|record| !is_optional_dyld_stub_binder_record(record))
        .collect())
}
| 1755 | |
| 1756 | fn is_optional_dyld_stub_binder_record(record: &CanonicalSymbolRecord) -> bool { |
| 1757 | record.name == "dyld_stub_binder" |
| 1758 | && (record.n_type & N_TYPE) == N_UNDF |
| 1759 | && record.section.is_none() |
| 1760 | } |
| 1761 | |
/// Reads the export trie of `bytes` and canonicalizes every entry, sorted by
/// symbol name.
///
/// Regular/thread-local/absolute exports replace their trie address with the
/// canonical (section-relative) value from `canonical_symbol_records`, so
/// different base addresses do not show up as diffs. Reexports and
/// stub-and-resolver entries are carried through unchanged.
///
/// Panics (unwrap) if an exported name has no symbol-table record —
/// acceptable for a test harness.
fn canonical_export_records(bytes: &[u8]) -> Result<Vec<CanonicalExportRecord>, String> {
    // The path argument is only a label for the in-memory parse — TODO
    // confirm it is used solely in error messages.
    let dylib = DylibFile::parse("/tmp/canonical.dylib", bytes).map_err(|e| e.to_string())?;
    let symbol_values: BTreeMap<String, u64> = canonical_symbol_records(bytes)?
        .into_iter()
        .map(|record| (record.name, record.value))
        .collect();
    let mut out = dylib
        .exports
        .entries()
        .map_err(|e| e.to_string())?
        .into_iter()
        .map(|entry| {
            let kind = match entry.kind {
                ExportKind::Regular { .. } => {
                    CanonicalExportKind::Regular(*symbol_values.get(&entry.name).unwrap())
                }
                ExportKind::ThreadLocal { .. } => {
                    CanonicalExportKind::ThreadLocal(*symbol_values.get(&entry.name).unwrap())
                }
                ExportKind::Absolute { .. } => {
                    CanonicalExportKind::Absolute(*symbol_values.get(&entry.name).unwrap())
                }
                ExportKind::Reexport {
                    ordinal,
                    imported_name,
                } => CanonicalExportKind::Reexport {
                    ordinal,
                    imported_name,
                },
                ExportKind::StubAndResolver { stub, resolver } => {
                    CanonicalExportKind::StubAndResolver { stub, resolver }
                }
            };
            CanonicalExportRecord {
                name: entry.name,
                flags: entry.flags,
                kind,
            }
        })
        .collect::<Vec<_>>();
    // Sort by name so trie iteration order never affects the comparison.
    out.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
    Ok(out)
}
| 1805 | |
| 1806 | fn symbol_partition_names(bytes: &[u8]) -> Result<SymbolPartitions, String> { |
| 1807 | let (symtab, dysymtab) = symtab_and_dysymtab(bytes)?; |
| 1808 | let symbols = |
| 1809 | parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?; |
| 1810 | let strings = |
| 1811 | StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?; |
| 1812 | let names_for = |start: u32, count: u32| -> Vec<String> { |
| 1813 | symbols[start as usize..(start + count) as usize] |
| 1814 | .iter() |
| 1815 | .map(|symbol| strings.get(symbol.strx()).unwrap().to_string()) |
| 1816 | .collect() |
| 1817 | }; |
| 1818 | Ok(( |
| 1819 | names_for(dysymtab.ilocalsym, dysymtab.nlocalsym), |
| 1820 | names_for(dysymtab.iextdefsym, dysymtab.nextdefsym), |
| 1821 | names_for(dysymtab.iundefsym, dysymtab.nundefsym) |
| 1822 | .into_iter() |
| 1823 | .filter(|name| name != "dyld_stub_binder") |
| 1824 | .collect(), |
| 1825 | )) |
| 1826 | } |
| 1827 | |
| 1828 | fn has_optional_dyld_stub_binder(bytes: &[u8]) -> Result<bool, String> { |
| 1829 | let (symtab, _) = symtab_and_dysymtab(bytes)?; |
| 1830 | let symbols = |
| 1831 | parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?; |
| 1832 | let strings = |
| 1833 | StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?; |
| 1834 | Ok(symbols.iter().any(|symbol| { |
| 1835 | strings |
| 1836 | .get(symbol.strx()) |
| 1837 | .map(|name| { |
| 1838 | name == "dyld_stub_binder" |
| 1839 | && (symbol.raw.n_type & N_TYPE) == N_UNDF |
| 1840 | && symbol.raw.n_sect == 0 |
| 1841 | }) |
| 1842 | .unwrap_or(false) |
| 1843 | })) |
| 1844 | } |
| 1845 | |
| 1846 | fn raw_string_table(bytes: &[u8]) -> Result<Vec<u8>, String> { |
| 1847 | let (symtab, _) = symtab_and_dysymtab(bytes)?; |
| 1848 | let start = symtab.stroff as usize; |
| 1849 | let end = start + symtab.strsize as usize; |
| 1850 | Ok(bytes[start..end].to_vec()) |
| 1851 | } |
| 1852 | |
| 1853 | fn effective_string_table_len(bytes: &[u8]) -> Result<usize, String> { |
| 1854 | let mut len = raw_string_table(bytes)?.len(); |
| 1855 | if has_optional_dyld_stub_binder(bytes)? { |
| 1856 | len = len.saturating_sub("dyld_stub_binder".len() + 1); |
| 1857 | } |
| 1858 | Ok(len) |
| 1859 | } |
| 1860 | |
/// Returns true when `ours` is within 5% of `theirs`.
///
/// Implemented in integer arithmetic: `delta <= theirs / 20` is rewritten as
/// `delta * 20 <= theirs` to avoid truncating the 5% threshold.
pub fn string_table_within_five_percent(ours: usize, theirs: usize) -> bool {
    let delta = if ours > theirs {
        ours - theirs
    } else {
        theirs - ours
    };
    20 * delta <= theirs
}
| 1865 | |
| 1866 | fn indirect_symbol_table(bytes: &[u8]) -> Result<Vec<u32>, String> { |
| 1867 | let (_, dysymtab) = symtab_and_dysymtab(bytes)?; |
| 1868 | if dysymtab.nindirectsyms == 0 { |
| 1869 | return Ok(Vec::new()); |
| 1870 | } |
| 1871 | let start = dysymtab.indirectsymoff as usize; |
| 1872 | let end = start + dysymtab.nindirectsyms as usize * 4; |
| 1873 | Ok(bytes[start..end] |
| 1874 | .chunks_exact(4) |
| 1875 | .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap())) |
| 1876 | .collect()) |
| 1877 | } |
| 1878 | |
| 1879 | fn indirect_symbol_identities(bytes: &[u8]) -> Result<Vec<String>, String> { |
| 1880 | let (symtab, _) = symtab_and_dysymtab(bytes)?; |
| 1881 | let symbols = |
| 1882 | parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?; |
| 1883 | let strings = |
| 1884 | StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?; |
| 1885 | Ok(indirect_symbol_table(bytes)? |
| 1886 | .into_iter() |
| 1887 | .map(|index| { |
| 1888 | if index & INDIRECT_SYMBOL_LOCAL != 0 { |
| 1889 | if index & INDIRECT_SYMBOL_ABS != 0 { |
| 1890 | "<LOCAL|ABS>".to_string() |
| 1891 | } else { |
| 1892 | "<LOCAL>".to_string() |
| 1893 | } |
| 1894 | } else if index & INDIRECT_SYMBOL_ABS != 0 { |
| 1895 | "<ABS>".to_string() |
| 1896 | } else { |
| 1897 | let symbol = &symbols[index as usize]; |
| 1898 | strings.get(symbol.strx()).unwrap().to_string() |
| 1899 | } |
| 1900 | }) |
| 1901 | .collect()) |
| 1902 | } |
| 1903 | |
| 1904 | fn raw_linkedit_data_cmd(bytes: &[u8], expected_cmd: u32) -> Result<(u32, u32), String> { |
| 1905 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 1906 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 1907 | for cmd in commands { |
| 1908 | match cmd { |
| 1909 | LoadCommand::Raw { cmd, data, .. } if cmd == expected_cmd => { |
| 1910 | return Ok((u32_le(&data[0..4]), u32_le(&data[4..8]))); |
| 1911 | } |
| 1912 | LoadCommand::LinkerOptimizationHint(linkedit) |
| 1913 | if expected_cmd == afs_ld::macho::constants::LC_LINKER_OPTIMIZATION_HINT => |
| 1914 | { |
| 1915 | return Ok((linkedit.dataoff, linkedit.datasize)); |
| 1916 | } |
| 1917 | _ => {} |
| 1918 | } |
| 1919 | } |
| 1920 | Err(format!("missing raw linkedit command 0x{expected_cmd:x}")) |
| 1921 | } |
| 1922 | |
| 1923 | fn linkedit_payload(bytes: &[u8], cmd: u32) -> Result<Vec<u8>, String> { |
| 1924 | let (dataoff, datasize) = raw_linkedit_data_cmd(bytes, cmd)?; |
| 1925 | if datasize == 0 { |
| 1926 | return Ok(Vec::new()); |
| 1927 | } |
| 1928 | Ok(bytes[dataoff as usize..(dataoff + datasize) as usize].to_vec()) |
| 1929 | } |
| 1930 | |
| 1931 | fn decode_function_starts(bytes: &[u8]) -> Result<Vec<u64>, String> { |
| 1932 | let payload = linkedit_payload(bytes, LC_FUNCTION_STARTS)?; |
| 1933 | let mut offsets = Vec::new(); |
| 1934 | let mut cursor = 0usize; |
| 1935 | let mut current = 0u64; |
| 1936 | while cursor < payload.len() { |
| 1937 | let (delta, used) = read_uleb(&payload[cursor..]).map_err(|e| e.to_string())?; |
| 1938 | cursor += used; |
| 1939 | if delta == 0 { |
| 1940 | break; |
| 1941 | } |
| 1942 | current += delta; |
| 1943 | offsets.push(current); |
| 1944 | } |
| 1945 | Ok(offsets) |
| 1946 | } |
| 1947 | |
/// Rebases the function-start list against its first entry so the list is
/// insensitive to where __text begins; an empty input yields an empty list.
fn normalize_function_start_offsets(starts: &[u64]) -> Vec<u64> {
    match starts.first() {
        None => Vec::new(),
        Some(&base) => starts.iter().map(|&start| start - base).collect(),
    }
}
| 1954 | |
/// One 8-byte `LC_DATA_IN_CODE` entry: a run of non-instruction bytes inside
/// the text plus its kind tag.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DataInCodeRecord {
    // Raw offset from the load-command payload; rebased against the __text
    // file offset by `canonical_data_in_code`.
    offset: u32,
    // Byte length of the data run.
    length: u16,
    // Kind tag (DICE_KIND_* — TODO confirm against the constants module).
    kind: u16,
}
| 1961 | |
| 1962 | fn decode_data_in_code(bytes: &[u8]) -> Result<Vec<DataInCodeRecord>, String> { |
| 1963 | let payload = linkedit_payload(bytes, LC_DATA_IN_CODE)?; |
| 1964 | Ok(payload |
| 1965 | .chunks_exact(8) |
| 1966 | .map(|chunk| DataInCodeRecord { |
| 1967 | offset: u32::from_le_bytes(chunk[0..4].try_into().unwrap()), |
| 1968 | length: u16::from_le_bytes(chunk[4..6].try_into().unwrap()), |
| 1969 | kind: u16::from_le_bytes(chunk[6..8].try_into().unwrap()), |
| 1970 | }) |
| 1971 | .collect()) |
| 1972 | } |
| 1973 | |
| 1974 | fn canonical_data_in_code(bytes: &[u8]) -> Result<Vec<DataInCodeRecord>, String> { |
| 1975 | let text = output_section_header(bytes, "__TEXT", "__text") |
| 1976 | .ok_or_else(|| "missing __TEXT,__text section".to_string())?; |
| 1977 | Ok(decode_data_in_code(bytes)? |
| 1978 | .into_iter() |
| 1979 | .map(|record| DataInCodeRecord { |
| 1980 | offset: record.offset - text.offset, |
| 1981 | length: record.length, |
| 1982 | kind: record.kind, |
| 1983 | }) |
| 1984 | .collect()) |
| 1985 | } |
| 1986 | |
/// Where a bind record points, normalized for diffing.
///
/// Preferably a named section plus an offset inside it; falls back to raw
/// (segment index, offset) coordinates when the address does not land inside
/// any section (see `canonical_bind_location`).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum CanonicalBindLocation {
    /// Address resolved to a section; `offset` is relative to section start.
    Section {
        segname: String,
        sectname: String,
        offset: u64,
    },
    /// Fallback when no section contains the address.
    Segment {
        segment_index: u8,
        segment_offset: u64,
    },
}
| 1999 | |
/// One normalized dyld bind: where the pointer is written plus the full dyld
/// interpreter state in effect at the bind (ordinal, symbol, flags, type,
/// addend). Derives `Ord` so record lists can be sorted for comparison.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct CanonicalBindRecord {
    location: CanonicalBindLocation,
    // Dylib ordinal; special (negative) ordinals from
    // BIND_OPCODE_SET_DYLIB_SPECIAL_IMM are preserved.
    ordinal: i32,
    symbol: String,
    weak_import: bool,
    bind_type: u8,
    addend: i64,
}
| 2009 | |
/// Interprets one `LC_DYLD_INFO_ONLY` opcode stream (selected by `kind`) and
/// materializes every bind it performs as a `CanonicalBindRecord`.
///
/// Mirrors dyld's interpreter state machine: the SET_* opcodes mutate the
/// running state (ordinal, symbol/flags, type, addend, segment cursor) and
/// the DO_BIND* opcodes emit a record from the current snapshot, then
/// advance the cursor past the 8-byte pointer (plus any encoded skip).
/// Section-relative offsets are renumbered by
/// `normalize_bind_section_offsets` and the records sorted, so opcode
/// encoding differences between linkers do not show up as diffs.
fn canonical_bind_records(
    bytes: &[u8],
    kind: DyldInfoStreamKind,
) -> Result<Vec<CanonicalBindRecord>, String> {
    let stream = dyld_info_stream(bytes, kind)?;
    let mut cursor = 0usize;
    // Interpreter state, initialized exactly as dyld does.
    let mut segment_index = 0u8;
    let mut segment_offset = 0u64;
    let mut ordinal = 0i32;
    let mut symbol = String::new();
    let mut weak_import = false;
    let mut bind_type = BIND_TYPE_POINTER;
    let mut addend = 0i64;
    let mut out = Vec::new();

    while cursor < stream.len() {
        let byte = stream[cursor];
        cursor += 1;
        // Each opcode byte packs the opcode in the high nibble and an
        // immediate in the low nibble.
        let opcode = byte & BIND_OPCODE_MASK;
        let imm = byte & BIND_IMMEDIATE_MASK;
        match opcode {
            BIND_OPCODE_DONE => break,
            BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => ordinal = imm as i32,
            BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                ordinal = value as i32;
            }
            BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => {
                // Special ordinals are negative; sign-extend the 4-bit
                // immediate, keeping 0 as 0.
                ordinal = if imm == 0 {
                    0
                } else {
                    (((imm as i8) << 4) >> 4) as i32
                };
            }
            BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
                // NUL-terminated symbol name follows the opcode byte; the
                // immediate carries the symbol flags.
                let (value, used) = read_c_string(&stream[cursor..])?;
                cursor += used;
                symbol = value;
                weak_import = (imm & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0;
            }
            BIND_OPCODE_SET_TYPE_IMM => bind_type = imm,
            BIND_OPCODE_SET_ADDEND_SLEB => {
                let (value, used) =
                    read_sleb(&stream[cursor..]).map_err(|e| format!("bind sleb: {e}"))?;
                cursor += used;
                addend = value;
            }
            BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                segment_index = imm;
                segment_offset = value;
            }
            BIND_OPCODE_ADD_ADDR_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                segment_offset += value;
            }
            BIND_OPCODE_DO_BIND => {
                out.push(CanonicalBindRecord {
                    location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                    ordinal,
                    symbol: symbol.clone(),
                    weak_import,
                    bind_type,
                    addend,
                });
                // Every bind advances past the 8-byte pointer it wrote.
                segment_offset += 8;
            }
            BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => {
                let (value, used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += used;
                out.push(CanonicalBindRecord {
                    location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                    ordinal,
                    symbol: symbol.clone(),
                    weak_import,
                    bind_type,
                    addend,
                });
                segment_offset += 8 + value;
            }
            BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => {
                out.push(CanonicalBindRecord {
                    location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                    ordinal,
                    symbol: symbol.clone(),
                    weak_import,
                    bind_type,
                    addend,
                });
                // The skip is the immediate scaled by the pointer size.
                segment_offset += 8 + (imm as u64) * 8;
            }
            BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => {
                let (count, count_used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += count_used;
                let (skip, skip_used) =
                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
                cursor += skip_used;
                for _ in 0..count {
                    out.push(CanonicalBindRecord {
                        location: canonical_bind_location(bytes, segment_index, segment_offset)?,
                        ordinal,
                        symbol: symbol.clone(),
                        weak_import,
                        bind_type,
                        addend,
                    });
                    segment_offset += 8 + skip;
                }
            }
            other => return Err(format!("unsupported bind opcode 0x{other:02x}")),
        }
    }

    // Strip layout-dependent section offsets and impose a total order so the
    // comparison is insensitive to stream ordering.
    normalize_bind_section_offsets(&mut out);
    out.sort();
    Ok(out)
}
| 2135 | |
| 2136 | fn normalize_bind_section_offsets(records: &mut [CanonicalBindRecord]) { |
| 2137 | let mut next_offsets: BTreeMap<(String, String), u64> = BTreeMap::new(); |
| 2138 | records.sort(); |
| 2139 | for record in records.iter_mut() { |
| 2140 | let CanonicalBindLocation::Section { |
| 2141 | segname, |
| 2142 | sectname, |
| 2143 | offset, |
| 2144 | } = &mut record.location |
| 2145 | else { |
| 2146 | continue; |
| 2147 | }; |
| 2148 | let next = next_offsets |
| 2149 | .entry((segname.clone(), sectname.clone())) |
| 2150 | .or_insert(0); |
| 2151 | *offset = *next; |
| 2152 | *next += 8; |
| 2153 | } |
| 2154 | } |
| 2155 | |
/// Returns the `__TEXT,__unwind_info` payload with absolute addresses
/// rewritten to relative offsets so two layouts can be byte-compared.
///
/// Rebasing applied:
/// - personality pointers become GOT-relative when they land inside
///   `__DATA_CONST,__got`, otherwise `__text`-relative;
/// - first-level index entries get `__text`-relative function offsets;
/// - the LSDA directory (bracketed by the first/last index LSDA offsets)
///   gets `__text`-relative function offsets and
///   `__gcc_except_tab`-relative LSDA pointers.
///
/// NOTE(review): assumes the LSDA directory is a contiguous run of 8-byte
/// (function, lsda) pairs between the first and last index offsets — confirm
/// against the unwind-info writer.
fn rebased_unwind_bytes(bytes: &[u8]) -> Result<Vec<u8>, String> {
    let header_base = segment_vmaddr(bytes, "__TEXT").unwrap_or(0);
    let text_base = output_section(bytes, "__TEXT", "__text")
        .ok_or_else(|| "missing __TEXT,__text section".to_string())?
        .0
        - header_base;
    let got_range = output_section(bytes, "__DATA_CONST", "__got")
        .map(|(addr, data)| (addr - header_base, addr - header_base + data.len() as u64));
    let lsda_base =
        output_section(bytes, "__TEXT", "__gcc_except_tab").map(|(addr, _)| addr - header_base);
    let (_, unwind) = output_section(bytes, "__TEXT", "__unwind_info")
        .ok_or_else(|| "missing __TEXT,__unwind_info section".to_string())?;
    let mut out = unwind;
    // Too small to hold the fixed header fields patched below; return as-is.
    if out.len() < 28 {
        return Ok(out);
    }

    // Fixed-position header fields, each a 32-bit little-endian value.
    let personalities_offset = u32_le(&out[12..16]) as usize;
    let personalities_count = u32_le(&out[16..20]) as usize;
    let indices_offset = u32_le(&out[20..24]) as usize;
    let indices_count = u32_le(&out[24..28]) as usize;

    // Personality pointers: prefer GOT-relative, fall back to text-relative,
    // and leave values below the text base untouched.
    for idx in 0..personalities_count {
        let off = personalities_offset + idx * 4;
        let value = u32_le(&out[off..off + 4]) as u64;
        let rebased = if let Some((got_start, got_end)) = got_range {
            if got_start <= value && value < got_end {
                value - got_start
            } else if value >= text_base {
                value - text_base
            } else {
                value
            }
        } else if value >= text_base {
            value - text_base
        } else {
            value
        };
        out[off..off + 4].copy_from_slice(&(rebased as u32).to_le_bytes());
    }

    // First-level index entries are 12 bytes: rebase the function offset and
    // remember each entry's LSDA directory offset (bytes 8..12).
    let mut lsda_offsets = Vec::with_capacity(indices_count);
    for idx in 0..indices_count {
        let entry_off = indices_offset + idx * 12;
        let function_offset = u32_le(&out[entry_off..entry_off + 4]) as u64;
        let rebased = function_offset.saturating_sub(text_base);
        out[entry_off..entry_off + 4].copy_from_slice(&(rebased as u32).to_le_bytes());
        lsda_offsets.push(u32_le(&out[entry_off + 8..entry_off + 12]) as usize);
    }

    // Walk the LSDA directory between the first and last recorded offsets,
    // rebasing each (function, lsda) pair.
    if let (Some(lsda_base), Some(&start), Some(&end)) =
        (lsda_base, lsda_offsets.first(), lsda_offsets.last())
    {
        let mut entry_off = start;
        while entry_off < end {
            let function_offset = u32_le(&out[entry_off..entry_off + 4]) as u64;
            let lsda_offset = u32_le(&out[entry_off + 4..entry_off + 8]) as u64;
            out[entry_off..entry_off + 4]
                .copy_from_slice(&(function_offset.saturating_sub(text_base) as u32).to_le_bytes());
            out[entry_off + 4..entry_off + 8]
                .copy_from_slice(&(lsda_offset.saturating_sub(lsda_base) as u32).to_le_bytes());
            entry_off += 8;
        }
    }

    // Sanity-check that the patched blob still decodes as unwind info.
    let _ = decode_unwind_info(&out).map_err(|e| format!("decode unwind info: {e}"))?;
    Ok(out)
}
| 2224 | |
| 2225 | fn symtab_and_dysymtab( |
| 2226 | bytes: &[u8], |
| 2227 | ) -> Result< |
| 2228 | ( |
| 2229 | afs_ld::macho::reader::SymtabCmd, |
| 2230 | afs_ld::macho::reader::DysymtabCmd, |
| 2231 | ), |
| 2232 | String, |
| 2233 | > { |
| 2234 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 2235 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 2236 | let mut symtab = None; |
| 2237 | let mut dysymtab = None; |
| 2238 | for cmd in commands { |
| 2239 | match cmd { |
| 2240 | LoadCommand::Symtab(cmd) => symtab = Some(cmd), |
| 2241 | LoadCommand::Dysymtab(cmd) => dysymtab = Some(cmd), |
| 2242 | _ => {} |
| 2243 | } |
| 2244 | } |
| 2245 | Ok(( |
| 2246 | symtab.ok_or_else(|| "missing LC_SYMTAB".to_string())?, |
| 2247 | dysymtab.ok_or_else(|| "missing LC_DYSYMTAB".to_string())?, |
| 2248 | )) |
| 2249 | } |
| 2250 | |
| 2251 | fn section_addrs(bytes: &[u8]) -> Result<Vec<u64>, String> { |
| 2252 | Ok(section_regions(bytes)? |
| 2253 | .into_iter() |
| 2254 | .map(|section| section.addr) |
| 2255 | .collect()) |
| 2256 | } |
| 2257 | |
/// One `LC_SEGMENT_64` summarized for address lookups.
#[derive(Debug, Clone)]
struct SegmentRegion {
    // Position among segment commands (the index bind opcodes refer to).
    index: u8,
    segname: String,
    vmaddr: u64,
    vmsize: u64,
}
| 2265 | |
/// One section-64 header summarized for address lookups.
#[derive(Debug, Clone)]
struct SectionRegion {
    // Index of the segment command that owns this section.
    segment_index: u8,
    segname: String,
    sectname: String,
    addr: u64,
    size: u64,
}
| 2274 | |
/// An address expressed as (segment name, section name, offset into the
/// section) — the layout-insensitive form used throughout the harness.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct CanonicalSectionLocation {
    segname: String,
    sectname: String,
    offset: u64,
}
| 2281 | |
| 2282 | fn segment_regions(bytes: &[u8]) -> Result<Vec<SegmentRegion>, String> { |
| 2283 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 2284 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 2285 | let mut out = Vec::new(); |
| 2286 | let mut index = 0u8; |
| 2287 | for cmd in commands { |
| 2288 | if let LoadCommand::Segment64(seg) = cmd { |
| 2289 | out.push(SegmentRegion { |
| 2290 | index, |
| 2291 | segname: seg.segname_str().to_string(), |
| 2292 | vmaddr: seg.vmaddr, |
| 2293 | vmsize: seg.vmsize, |
| 2294 | }); |
| 2295 | index = index.saturating_add(1); |
| 2296 | } |
| 2297 | } |
| 2298 | Ok(out) |
| 2299 | } |
| 2300 | |
| 2301 | fn section_regions(bytes: &[u8]) -> Result<Vec<SectionRegion>, String> { |
| 2302 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 2303 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 2304 | let mut out = Vec::new(); |
| 2305 | let mut segment_index = 0u8; |
| 2306 | for cmd in commands { |
| 2307 | if let LoadCommand::Segment64(seg) = cmd { |
| 2308 | for section in seg.sections { |
| 2309 | out.push(SectionRegion { |
| 2310 | segment_index, |
| 2311 | segname: section.segname_str().to_string(), |
| 2312 | sectname: section.sectname_str().to_string(), |
| 2313 | addr: section.addr, |
| 2314 | size: section.size, |
| 2315 | }); |
| 2316 | } |
| 2317 | segment_index = segment_index.saturating_add(1); |
| 2318 | } |
| 2319 | } |
| 2320 | Ok(out) |
| 2321 | } |
| 2322 | |
| 2323 | fn canonical_bind_location( |
| 2324 | bytes: &[u8], |
| 2325 | segment_index: u8, |
| 2326 | segment_offset: u64, |
| 2327 | ) -> Result<CanonicalBindLocation, String> { |
| 2328 | let segments = segment_regions(bytes)?; |
| 2329 | let sections = section_regions(bytes)?; |
| 2330 | let Some(segment) = segments |
| 2331 | .iter() |
| 2332 | .find(|segment| segment.index == segment_index) |
| 2333 | else { |
| 2334 | return Ok(CanonicalBindLocation::Segment { |
| 2335 | segment_index, |
| 2336 | segment_offset, |
| 2337 | }); |
| 2338 | }; |
| 2339 | if segment_offset >= segment.vmsize { |
| 2340 | return Ok(CanonicalBindLocation::Segment { |
| 2341 | segment_index, |
| 2342 | segment_offset, |
| 2343 | }); |
| 2344 | } |
| 2345 | let addr = segment.vmaddr + segment_offset; |
| 2346 | if let Some(section) = sections.iter().find(|section| { |
| 2347 | section.segment_index == segment_index |
| 2348 | && section.addr <= addr |
| 2349 | && addr < section.addr + section.size |
| 2350 | }) { |
| 2351 | return Ok(CanonicalBindLocation::Section { |
| 2352 | segname: section.segname.clone(), |
| 2353 | sectname: section.sectname.clone(), |
| 2354 | offset: addr - section.addr, |
| 2355 | }); |
| 2356 | } |
| 2357 | Ok(CanonicalBindLocation::Segment { |
| 2358 | segment_index, |
| 2359 | segment_offset, |
| 2360 | }) |
| 2361 | } |
| 2362 | |
| 2363 | fn canonical_section_location(bytes: &[u8], addr: u64) -> Result<CanonicalSectionLocation, String> { |
| 2364 | let sections = section_regions(bytes)?; |
| 2365 | let section = sections |
| 2366 | .into_iter() |
| 2367 | .find(|section| section.addr <= addr && addr < section.addr + section.size) |
| 2368 | .ok_or_else(|| format!("address 0x{addr:x} is not inside any output section"))?; |
| 2369 | Ok(CanonicalSectionLocation { |
| 2370 | segname: section.segname, |
| 2371 | sectname: section.sectname, |
| 2372 | offset: addr - section.addr, |
| 2373 | }) |
| 2374 | } |
| 2375 | |
/// Selects which of the four `LC_DYLD_INFO_ONLY` opcode streams to read
/// (see `dyld_info_stream`).
#[derive(Clone, Copy)]
enum DyldInfoStreamKind {
    Rebase,
    Bind,
    WeakBind,
    LazyBind,
}
| 2383 | |
| 2384 | fn dyld_info_command(bytes: &[u8]) -> Result<DyldInfoCmd, String> { |
| 2385 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 2386 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 2387 | commands |
| 2388 | .into_iter() |
| 2389 | .find_map(|cmd| match cmd { |
| 2390 | LoadCommand::DyldInfoOnly(cmd) => Some(cmd), |
| 2391 | _ => None, |
| 2392 | }) |
| 2393 | .ok_or_else(|| "missing LC_DYLD_INFO_ONLY".to_string()) |
| 2394 | } |
| 2395 | |
| 2396 | fn dyld_info_stream(bytes: &[u8], kind: DyldInfoStreamKind) -> Result<Vec<u8>, String> { |
| 2397 | let dyld_info = dyld_info_command(bytes)?; |
| 2398 | let (off, size) = match kind { |
| 2399 | DyldInfoStreamKind::Rebase => (dyld_info.rebase_off, dyld_info.rebase_size), |
| 2400 | DyldInfoStreamKind::Bind => (dyld_info.bind_off, dyld_info.bind_size), |
| 2401 | DyldInfoStreamKind::WeakBind => (dyld_info.weak_bind_off, dyld_info.weak_bind_size), |
| 2402 | DyldInfoStreamKind::LazyBind => (dyld_info.lazy_bind_off, dyld_info.lazy_bind_size), |
| 2403 | }; |
| 2404 | if size == 0 { |
| 2405 | return Ok(Vec::new()); |
| 2406 | } |
| 2407 | let start = off as usize; |
| 2408 | let end = start + size as usize; |
| 2409 | bytes |
| 2410 | .get(start..end) |
| 2411 | .map(|slice| slice.to_vec()) |
| 2412 | .ok_or_else(|| "dyld-info stream out of bounds".to_string()) |
| 2413 | } |
| 2414 | |
/// Reads a NUL-terminated UTF-8 string from the front of `bytes`, returning
/// the decoded text and the number of bytes consumed (terminator included).
/// Errors when no NUL is present or the bytes are not valid UTF-8.
fn read_c_string(bytes: &[u8]) -> Result<(String, usize), String> {
    let Some(nul) = bytes.iter().position(|&byte| byte == 0) else {
        return Err("unterminated C string".to_string());
    };
    let text =
        std::str::from_utf8(&bytes[..nul]).map_err(|e| format!("utf-8 in C string: {e}"))?;
    Ok((text.to_string(), nul + 1))
}
| 2425 | |
| 2426 | fn canonical_stub_targets(bytes: &[u8]) -> Result<Vec<u64>, String> { |
| 2427 | let header = output_section_header(bytes, "__TEXT", "__stubs") |
| 2428 | .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?; |
| 2429 | let (section_addr, section_bytes) = output_section(bytes, "__TEXT", "__stubs") |
| 2430 | .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?; |
| 2431 | if section_bytes.is_empty() { |
| 2432 | return Ok(Vec::new()); |
| 2433 | } |
| 2434 | let stub_size = usize::try_from(header.reserved2) |
| 2435 | .ok() |
| 2436 | .filter(|size| *size > 0) |
| 2437 | .unwrap_or(12); |
| 2438 | if section_bytes.len() % stub_size != 0 { |
| 2439 | return Err(format!( |
| 2440 | "__TEXT,__stubs size {} is not a multiple of stub size {}", |
| 2441 | section_bytes.len(), |
| 2442 | stub_size |
| 2443 | )); |
| 2444 | } |
| 2445 | let mut out = Vec::new(); |
| 2446 | for (idx, chunk) in section_bytes.chunks_exact(stub_size).enumerate() { |
| 2447 | out.push(decode_stub_target( |
| 2448 | chunk, |
| 2449 | section_addr + (idx * stub_size) as u64, |
| 2450 | )?); |
| 2451 | } |
| 2452 | Ok(out) |
| 2453 | } |
| 2454 | |
/// Canonical view of `__TEXT,__stub_helper` (see `canonical_stub_helper`).
#[derive(Debug, Clone, PartialEq, Eq)]
struct CanonicalStubHelper {
    // Location the header's first ADRP/ADD pair points at.
    dyld_private: CanonicalSectionLocation,
    // Location of the dyld_stub_binder GOT slot the header loads through.
    binder_got: CanonicalSectionLocation,
    // One lazy-bind-stream offset per helper entry, in entry order.
    lazy_bind_offsets: Vec<u32>,
}
| 2461 | |
/// Canonicalizes `__TEXT,__stub_helper`: resolves the header's references to
/// the dyld-private word and the binder GOT slot into section-relative
/// locations, and collects each entry's lazy-bind-stream offset.
///
/// Validates the entry shape: each entry must begin with the exact LDR
/// literal word 0x1800_0050, branch back to the section header, and carry
/// its 32-bit lazy-bind offset at bytes 8..12.
fn canonical_stub_helper(bytes: &[u8]) -> Result<CanonicalStubHelper, String> {
    let (section_addr, section_bytes) = output_section(bytes, "__TEXT", "__stub_helper")
        .ok_or_else(|| "missing __TEXT,__stub_helper section".to_string())?;
    if section_bytes.len() < STUB_HELPER_HEADER_SIZE as usize {
        return Err(format!(
            "__TEXT,__stub_helper is too small for header: {} < {}",
            section_bytes.len(),
            STUB_HELPER_HEADER_SIZE
        ));
    }
    // Header site 0: ADRP+ADD materializing the dyld-private address.
    let dyld_private_target =
        decode_page_reference(&section_bytes, section_addr, 0, PageRefKind::Add)?;
    // Header site 12: ADRP+LDR through the binder's GOT slot.
    let binder_got_target =
        decode_page_reference(&section_bytes, section_addr, 12, PageRefKind::Load)?;
    let dyld_private = canonical_section_location(bytes, dyld_private_target)?;
    let binder_got = canonical_section_location(bytes, binder_got_target)?;

    let entry_bytes = &section_bytes[STUB_HELPER_HEADER_SIZE as usize..];
    if entry_bytes.len() % STUB_HELPER_ENTRY_SIZE as usize != 0 {
        return Err(format!(
            "__TEXT,__stub_helper entries {} are not a multiple of {}",
            entry_bytes.len(),
            STUB_HELPER_ENTRY_SIZE
        ));
    }

    let mut lazy_bind_offsets = Vec::new();
    for (idx, chunk) in entry_bytes
        .chunks_exact(STUB_HELPER_ENTRY_SIZE as usize)
        .enumerate()
    {
        let entry_addr = section_addr
            + STUB_HELPER_HEADER_SIZE as u64
            + (idx as u64) * STUB_HELPER_ENTRY_SIZE as u64;
        let ldr = read_insn(chunk, 0)?;
        // Exact-match the expected LDR literal word.
        if ldr != 0x1800_0050 {
            return Err(format!(
                "stub helper entry at 0x{entry_addr:x} does not start with LDR literal"
            ));
        }
        let branch = read_insn(chunk, 4)?;
        let branch_target = decode_branch26_target(branch, entry_addr + 4)?;
        // Every entry must branch back to the shared header code.
        if branch_target != section_addr {
            return Err(format!(
                "stub helper entry at 0x{entry_addr:x} branches to 0x{branch_target:x}, expected header 0x{section_addr:x}"
            ));
        }
        lazy_bind_offsets.push(u32_le(&chunk[8..12]));
    }

    Ok(CanonicalStubHelper {
        dyld_private,
        binder_got,
        lazy_bind_offsets,
    })
}
| 2518 | |
/// Decodes the pointer-slot address targeted by a 12-byte AArch64 stub
/// (ADRP / LDR unsigned-immediate / BR).
///
/// Validates the three-instruction shape and that all four register fields
/// name the same scratch register, then reconstructs
/// `page(stub_addr) + adrp_imm * 0x1000 + ldr_imm12 * 8` (imm12 scaled by 8
/// for a 64-bit load).
fn decode_stub_target(bytes: &[u8], stub_addr: u64) -> Result<u64, String> {
    let adrp = read_insn(bytes, 0)?;
    let ldr = read_insn(bytes, 4)?;
    let br = read_insn(bytes, 8)?;
    if (adrp & 0x9f00_0000) != 0x9000_0000 {
        return Err(format!("stub at 0x{stub_addr:x} does not start with ADRP"));
    }
    if (ldr & 0xffc0_0000) != 0xf940_0000 {
        return Err(format!(
            "stub at 0x{stub_addr:x} does not use LDR (unsigned)"
        ));
    }
    if (br & 0xffff_fc1f) != 0xd61f_0000 {
        return Err(format!("stub at 0x{stub_addr:x} does not end with BR"));
    }
    // Register fields: ADRP Rd, LDR Rn/Rt, BR Rn must all agree.
    let adrp_reg = (adrp & 0x1f) as u8;
    let ldr_base = ((ldr >> 5) & 0x1f) as u8;
    let ldr_reg = (ldr & 0x1f) as u8;
    let br_reg = ((br >> 5) & 0x1f) as u8;
    if adrp_reg != ldr_base || adrp_reg != ldr_reg || adrp_reg != br_reg {
        return Err(format!(
            "stub at 0x{stub_addr:x} uses inconsistent scratch regs: adrp=x{adrp_reg}, ldr base=x{ldr_base}, ldr rt=x{ldr_reg}, br=x{br_reg}"
        ));
    }
    // ADRP splits its signed 21-bit page count into immlo (bits 30:29) and
    // immhi (bits 23:5); the result is relative to the stub's 4 KiB page.
    let adrp_immlo = ((adrp >> 29) & 0x3) as i64;
    let adrp_immhi = ((adrp >> 5) & 0x7ffff) as i64;
    let adrp_pages = sign_extend_21((adrp_immhi << 2) | adrp_immlo);
    let adrp_base = ((stub_addr as i64) & !0xfff) + (adrp_pages << 12);
    let scaled = ((ldr >> 10) & 0xfff) as u64;
    Ok((adrp_base as u64) + scaled * 8)
}
| 2550 | |
| 2551 | fn decode_branch26_target(insn: u32, place: u64) -> Result<u64, String> { |
| 2552 | if (insn & 0xfc00_0000) != 0x1400_0000 { |
| 2553 | return Err(format!( |
| 2554 | "instruction 0x{insn:08x} at 0x{place:x} is not a B/BL branch26" |
| 2555 | )); |
| 2556 | } |
| 2557 | let imm26 = sign_extend_26((insn & 0x03ff_ffff) as i64); |
| 2558 | Ok(((place as i64) + (imm26 << 2)) as u64) |
| 2559 | } |
| 2560 | |
/// Sign-extends the low 26 bits of `value` to a full i64 via an arithmetic
/// shift pair.
fn sign_extend_26(value: i64) -> i64 {
    const SHIFT: u32 = 64 - 26;
    (value << SHIFT) >> SHIFT
}
| 2565 | |
| 2566 | fn symbol_values(bytes: &[u8]) -> Result<BTreeMap<String, u64>, String> { |
| 2567 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 2568 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |
| 2569 | let symtab = commands |
| 2570 | .iter() |
| 2571 | .find_map(|cmd| match cmd { |
| 2572 | LoadCommand::Symtab(cmd) => Some(*cmd), |
| 2573 | _ => None, |
| 2574 | }) |
| 2575 | .ok_or_else(|| "missing LC_SYMTAB".to_string())?; |
| 2576 | let symbols = |
| 2577 | parse_nlist_table(bytes, symtab.symoff, symtab.nsyms).map_err(|e| e.to_string())?; |
| 2578 | let strings = |
| 2579 | StringTable::from_file(bytes, symtab.stroff, symtab.strsize).map_err(|e| e.to_string())?; |
| 2580 | let mut out = BTreeMap::new(); |
| 2581 | for symbol in symbols { |
| 2582 | let Ok(name) = strings.get(symbol.strx()) else { |
| 2583 | continue; |
| 2584 | }; |
| 2585 | out.insert(name.to_string(), symbol.value()); |
| 2586 | } |
| 2587 | Ok(out) |
| 2588 | } |
| 2589 | |
| 2590 | fn decode_page_reference( |
| 2591 | bytes: &[u8], |
| 2592 | section_addr: u64, |
| 2593 | site_offset: u64, |
| 2594 | kind: PageRefKind, |
| 2595 | ) -> Result<u64, String> { |
| 2596 | let start = site_offset as usize; |
| 2597 | let adrp = read_insn(bytes, start)?; |
| 2598 | let second = read_insn(bytes, start + 4)?; |
| 2599 | let place = section_addr + site_offset; |
| 2600 | let adrp_immlo = ((adrp >> 29) & 0x3) as i64; |
| 2601 | let adrp_immhi = ((adrp >> 5) & 0x7ffff) as i64; |
| 2602 | let adrp_pages = sign_extend_21((adrp_immhi << 2) | adrp_immlo); |
| 2603 | let adrp_base = ((place as i64) & !0xfff) + (adrp_pages << 12); |
| 2604 | let low = match kind { |
| 2605 | PageRefKind::Add => ((second >> 10) & 0xfff) as u64, |
| 2606 | PageRefKind::Load => { |
| 2607 | let shift = ((second >> 30) & 0b11) as u64; |
| 2608 | (((second >> 10) & 0xfff) as u64) << shift |
| 2609 | } |
| 2610 | }; |
| 2611 | Ok((adrp_base as u64) + low) |
| 2612 | } |
| 2613 | |
/// Read a little-endian 32-bit instruction word at byte offset `start`.
///
/// # Errors
/// Returns an error when fewer than four bytes are available at `start`,
/// including the pathological case where `start + 4` would overflow
/// `usize` (previously a debug-mode panic / release-mode wraparound).
fn read_insn(bytes: &[u8], start: usize) -> Result<u32, String> {
    let slice = start
        .checked_add(4)
        .and_then(|end| bytes.get(start..end))
        .ok_or_else(|| format!("instruction read OOB at 0x{start:x}"))?;
    Ok(u32::from_le_bytes([slice[0], slice[1], slice[2], slice[3]]))
}
| 2621 | |
/// Sign-extend a 21-bit two's-complement value (an ADRP `immhi:immlo`
/// pair) to 64 bits.
///
/// Uses the same shift-based idiom as `sign_extend_26` for consistency;
/// all call sites pass values already masked to 21 bits, for which the
/// two forms are identical.
fn sign_extend_21(value: i64) -> i64 {
    let shift = 64 - 21;
    (value << shift) >> shift
}
| 2629 |