@@ -12,12 +12,19 @@ use std::path::{Path, PathBuf}; |
| 12 | 12 | use std::process::Command; |
| 13 | 13 | use std::time::{SystemTime, UNIX_EPOCH}; |
| 14 | 14 | |
| 15 | | -use afs_ld::leb::read_uleb; |
| 15 | +use afs_ld::leb::{read_sleb, read_uleb}; |
| 16 | 16 | use afs_ld::macho::constants::{ |
| 17 | + BIND_IMMEDIATE_MASK, BIND_OPCODE_ADD_ADDR_ULEB, BIND_OPCODE_DO_BIND, |
| 18 | + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB, |
| 19 | + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, BIND_OPCODE_DONE, BIND_OPCODE_MASK, |
| 20 | + BIND_OPCODE_SET_ADDEND_SLEB, BIND_OPCODE_SET_DYLIB_ORDINAL_IMM, |
| 21 | + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, |
| 22 | + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, |
| 23 | + BIND_OPCODE_SET_TYPE_IMM, BIND_SYMBOL_FLAGS_WEAK_IMPORT, BIND_TYPE_POINTER, |
| 17 | 24 | INDIRECT_SYMBOL_ABS, INDIRECT_SYMBOL_LOCAL, LC_BUILD_VERSION, LC_CODE_SIGNATURE, |
| 18 | 25 | LC_DATA_IN_CODE, LC_DYLD_CHAINED_FIXUPS, LC_DYLD_EXPORTS_TRIE, LC_DYLD_INFO_ONLY, LC_DYSYMTAB, |
| 19 | 26 | LC_FUNCTION_STARTS, LC_ID_DYLIB, LC_LOAD_DYLIB, LC_LOAD_UPWARD_DYLIB, LC_LOAD_WEAK_DYLIB, |
| 20 | | - LC_REEXPORT_DYLIB, LC_SEGMENT_64, LC_SYMTAB, LC_UUID, |
| 27 | + LC_REEXPORT_DYLIB, LC_SEGMENT_64, LC_SYMTAB, LC_UUID, N_TYPE, N_UNDF, |
| 21 | 28 | }; |
| 22 | 29 | use afs_ld::macho::dylib::DylibFile; |
| 23 | 30 | use afs_ld::macho::exports::ExportKind; |
@@ -59,6 +66,7 @@ pub enum CommandCheck { |
| 59 | 66 | FunctionStarts, |
| 60 | 67 | NormalizedFunctionStarts, |
| 61 | 68 | DataInCode, |
| 69 | + DataInCodeIfPresent, |
| 62 | 70 | RebasedUnwindBytes, |
| 63 | 71 | DyldInfoRebase, |
| 64 | 72 | DyldInfoBind, |
@@ -712,6 +720,15 @@ pub fn compare_command_details( |
| 712 | 720 | )); |
| 713 | 721 | } |
| 714 | 722 | } |
| 723 | + CommandCheck::DataInCodeIfPresent => { |
| 724 | + let ours = canonical_data_in_code(ours)?; |
| 725 | + let theirs = canonical_data_in_code(theirs)?; |
| 726 | + if !ours.is_empty() && !theirs.is_empty() && ours != theirs { |
| 727 | + return Err(format!( |
| 728 | + "canonical data-in-code records diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}" |
| 729 | + )); |
| 730 | + } |
| 731 | + } |
| 715 | 732 | CommandCheck::RebasedUnwindBytes => { |
| 716 | 733 | let ours = rebased_unwind_bytes(ours)?; |
| 717 | 734 | let theirs = rebased_unwind_bytes(theirs)?; |
@@ -727,24 +744,30 @@ pub fn compare_command_details( |
| 727 | 744 | } |
| 728 | 745 | } |
| 729 | 746 | CommandCheck::DyldInfoBind => { |
| 730 | | - let ours = dyld_info_stream(ours, DyldInfoStreamKind::Bind)?; |
| 731 | | - let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::Bind)?; |
| 747 | + let ours = canonical_bind_records(ours, DyldInfoStreamKind::Bind)?; |
| 748 | + let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::Bind)?; |
| 732 | 749 | if ours != theirs { |
| 733 | | - return Err("bind stream diverged".to_string()); |
| 750 | + return Err(format!( |
| 751 | + "bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}" |
| 752 | + )); |
| 734 | 753 | } |
| 735 | 754 | } |
| 736 | 755 | CommandCheck::DyldInfoWeakBind => { |
| 737 | | - let ours = dyld_info_stream(ours, DyldInfoStreamKind::WeakBind)?; |
| 738 | | - let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::WeakBind)?; |
| 756 | + let ours = canonical_bind_records(ours, DyldInfoStreamKind::WeakBind)?; |
| 757 | + let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::WeakBind)?; |
| 739 | 758 | if ours != theirs { |
| 740 | | - return Err("weak-bind stream diverged".to_string()); |
| 759 | + return Err(format!( |
| 760 | + "weak-bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}" |
| 761 | + )); |
| 741 | 762 | } |
| 742 | 763 | } |
| 743 | 764 | CommandCheck::DyldInfoLazyBind => { |
| 744 | | - let ours = dyld_info_stream(ours, DyldInfoStreamKind::LazyBind)?; |
| 745 | | - let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::LazyBind)?; |
| 765 | + let ours = canonical_bind_records(ours, DyldInfoStreamKind::LazyBind)?; |
| 766 | + let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::LazyBind)?; |
| 746 | 767 | if ours != theirs { |
| 747 | | - return Err("lazy-bind stream diverged".to_string()); |
| 768 | + return Err(format!( |
| 769 | + "lazy-bind stream diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}" |
| 770 | + )); |
| 748 | 771 | } |
| 749 | 772 | } |
| 750 | 773 | } |
@@ -841,6 +864,16 @@ pub fn compare_sections( |
| 841 | 864 | case_tolerances: &[CaseTolerance], |
| 842 | 865 | ) -> Result<(), String> { |
| 843 | 866 | for (segname, sectname) in sections { |
| 867 | + if segname == "__TEXT" && sectname == "__stubs" { |
| 868 | + let ours = canonical_stub_targets(ours)?; |
| 869 | + let theirs = canonical_stub_targets(theirs)?; |
| 870 | + if ours != theirs { |
| 871 | + return Err(format!( |
| 872 | + "canonical stub targets diverged:\nours: {ours:#?}\ntheirs: {theirs:#?}" |
| 873 | + )); |
| 874 | + } |
| 875 | + continue; |
| 876 | + } |
| 844 | 877 | let (_, our_bytes) = output_section(ours, segname, sectname) |
| 845 | 878 | .ok_or_else(|| format!("missing section {segname},{sectname} in afs-ld output"))?; |
| 846 | 879 | let (_, their_bytes) = output_section(theirs, segname, sectname) |
@@ -1364,6 +1397,7 @@ fn parse_command_check(name: &str) -> Result<CommandCheck, String> { |
| 1364 | 1397 | "function_starts" => Ok(CommandCheck::FunctionStarts), |
| 1365 | 1398 | "normalized_function_starts" => Ok(CommandCheck::NormalizedFunctionStarts), |
| 1366 | 1399 | "data_in_code" => Ok(CommandCheck::DataInCode), |
| 1400 | + "data_in_code_if_present" => Ok(CommandCheck::DataInCodeIfPresent), |
| 1367 | 1401 | "rebased_unwind_bytes" => Ok(CommandCheck::RebasedUnwindBytes), |
| 1368 | 1402 | "dyld_info_rebase" => Ok(CommandCheck::DyldInfoRebase), |
| 1369 | 1403 | "dyld_info_bind" => Ok(CommandCheck::DyldInfoBind), |
@@ -1636,9 +1670,16 @@ fn canonical_symbol_records(bytes: &[u8]) -> Result<Vec<CanonicalSymbolRecord>, |
| 1636 | 1670 | value, |
| 1637 | 1671 | } |
| 1638 | 1672 | }) |
| 1673 | + .filter(|record| !is_optional_dyld_stub_binder_record(record)) |
| 1639 | 1674 | .collect()) |
| 1640 | 1675 | } |
| 1641 | 1676 | |
| 1677 | +fn is_optional_dyld_stub_binder_record(record: &CanonicalSymbolRecord) -> bool { |
| 1678 | + record.name == "dyld_stub_binder" |
| 1679 | + && (record.n_type & N_TYPE) == N_UNDF |
| 1680 | + && record.n_sect == 0 |
| 1681 | +} |
| 1682 | + |
| 1642 | 1683 | fn canonical_export_records(bytes: &[u8]) -> Result<Vec<CanonicalExportRecord>, String> { |
| 1643 | 1684 | let dylib = DylibFile::parse("/tmp/canonical.dylib", bytes).map_err(|e| e.to_string())?; |
| 1644 | 1685 | let symbol_values: BTreeMap<String, u64> = canonical_symbol_records(bytes)? |
@@ -1835,6 +1876,146 @@ fn canonical_data_in_code(bytes: &[u8]) -> Result<Vec<DataInCodeRecord>, String> |
| 1835 | 1876 | .collect()) |
| 1836 | 1877 | } |
| 1837 | 1878 | |
/// One bind decoded from a dyld-info bind opcode stream.
///
/// The field order is significant: the derived `Ord` compares fields top to
/// bottom, so records sort by segment index, then segment offset, and so on.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct CanonicalBindRecord {
    /// Segment index taken from the immediate of the set-segment opcode.
    segment_index: u8,
    /// Byte offset within that segment at which the bind applies.
    segment_offset: u64,
    /// Dylib ordinal; special ordinals are stored as zero or negative values.
    ordinal: i32,
    /// Name of the symbol being bound.
    symbol: String,
    /// Whether the symbol's trailing flags carried the weak-import bit.
    weak_import: bool,
    /// Bind type taken from the immediate of the set-type opcode.
    bind_type: u8,
    /// Signed addend applied to the bound address.
    addend: i64,
}
| 1889 | + |
| 1890 | +fn canonical_bind_records( |
| 1891 | + bytes: &[u8], |
| 1892 | + kind: DyldInfoStreamKind, |
| 1893 | +) -> Result<Vec<CanonicalBindRecord>, String> { |
| 1894 | + let stream = dyld_info_stream(bytes, kind)?; |
| 1895 | + let mut cursor = 0usize; |
| 1896 | + let mut segment_index = 0u8; |
| 1897 | + let mut segment_offset = 0u64; |
| 1898 | + let mut ordinal = 0i32; |
| 1899 | + let mut symbol = String::new(); |
| 1900 | + let mut weak_import = false; |
| 1901 | + let mut bind_type = BIND_TYPE_POINTER; |
| 1902 | + let mut addend = 0i64; |
| 1903 | + let mut out = Vec::new(); |
| 1904 | + |
| 1905 | + while cursor < stream.len() { |
| 1906 | + let byte = stream[cursor]; |
| 1907 | + cursor += 1; |
| 1908 | + let opcode = byte & BIND_OPCODE_MASK; |
| 1909 | + let imm = byte & BIND_IMMEDIATE_MASK; |
| 1910 | + match opcode { |
| 1911 | + BIND_OPCODE_DONE => break, |
| 1912 | + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => ordinal = imm as i32, |
| 1913 | + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { |
| 1914 | + let (value, used) = |
| 1915 | + read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?; |
| 1916 | + cursor += used; |
| 1917 | + ordinal = value as i32; |
| 1918 | + } |
| 1919 | + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => { |
| 1920 | + ordinal = if imm == 0 { |
| 1921 | + 0 |
| 1922 | + } else { |
| 1923 | + (((imm as i8) << 4) >> 4) as i32 |
| 1924 | + }; |
| 1925 | + } |
| 1926 | + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { |
| 1927 | + let (value, used) = read_c_string(&stream[cursor..])?; |
| 1928 | + cursor += used; |
| 1929 | + symbol = value; |
| 1930 | + weak_import = (imm & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0; |
| 1931 | + } |
| 1932 | + BIND_OPCODE_SET_TYPE_IMM => bind_type = imm, |
| 1933 | + BIND_OPCODE_SET_ADDEND_SLEB => { |
| 1934 | + let (value, used) = |
| 1935 | + read_sleb(&stream[cursor..]).map_err(|e| format!("bind sleb: {e}"))?; |
| 1936 | + cursor += used; |
| 1937 | + addend = value; |
| 1938 | + } |
| 1939 | + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { |
| 1940 | + let (value, used) = |
| 1941 | + read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?; |
| 1942 | + cursor += used; |
| 1943 | + segment_index = imm; |
| 1944 | + segment_offset = value; |
| 1945 | + } |
| 1946 | + BIND_OPCODE_ADD_ADDR_ULEB => { |
| 1947 | + let (value, used) = |
| 1948 | + read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?; |
| 1949 | + cursor += used; |
| 1950 | + segment_offset += value; |
| 1951 | + } |
| 1952 | + BIND_OPCODE_DO_BIND => { |
| 1953 | + out.push(CanonicalBindRecord { |
| 1954 | + segment_index, |
| 1955 | + segment_offset, |
| 1956 | + ordinal, |
| 1957 | + symbol: symbol.clone(), |
| 1958 | + weak_import, |
| 1959 | + bind_type, |
| 1960 | + addend, |
| 1961 | + }); |
| 1962 | + segment_offset += 8; |
| 1963 | + } |
| 1964 | + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => { |
| 1965 | + let (value, used) = |
| 1966 | + read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?; |
| 1967 | + cursor += used; |
| 1968 | + out.push(CanonicalBindRecord { |
| 1969 | + segment_index, |
| 1970 | + segment_offset, |
| 1971 | + ordinal, |
| 1972 | + symbol: symbol.clone(), |
| 1973 | + weak_import, |
| 1974 | + bind_type, |
| 1975 | + addend, |
| 1976 | + }); |
| 1977 | + segment_offset += 8 + value; |
| 1978 | + } |
| 1979 | + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => { |
| 1980 | + out.push(CanonicalBindRecord { |
| 1981 | + segment_index, |
| 1982 | + segment_offset, |
| 1983 | + ordinal, |
| 1984 | + symbol: symbol.clone(), |
| 1985 | + weak_import, |
| 1986 | + bind_type, |
| 1987 | + addend, |
| 1988 | + }); |
| 1989 | + segment_offset += 8 + (imm as u64) * 8; |
| 1990 | + } |
| 1991 | + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => { |
| 1992 | + let (count, count_used) = |
| 1993 | + read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?; |
| 1994 | + cursor += count_used; |
| 1995 | + let (skip, skip_used) = |
| 1996 | + read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?; |
| 1997 | + cursor += skip_used; |
| 1998 | + for _ in 0..count { |
| 1999 | + out.push(CanonicalBindRecord { |
| 2000 | + segment_index, |
| 2001 | + segment_offset, |
| 2002 | + ordinal, |
| 2003 | + symbol: symbol.clone(), |
| 2004 | + weak_import, |
| 2005 | + bind_type, |
| 2006 | + addend, |
| 2007 | + }); |
| 2008 | + segment_offset += 8 + skip; |
| 2009 | + } |
| 2010 | + } |
| 2011 | + other => return Err(format!("unsupported bind opcode 0x{other:02x}")), |
| 2012 | + } |
| 2013 | + } |
| 2014 | + |
| 2015 | + out.sort(); |
| 2016 | + Ok(out) |
| 2017 | +} |
| 2018 | + |
| 1838 | 2019 | fn rebased_unwind_bytes(bytes: &[u8]) -> Result<Vec<u8>, String> { |
| 1839 | 2020 | let header_base = segment_vmaddr(bytes, "__TEXT").unwrap_or(0); |
| 1840 | 2021 | let text_base = output_section(bytes, "__TEXT", "__text") |
@@ -1983,6 +2164,76 @@ fn dyld_info_stream(bytes: &[u8], kind: DyldInfoStreamKind) -> Result<Vec<u8>, S |
| 1983 | 2164 | .ok_or_else(|| "dyld-info stream out of bounds".to_string()) |
| 1984 | 2165 | } |
| 1985 | 2166 | |
/// Reads a NUL-terminated UTF-8 string from the front of `bytes`.
///
/// On success returns the decoded text together with the number of bytes
/// consumed, which includes the terminating NUL.
///
/// # Errors
///
/// Fails when `bytes` contains no NUL byte or when the bytes before the
/// first NUL are not valid UTF-8.
fn read_c_string(bytes: &[u8]) -> Result<(String, usize), String> {
    let nul_at = match bytes.iter().position(|&b| b == 0) {
        Some(index) => index,
        None => return Err("unterminated C string".to_string()),
    };
    match std::str::from_utf8(&bytes[..nul_at]) {
        Ok(text) => Ok((text.to_owned(), nul_at + 1)),
        Err(e) => Err(format!("utf-8 in C string: {e}")),
    }
}
| 2177 | + |
| 2178 | +fn canonical_stub_targets(bytes: &[u8]) -> Result<Vec<u64>, String> { |
| 2179 | + let header = output_section_header(bytes, "__TEXT", "__stubs") |
| 2180 | + .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?; |
| 2181 | + let (section_addr, section_bytes) = output_section(bytes, "__TEXT", "__stubs") |
| 2182 | + .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?; |
| 2183 | + if section_bytes.is_empty() { |
| 2184 | + return Ok(Vec::new()); |
| 2185 | + } |
| 2186 | + let stub_size = usize::try_from(header.reserved2) |
| 2187 | + .ok() |
| 2188 | + .filter(|size| *size > 0) |
| 2189 | + .unwrap_or(12); |
| 2190 | + if section_bytes.len() % stub_size != 0 { |
| 2191 | + return Err(format!( |
| 2192 | + "__TEXT,__stubs size {} is not a multiple of stub size {}", |
| 2193 | + section_bytes.len(), |
| 2194 | + stub_size |
| 2195 | + )); |
| 2196 | + } |
| 2197 | + let mut out = Vec::new(); |
| 2198 | + for (idx, chunk) in section_bytes.chunks_exact(stub_size).enumerate() { |
| 2199 | + out.push(decode_stub_target( |
| 2200 | + chunk, |
| 2201 | + section_addr + (idx * stub_size) as u64, |
| 2202 | + )?); |
| 2203 | + } |
| 2204 | + Ok(out) |
| 2205 | +} |
| 2206 | + |
| 2207 | +fn decode_stub_target(bytes: &[u8], stub_addr: u64) -> Result<u64, String> { |
| 2208 | + let adrp = read_insn(bytes, 0)?; |
| 2209 | + let ldr = read_insn(bytes, 4)?; |
| 2210 | + let br = read_insn(bytes, 8)?; |
| 2211 | + if (adrp & 0x9f00_0000) != 0x9000_0000 { |
| 2212 | + return Err(format!("stub at 0x{stub_addr:x} does not start with ADRP")); |
| 2213 | + } |
| 2214 | + if (ldr & 0xffc0_0000) != 0xf940_0000 { |
| 2215 | + return Err(format!("stub at 0x{stub_addr:x} does not use LDR (unsigned)")); |
| 2216 | + } |
| 2217 | + if (br & 0xffff_fc1f) != 0xd61f_0000 { |
| 2218 | + return Err(format!("stub at 0x{stub_addr:x} does not end with BR")); |
| 2219 | + } |
| 2220 | + let adrp_reg = (adrp & 0x1f) as u8; |
| 2221 | + let ldr_base = ((ldr >> 5) & 0x1f) as u8; |
| 2222 | + let ldr_reg = (ldr & 0x1f) as u8; |
| 2223 | + let br_reg = ((br >> 5) & 0x1f) as u8; |
| 2224 | + if adrp_reg != ldr_base || adrp_reg != ldr_reg || adrp_reg != br_reg { |
| 2225 | + return Err(format!( |
| 2226 | + "stub at 0x{stub_addr:x} uses inconsistent scratch regs: adrp=x{adrp_reg}, ldr base=x{ldr_base}, ldr rt=x{ldr_reg}, br=x{br_reg}" |
| 2227 | + )); |
| 2228 | + } |
| 2229 | + let adrp_immlo = ((adrp >> 29) & 0x3) as i64; |
| 2230 | + let adrp_immhi = ((adrp >> 5) & 0x7ffff) as i64; |
| 2231 | + let adrp_pages = sign_extend_21((adrp_immhi << 2) | adrp_immlo); |
| 2232 | + let adrp_base = ((stub_addr as i64) & !0xfff) + (adrp_pages << 12); |
| 2233 | + let scaled = ((ldr >> 10) & 0xfff) as u64; |
| 2234 | + Ok((adrp_base as u64) + scaled * 8) |
| 2235 | +} |
| 2236 | + |
| 1986 | 2237 | fn symbol_values(bytes: &[u8]) -> Result<BTreeMap<String, u64>, String> { |
| 1987 | 2238 | let header = parse_header(bytes).map_err(|e| e.to_string())?; |
| 1988 | 2239 | let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?; |