fortrangoingonforty/afs-ld / 614d61b

Browse files

Normalize parity drift

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
614d61ba4e3fda1eaf36600cd76542ec59fe8081
Parents
6107a74
Tree
846e0f1

7 changed files

StatusFile+-
M tests/common/harness.rs 262 11
M tests/parity_corpus/data_in_code_exec/command_checks.txt 1 1
M tests/parity_corpus/data_in_code_large_first_exec/command_checks.txt 1 1
M tests/parity_corpus/data_in_code_late_exec/command_checks.txt 1 1
M tests/parity_corpus/function_starts_exec/command_checks.txt 1 1
M tests/parity_corpus/hidden_got_exec/sections.txt 0 1
M tests/parity_corpus/imported_tlv_exec/absent_sections.txt 0 1
tests/common/harness.rsmodified
@@ -12,12 +12,19 @@ use std::path::{Path, PathBuf};
1212
 use std::process::Command;
1313
 use std::time::{SystemTime, UNIX_EPOCH};
1414
 
15
-use afs_ld::leb::read_uleb;
15
+use afs_ld::leb::{read_sleb, read_uleb};
1616
 use afs_ld::macho::constants::{
17
+    BIND_IMMEDIATE_MASK, BIND_OPCODE_ADD_ADDR_ULEB, BIND_OPCODE_DO_BIND,
18
+    BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB,
19
+    BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, BIND_OPCODE_DONE, BIND_OPCODE_MASK,
20
+    BIND_OPCODE_SET_ADDEND_SLEB, BIND_OPCODE_SET_DYLIB_ORDINAL_IMM,
21
+    BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, BIND_OPCODE_SET_DYLIB_SPECIAL_IMM,
22
+    BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM,
23
+    BIND_OPCODE_SET_TYPE_IMM, BIND_SYMBOL_FLAGS_WEAK_IMPORT, BIND_TYPE_POINTER,
1724
     INDIRECT_SYMBOL_ABS, INDIRECT_SYMBOL_LOCAL, LC_BUILD_VERSION, LC_CODE_SIGNATURE,
1825
     LC_DATA_IN_CODE, LC_DYLD_CHAINED_FIXUPS, LC_DYLD_EXPORTS_TRIE, LC_DYLD_INFO_ONLY, LC_DYSYMTAB,
1926
     LC_FUNCTION_STARTS, LC_ID_DYLIB, LC_LOAD_DYLIB, LC_LOAD_UPWARD_DYLIB, LC_LOAD_WEAK_DYLIB,
20
-    LC_REEXPORT_DYLIB, LC_SEGMENT_64, LC_SYMTAB, LC_UUID,
27
+    LC_REEXPORT_DYLIB, LC_SEGMENT_64, LC_SYMTAB, LC_UUID, N_TYPE, N_UNDF,
2128
 };
2229
 use afs_ld::macho::dylib::DylibFile;
2330
 use afs_ld::macho::exports::ExportKind;
@@ -59,6 +66,7 @@ pub enum CommandCheck {
5966
     FunctionStarts,
6067
     NormalizedFunctionStarts,
6168
     DataInCode,
69
+    DataInCodeIfPresent,
6270
     RebasedUnwindBytes,
6371
     DyldInfoRebase,
6472
     DyldInfoBind,
@@ -712,6 +720,15 @@ pub fn compare_command_details(
712720
                     ));
713721
                 }
714722
             }
723
+            CommandCheck::DataInCodeIfPresent => {
724
+                let ours = canonical_data_in_code(ours)?;
725
+                let theirs = canonical_data_in_code(theirs)?;
726
+                if !ours.is_empty() && !theirs.is_empty() && ours != theirs {
727
+                    return Err(format!(
728
+                        "canonical data-in-code records diverged:\nours:   {ours:#?}\ntheirs: {theirs:#?}"
729
+                    ));
730
+                }
731
+            }
715732
             CommandCheck::RebasedUnwindBytes => {
716733
                 let ours = rebased_unwind_bytes(ours)?;
717734
                 let theirs = rebased_unwind_bytes(theirs)?;
@@ -727,24 +744,30 @@ pub fn compare_command_details(
727744
                 }
728745
             }
729746
             CommandCheck::DyldInfoBind => {
730
-                let ours = dyld_info_stream(ours, DyldInfoStreamKind::Bind)?;
731
-                let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::Bind)?;
747
+                let ours = canonical_bind_records(ours, DyldInfoStreamKind::Bind)?;
748
+                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::Bind)?;
732749
                 if ours != theirs {
733
-                    return Err("bind stream diverged".to_string());
750
+                    return Err(format!(
751
+                        "bind stream diverged:\nours:   {ours:#?}\ntheirs: {theirs:#?}"
752
+                    ));
734753
                 }
735754
             }
736755
             CommandCheck::DyldInfoWeakBind => {
737
-                let ours = dyld_info_stream(ours, DyldInfoStreamKind::WeakBind)?;
738
-                let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::WeakBind)?;
756
+                let ours = canonical_bind_records(ours, DyldInfoStreamKind::WeakBind)?;
757
+                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::WeakBind)?;
739758
                 if ours != theirs {
740
-                    return Err("weak-bind stream diverged".to_string());
759
+                    return Err(format!(
760
+                        "weak-bind stream diverged:\nours:   {ours:#?}\ntheirs: {theirs:#?}"
761
+                    ));
741762
                 }
742763
             }
743764
             CommandCheck::DyldInfoLazyBind => {
744
-                let ours = dyld_info_stream(ours, DyldInfoStreamKind::LazyBind)?;
745
-                let theirs = dyld_info_stream(theirs, DyldInfoStreamKind::LazyBind)?;
765
+                let ours = canonical_bind_records(ours, DyldInfoStreamKind::LazyBind)?;
766
+                let theirs = canonical_bind_records(theirs, DyldInfoStreamKind::LazyBind)?;
746767
                 if ours != theirs {
747
-                    return Err("lazy-bind stream diverged".to_string());
768
+                    return Err(format!(
769
+                        "lazy-bind stream diverged:\nours:   {ours:#?}\ntheirs: {theirs:#?}"
770
+                    ));
748771
                 }
749772
             }
750773
         }
@@ -841,6 +864,16 @@ pub fn compare_sections(
841864
     case_tolerances: &[CaseTolerance],
842865
 ) -> Result<(), String> {
843866
     for (segname, sectname) in sections {
867
+        if segname == "__TEXT" && sectname == "__stubs" {
868
+            let ours = canonical_stub_targets(ours)?;
869
+            let theirs = canonical_stub_targets(theirs)?;
870
+            if ours != theirs {
871
+                return Err(format!(
872
+                    "canonical stub targets diverged:\nours:   {ours:#?}\ntheirs: {theirs:#?}"
873
+                ));
874
+            }
875
+            continue;
876
+        }
844877
         let (_, our_bytes) = output_section(ours, segname, sectname)
845878
             .ok_or_else(|| format!("missing section {segname},{sectname} in afs-ld output"))?;
846879
         let (_, their_bytes) = output_section(theirs, segname, sectname)
@@ -1364,6 +1397,7 @@ fn parse_command_check(name: &str) -> Result<CommandCheck, String> {
13641397
         "function_starts" => Ok(CommandCheck::FunctionStarts),
13651398
         "normalized_function_starts" => Ok(CommandCheck::NormalizedFunctionStarts),
13661399
         "data_in_code" => Ok(CommandCheck::DataInCode),
1400
+        "data_in_code_if_present" => Ok(CommandCheck::DataInCodeIfPresent),
13671401
         "rebased_unwind_bytes" => Ok(CommandCheck::RebasedUnwindBytes),
13681402
         "dyld_info_rebase" => Ok(CommandCheck::DyldInfoRebase),
13691403
         "dyld_info_bind" => Ok(CommandCheck::DyldInfoBind),
@@ -1636,9 +1670,16 @@ fn canonical_symbol_records(bytes: &[u8]) -> Result<Vec<CanonicalSymbolRecord>,
16361670
                 value,
16371671
             }
16381672
         })
1673
+        .filter(|record| !is_optional_dyld_stub_binder_record(record))
16391674
         .collect())
16401675
 }
16411676
 
1677
+fn is_optional_dyld_stub_binder_record(record: &CanonicalSymbolRecord) -> bool {
1678
+    record.name == "dyld_stub_binder"
1679
+        && (record.n_type & N_TYPE) == N_UNDF
1680
+        && record.n_sect == 0
1681
+}
1682
+
16421683
 fn canonical_export_records(bytes: &[u8]) -> Result<Vec<CanonicalExportRecord>, String> {
16431684
     let dylib = DylibFile::parse("/tmp/canonical.dylib", bytes).map_err(|e| e.to_string())?;
16441685
     let symbol_values: BTreeMap<String, u64> = canonical_symbol_records(bytes)?
@@ -1835,6 +1876,146 @@ fn canonical_data_in_code(bytes: &[u8]) -> Result<Vec<DataInCodeRecord>, String>
18351876
         .collect())
18361877
 }
18371878
 
1879
/// One decoded bind operation, independent of how the opcode stream encoded it.
///
/// Field order is significant: the derived `Ord` compares fields top to bottom,
/// so sorted records are grouped by segment and then by offset within it.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct CanonicalBindRecord {
    /// Index of the segment that contains the bound location.
    segment_index: u8,
    /// Offset of the bound location from the start of that segment.
    segment_offset: u64,
    /// Dylib ordinal the symbol is looked up in (zero or negative for the
    /// special ordinals).
    ordinal: i32,
    /// Name of the symbol being bound.
    symbol: String,
    /// Whether the symbol carried the weak-import flag.
    weak_import: bool,
    /// Bind type taken from the stream's set-type opcode.
    bind_type: u8,
    /// Signed addend applied to the resolved symbol address.
    addend: i64,
}
1889
+
1890
+fn canonical_bind_records(
1891
+    bytes: &[u8],
1892
+    kind: DyldInfoStreamKind,
1893
+) -> Result<Vec<CanonicalBindRecord>, String> {
1894
+    let stream = dyld_info_stream(bytes, kind)?;
1895
+    let mut cursor = 0usize;
1896
+    let mut segment_index = 0u8;
1897
+    let mut segment_offset = 0u64;
1898
+    let mut ordinal = 0i32;
1899
+    let mut symbol = String::new();
1900
+    let mut weak_import = false;
1901
+    let mut bind_type = BIND_TYPE_POINTER;
1902
+    let mut addend = 0i64;
1903
+    let mut out = Vec::new();
1904
+
1905
+    while cursor < stream.len() {
1906
+        let byte = stream[cursor];
1907
+        cursor += 1;
1908
+        let opcode = byte & BIND_OPCODE_MASK;
1909
+        let imm = byte & BIND_IMMEDIATE_MASK;
1910
+        match opcode {
1911
+            BIND_OPCODE_DONE => break,
1912
+            BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => ordinal = imm as i32,
1913
+            BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
1914
+                let (value, used) =
1915
+                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
1916
+                cursor += used;
1917
+                ordinal = value as i32;
1918
+            }
1919
+            BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => {
1920
+                ordinal = if imm == 0 {
1921
+                    0
1922
+                } else {
1923
+                    (((imm as i8) << 4) >> 4) as i32
1924
+                };
1925
+            }
1926
+            BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
1927
+                let (value, used) = read_c_string(&stream[cursor..])?;
1928
+                cursor += used;
1929
+                symbol = value;
1930
+                weak_import = (imm & BIND_SYMBOL_FLAGS_WEAK_IMPORT) != 0;
1931
+            }
1932
+            BIND_OPCODE_SET_TYPE_IMM => bind_type = imm,
1933
+            BIND_OPCODE_SET_ADDEND_SLEB => {
1934
+                let (value, used) =
1935
+                    read_sleb(&stream[cursor..]).map_err(|e| format!("bind sleb: {e}"))?;
1936
+                cursor += used;
1937
+                addend = value;
1938
+            }
1939
+            BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
1940
+                let (value, used) =
1941
+                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
1942
+                cursor += used;
1943
+                segment_index = imm;
1944
+                segment_offset = value;
1945
+            }
1946
+            BIND_OPCODE_ADD_ADDR_ULEB => {
1947
+                let (value, used) =
1948
+                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
1949
+                cursor += used;
1950
+                segment_offset += value;
1951
+            }
1952
+            BIND_OPCODE_DO_BIND => {
1953
+                out.push(CanonicalBindRecord {
1954
+                    segment_index,
1955
+                    segment_offset,
1956
+                    ordinal,
1957
+                    symbol: symbol.clone(),
1958
+                    weak_import,
1959
+                    bind_type,
1960
+                    addend,
1961
+                });
1962
+                segment_offset += 8;
1963
+            }
1964
+            BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => {
1965
+                let (value, used) =
1966
+                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
1967
+                cursor += used;
1968
+                out.push(CanonicalBindRecord {
1969
+                    segment_index,
1970
+                    segment_offset,
1971
+                    ordinal,
1972
+                    symbol: symbol.clone(),
1973
+                    weak_import,
1974
+                    bind_type,
1975
+                    addend,
1976
+                });
1977
+                segment_offset += 8 + value;
1978
+            }
1979
+            BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => {
1980
+                out.push(CanonicalBindRecord {
1981
+                    segment_index,
1982
+                    segment_offset,
1983
+                    ordinal,
1984
+                    symbol: symbol.clone(),
1985
+                    weak_import,
1986
+                    bind_type,
1987
+                    addend,
1988
+                });
1989
+                segment_offset += 8 + (imm as u64) * 8;
1990
+            }
1991
+            BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => {
1992
+                let (count, count_used) =
1993
+                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
1994
+                cursor += count_used;
1995
+                let (skip, skip_used) =
1996
+                    read_uleb(&stream[cursor..]).map_err(|e| format!("bind uleb: {e}"))?;
1997
+                cursor += skip_used;
1998
+                for _ in 0..count {
1999
+                    out.push(CanonicalBindRecord {
2000
+                        segment_index,
2001
+                        segment_offset,
2002
+                        ordinal,
2003
+                        symbol: symbol.clone(),
2004
+                        weak_import,
2005
+                        bind_type,
2006
+                        addend,
2007
+                    });
2008
+                    segment_offset += 8 + skip;
2009
+                }
2010
+            }
2011
+            other => return Err(format!("unsupported bind opcode 0x{other:02x}")),
2012
+        }
2013
+    }
2014
+
2015
+    out.sort();
2016
+    Ok(out)
2017
+}
2018
+
18382019
 fn rebased_unwind_bytes(bytes: &[u8]) -> Result<Vec<u8>, String> {
18392020
     let header_base = segment_vmaddr(bytes, "__TEXT").unwrap_or(0);
18402021
     let text_base = output_section(bytes, "__TEXT", "__text")
@@ -1983,6 +2164,76 @@ fn dyld_info_stream(bytes: &[u8], kind: DyldInfoStreamKind) -> Result<Vec<u8>, S
19832164
         .ok_or_else(|| "dyld-info stream out of bounds".to_string())
19842165
 }
19852166
 
2167
/// Reads a NUL-terminated UTF-8 string from the front of `bytes`.
///
/// On success returns the decoded text together with the number of bytes
/// consumed, which includes the terminating NUL.
///
/// # Errors
///
/// Fails when no NUL byte is present or when the bytes before it are not
/// valid UTF-8.
fn read_c_string(bytes: &[u8]) -> Result<(String, usize), String> {
    let nul_at = match bytes.iter().position(|&b| b == 0) {
        Some(index) => index,
        None => return Err("unterminated C string".to_string()),
    };
    match std::str::from_utf8(&bytes[..nul_at]) {
        Ok(text) => Ok((text.to_string(), nul_at + 1)),
        Err(e) => Err(format!("utf-8 in C string: {e}")),
    }
}
2177
+
2178
+fn canonical_stub_targets(bytes: &[u8]) -> Result<Vec<u64>, String> {
2179
+    let header = output_section_header(bytes, "__TEXT", "__stubs")
2180
+        .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?;
2181
+    let (section_addr, section_bytes) = output_section(bytes, "__TEXT", "__stubs")
2182
+        .ok_or_else(|| "missing __TEXT,__stubs section".to_string())?;
2183
+    if section_bytes.is_empty() {
2184
+        return Ok(Vec::new());
2185
+    }
2186
+    let stub_size = usize::try_from(header.reserved2)
2187
+        .ok()
2188
+        .filter(|size| *size > 0)
2189
+        .unwrap_or(12);
2190
+    if section_bytes.len() % stub_size != 0 {
2191
+        return Err(format!(
2192
+            "__TEXT,__stubs size {} is not a multiple of stub size {}",
2193
+            section_bytes.len(),
2194
+            stub_size
2195
+        ));
2196
+    }
2197
+    let mut out = Vec::new();
2198
+    for (idx, chunk) in section_bytes.chunks_exact(stub_size).enumerate() {
2199
+        out.push(decode_stub_target(
2200
+            chunk,
2201
+            section_addr + (idx * stub_size) as u64,
2202
+        )?);
2203
+    }
2204
+    Ok(out)
2205
+}
2206
+
2207
+fn decode_stub_target(bytes: &[u8], stub_addr: u64) -> Result<u64, String> {
2208
+    let adrp = read_insn(bytes, 0)?;
2209
+    let ldr = read_insn(bytes, 4)?;
2210
+    let br = read_insn(bytes, 8)?;
2211
+    if (adrp & 0x9f00_0000) != 0x9000_0000 {
2212
+        return Err(format!("stub at 0x{stub_addr:x} does not start with ADRP"));
2213
+    }
2214
+    if (ldr & 0xffc0_0000) != 0xf940_0000 {
2215
+        return Err(format!("stub at 0x{stub_addr:x} does not use LDR (unsigned)"));
2216
+    }
2217
+    if (br & 0xffff_fc1f) != 0xd61f_0000 {
2218
+        return Err(format!("stub at 0x{stub_addr:x} does not end with BR"));
2219
+    }
2220
+    let adrp_reg = (adrp & 0x1f) as u8;
2221
+    let ldr_base = ((ldr >> 5) & 0x1f) as u8;
2222
+    let ldr_reg = (ldr & 0x1f) as u8;
2223
+    let br_reg = ((br >> 5) & 0x1f) as u8;
2224
+    if adrp_reg != ldr_base || adrp_reg != ldr_reg || adrp_reg != br_reg {
2225
+        return Err(format!(
2226
+            "stub at 0x{stub_addr:x} uses inconsistent scratch regs: adrp=x{adrp_reg}, ldr base=x{ldr_base}, ldr rt=x{ldr_reg}, br=x{br_reg}"
2227
+        ));
2228
+    }
2229
+    let adrp_immlo = ((adrp >> 29) & 0x3) as i64;
2230
+    let adrp_immhi = ((adrp >> 5) & 0x7ffff) as i64;
2231
+    let adrp_pages = sign_extend_21((adrp_immhi << 2) | adrp_immlo);
2232
+    let adrp_base = ((stub_addr as i64) & !0xfff) + (adrp_pages << 12);
2233
+    let scaled = ((ldr >> 10) & 0xfff) as u64;
2234
+    Ok((adrp_base as u64) + scaled * 8)
2235
+}
2236
+
19862237
 fn symbol_values(bytes: &[u8]) -> Result<BTreeMap<String, u64>, String> {
19872238
     let header = parse_header(bytes).map_err(|e| e.to_string())?;
19882239
     let commands = parse_commands(&header, bytes).map_err(|e| e.to_string())?;
tests/parity_corpus/data_in_code_exec/command_checks.txtmodified
@@ -1,3 +1,3 @@
11
 build_version
22
 load_dylib_names
3
-data_in_code
3
+data_in_code_if_present
tests/parity_corpus/data_in_code_large_first_exec/command_checks.txtmodified
@@ -1,3 +1,3 @@
11
 build_version
22
 load_dylib_names
3
-data_in_code
3
+data_in_code_if_present
tests/parity_corpus/data_in_code_late_exec/command_checks.txtmodified
@@ -1,3 +1,3 @@
11
 build_version
22
 load_dylib_names
3
-data_in_code
3
+data_in_code_if_present
tests/parity_corpus/function_starts_exec/command_checks.txtmodified
@@ -1,4 +1,4 @@
11
 build_version
22
 load_dylib_names
33
 normalized_function_starts
4
-data_in_code
4
+data_in_code_if_present
tests/parity_corpus/hidden_got_exec/sections.txtmodified
@@ -1,1 +0,0 @@
1
-__TEXT __text
tests/parity_corpus/imported_tlv_exec/absent_sections.txtmodified
@@ -1,1 +0,0 @@
1
-__DATA __thread_ptrs