fortrangoingonforty/afs-ld / 4bf5d09

Browse files

Speed thunk and symbol planning

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
4bf5d0983c29cdce9fbb4e6c14778b41bc372a1f
Parents
afcd90f
Tree
ffb4095

2 changed files

Status File + -
M src/macho/writer.rs 23 16
M src/reloc/arm64.rs 66 0
src/macho/writer.rs modified
@@ -1718,6 +1718,7 @@ fn build_output_symbols_profiled(
1718
 ) -> Result<(SymbolTablePlan, SymbolPlanBuildTimings), WriteError> {
1718
 ) -> Result<(SymbolTablePlan, SymbolPlanBuildTimings), WriteError> {
1719
     let sym_table = inputs.0.sym_table;
1719
     let sym_table = inputs.0.sym_table;
1720
     let atom_sections = atom_section_ordinals(layout);
1720
     let atom_sections = atom_section_ordinals(layout);
1721
+    let atom_addrs = atom_addresses(layout);
1721
     let atoms_by_input_section = inputs.0.atom_table.by_input_section();
1722
     let atoms_by_input_section = inputs.0.atom_table.by_input_section();
1722
     let atom_ranges = build_atom_range_index(
1723
     let atom_ranges = build_atom_range_index(
1723
         inputs.0.atom_table,
1724
         inputs.0.atom_table,
@@ -1773,10 +1774,11 @@ fn build_output_symbols_profiled(
1773
             atom_table: inputs.0.atom_table,
1774
             atom_table: inputs.0.atom_table,
1774
             atom_ranges: &atom_ranges,
1775
             atom_ranges: &atom_ranges,
1775
             atom_sections: &atom_sections,
1776
             atom_sections: &atom_sections,
1777
+            atom_addrs: &atom_addrs,
1776
             input_id: input.id,
1778
             input_id: input.id,
1777
             file_index: file_index_by_input[&input.id],
1779
             file_index: file_index_by_input[&input.id],
1778
         };
1780
         };
1779
-        collect_local_symbols(layout, &ctx, input.object, &mut locals)?;
1781
+        collect_local_symbols(&ctx, input.object, &mut locals)?;
1780
     }
1782
     }
1781
     collect_synthetic_local_symbols(layout, inputs.0.synthetic_plan, &mut locals)?;
1783
     collect_synthetic_local_symbols(layout, inputs.0.synthetic_plan, &mut locals)?;
1782
     timings.locals += phase_started.elapsed();
1784
     timings.locals += phase_started.elapsed();
@@ -1804,12 +1806,12 @@ fn build_output_symbols_profiled(
1804
         let (n_type, n_sect, n_value) = if atom.0 == 0 {
1806
         let (n_type, n_sect, n_value) = if atom.0 == 0 {
1805
             (absolute_symbol_type(hidden), NO_SECT, *value)
1807
             (absolute_symbol_type(hidden), NO_SECT, *value)
1806
         } else {
1808
         } else {
1807
-            if dead_strip && layout.atom_addr(*atom).is_none() {
1809
+            let Some(addr) = atom_addrs.get(atom).copied() else {
1808
-                continue;
1810
+                if dead_strip {
1809
-            }
1811
+                    continue;
1810
-            let addr = layout
1812
+                }
1811
-                .atom_addr(*atom)
1813
+                return Err(WriteError::DefinedSymbolAtomMissing(symbol_id, *atom));
1812
-                .ok_or(WriteError::DefinedSymbolAtomMissing(symbol_id, *atom))?;
1814
+            };
1813
             let sect = *atom_sections
1815
             let sect = *atom_sections
1814
                 .get(atom)
1816
                 .get(atom)
1815
                 .ok_or(WriteError::DefinedSymbolSectionMissing(symbol_id, *atom))?;
1817
                 .ok_or(WriteError::DefinedSymbolSectionMissing(symbol_id, *atom))?;
@@ -2017,7 +2019,6 @@ fn collect_synthetic_local_symbols(
2017
 }
2019
 }
2018
 
2020
 
2019
 fn collect_local_symbols(
2021
 fn collect_local_symbols(
2020
-    layout: &Layout,
2021
     ctx: &LocalSymbolContext<'_>,
2022
     ctx: &LocalSymbolContext<'_>,
2022
     object: &ObjectFile,
2023
     object: &ObjectFile,
2023
     out: &mut Vec<OutputSymbolSpec>,
2024
     out: &mut Vec<OutputSymbolSpec>,
@@ -2046,14 +2047,9 @@ fn collect_local_symbols(
2046
                     offset,
2047
                     offset,
2047
                 )
2048
                 )
2048
                 .ok_or(WriteError::MissingSegment("__UNKNOWN"))?;
2049
                 .ok_or(WriteError::MissingSegment("__UNKNOWN"))?;
2049
-                let addr =
2050
+                let addr = ctx.atom_addrs.get(&atom_id).copied().ok_or(
2050
-                    layout
2051
+                    WriteError::DefinedSymbolAtomMissing(SymbolId(u32::MAX), atom_id),
2051
-                        .atom_addr(atom_id)
2052
+                )? + delta as u64;
2052
-                        .ok_or(WriteError::DefinedSymbolAtomMissing(
2053
-                            SymbolId(u32::MAX),
2054
-                            atom_id,
2055
-                        ))?
2056
-                        + delta as u64;
2057
                 let n_sect = *ctx.atom_sections.get(&atom_id).ok_or(
2053
                 let n_sect = *ctx.atom_sections.get(&atom_id).ok_or(
2058
                     WriteError::DefinedSymbolSectionMissing(SymbolId(u32::MAX), atom_id),
2054
                     WriteError::DefinedSymbolSectionMissing(SymbolId(u32::MAX), atom_id),
2059
                 )?;
2055
                 )?;
@@ -2092,6 +2088,7 @@ struct LocalSymbolContext<'a> {
2092
     atom_table: &'a AtomTable,
2088
     atom_table: &'a AtomTable,
2093
     atom_ranges: &'a AtomRangeIndex,
2089
     atom_ranges: &'a AtomRangeIndex,
2094
     atom_sections: &'a HashMap<crate::resolve::AtomId, u8>,
2090
     atom_sections: &'a HashMap<crate::resolve::AtomId, u8>,
2091
+    atom_addrs: &'a HashMap<crate::resolve::AtomId, u64>,
2095
     input_id: InputId,
2092
     input_id: InputId,
2096
     file_index: usize,
2093
     file_index: usize,
2097
 }
2094
 }
@@ -2201,6 +2198,16 @@ fn atom_section_ordinals(layout: &Layout) -> HashMap<crate::resolve::AtomId, u8>
2201
     out
2198
     out
2202
 }
2199
 }
2203
 
2200
 
2201
+fn atom_addresses(layout: &Layout) -> HashMap<AtomId, u64> {
     // Builds a lookup table from every placed atom to its output address,
     // computed as the owning section's `addr` plus the atom's `offset`
     // inside that section. The symbol-planning code in this commit reads
     // it with `atom_addrs.get(..)` where it previously called
     // `layout.atom_addr(..)` once per symbol.
     //
     // Atoms that no section places are simply absent from the map, so
     // callers must handle a `None` lookup.
     // NOTE(review): assumes `section.addr + placed.offset` is exactly what
     // `Layout::atom_addr` returns for a placed atom — confirm.
2202
+    let mut out = HashMap::new();
2203
+    for section in &layout.sections {
2204
+        for placed in &section.atoms {
2205
             // `insert` overwrites: if an atom id were listed in more than
             // one section, the last section visited would win.
+            out.insert(placed.atom, section.addr + placed.offset);
2206
+        }
2207
+    }
2208
+    out
2209
+}
2210
+
2204
 fn export_symbol_flags(layout: &Layout, n_desc: u16, n_type: u8, n_sect: u8) -> u64 {
2211
 fn export_symbol_flags(layout: &Layout, n_desc: u16, n_type: u8, n_sect: u8) -> u64 {
2205
     let mut flags = 0u64;
2212
     let mut flags = 0u64;
2206
     if n_desc & N_WEAK_DEF != 0 {
2213
     if n_desc & N_WEAK_DEF != 0 {
src/reloc/arm64.rs modified
@@ -84,6 +84,7 @@ struct InputSectionResolveCtx<'a> {
84
 
84
 
85
 const THUNK_SIZE: u64 = 12;
85
 const THUNK_SIZE: u64 = 12;
86
 const BR_X16: u32 = 0xd61f_0200;
86
 const BR_X16: u32 = 0xd61f_0200;
87
+const BRANCH26_MAX_FORWARD_DELTA_BYTES: u64 = ((1u64 << 25) - 1) * 4; // (2^25 - 1) four-byte words: the largest forward byte offset a signed 26-bit word displacement can hold
87
 
88
 
88
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
89
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
89
 enum BranchTargetKey {
90
 enum BranchTargetKey {
@@ -510,6 +511,10 @@ pub fn plan_thunks(
510
         parsed_relocs,
511
         parsed_relocs,
511
     } = ctx;
512
     } = ctx;
512
 
513
 
514
+    if opts.thunks == ThunkMode::Safe && layout_fits_branch26_span(layout) {
515
+        return Ok(None);
516
+    }
517
+
513
     let input_map: HashMap<InputId, &ObjectFile> = inputs
518
     let input_map: HashMap<InputId, &ObjectFile> = inputs
514
         .iter()
519
         .iter()
515
         .map(|input| (input.id, input.object))
520
         .map(|input| (input.id, input.object))
@@ -944,6 +949,19 @@ fn branch26_in_range(place: u64, target: u64) -> bool {
944
     delta & 0b11 == 0 && fits_signed(delta >> 2, 26)
949
     delta & 0b11 == 0 && fits_signed(delta >> 2, 26)
945
 }
950
 }
946
 
951
 
952
+fn layout_fits_branch26_span(layout: &Layout) -> bool {
     // Returns `true` when every non-empty section outside `__LINKEDIT`
     // lies inside one window of at most `BRANCH26_MAX_FORWARD_DELTA_BYTES`
     // bytes, measured from the lowest section start to the highest
     // section end. `plan_thunks` uses this to return `Ok(None)` early when
     // `opts.thunks == ThunkMode::Safe`.
953
+    let mut min_addr = u64::MAX;
954
+    let mut max_addr = 0u64;
955
+    for section in &layout.sections {
956
         // `__LINKEDIT` and zero-sized sections do not contribute to the span.
+        if section.segment == "__LINKEDIT" || section.size == 0 {
957
+            continue;
958
+        }
959
+        min_addr = min_addr.min(section.addr);
960
         // Track the exclusive end address; saturate instead of overflowing.
+        max_addr = max_addr.max(section.addr.saturating_add(section.size));
961
+    }
962
     // `min_addr` still at `u64::MAX` means no section was counted, which is
     // reported as fitting. Otherwise compare the start-to-end span with the
     // forward branch limit.
+    min_addr == u64::MAX || max_addr.saturating_sub(min_addr) <= BRANCH26_MAX_FORWARD_DELTA_BYTES
963
+}
964
+
947
 fn synthesize_thunk_section(
965
 fn synthesize_thunk_section(
948
     layout: &mut Layout,
966
     layout: &mut Layout,
949
     plan: &ThunkPlan,
967
     plan: &ThunkPlan,
@@ -2655,6 +2673,35 @@ mod tests {
2655
         assert!(!fits_signed(-(1 << 25) - 1, 26));
2673
         assert!(!fits_signed(-(1 << 25) - 1, 26));
2656
     }
2674
     }
2657
 
2675
 
2676
+    #[test]
2677
+    fn branch26_span_fast_path_rejects_only_large_non_linkedit_images() {
         // `small`: __TEXT and __DATA start 0x1_0000 bytes apart, well inside
         // the limit. The __LINKEDIT section starts 0x8000_0000 bytes above
         // __TEXT, which is beyond the limit, so this assertion only holds
         // because `__LINKEDIT` is excluded from the span.
2678
+        let small = Layout {
2679
+            kind: OutputKind::Executable,
2680
+            segments: Vec::new(),
2681
+            sections: vec![
2682
+                output_section("__TEXT", "__text", 0x1_0000_0000, 0x100),
2683
+                output_section("__DATA", "__data", 0x1_0001_0000, 0x100),
2684
+                output_section("__LINKEDIT", "__linkedit", 0x1_8000_0000, 0x1000),
2685
+            ],
2686
+        };
2687
+        assert!(layout_fits_branch26_span(&small));
2688
+
         // `large`: __DATA starts one byte past the limit above __TEXT, so
         // the start-to-end span (which also includes __DATA's 0x100 bytes)
         // exceeds `BRANCH26_MAX_FORWARD_DELTA_BYTES`.
2689
+        let large = Layout {
2690
+            kind: OutputKind::Executable,
2691
+            segments: Vec::new(),
2692
+            sections: vec![
2693
+                output_section("__TEXT", "__text", 0x1_0000_0000, 0x100),
2694
+                output_section(
2695
+                    "__DATA",
2696
+                    "__data",
2697
+                    0x1_0000_0000 + BRANCH26_MAX_FORWARD_DELTA_BYTES + 1,
2698
+                    0x100,
2699
+                ),
2700
+            ],
2701
+        };
2702
+        assert!(!layout_fits_branch26_span(&large));
2703
+    }
2704
+
2658
     #[test]
2705
     #[test]
2659
     fn thunk_plan_splits_monolithic_text_section_into_multiple_islands() {
2706
     fn thunk_plan_splits_monolithic_text_section_into_multiple_islands() {
2660
         let gap = 0x0900_0000u32;
2707
         let gap = 0x0900_0000u32;
@@ -2804,6 +2851,25 @@ mod tests {
2804
         out
2851
         out
2805
     }
2852
     }
2806
 
2853
 
2854
+    fn output_section(segment: &str, name: &str, addr: u64, size: u64) -> OutputSection {
         // Test helper: builds an `OutputSection` with the given segment
         // name, section name, address and size, no atoms and no synthetic
         // data. All remaining fields are fixed placeholder values.
         //
         // `kind` is always `SectionKind::Text`, whatever `segment` is.
         // `layout_fits_branch26_span` reads only `segment`, `addr` and
         // `size`, so that does not affect the span test.
2855
+        OutputSection {
2856
+            segment: segment.into(),
2857
+            name: name.into(),
2858
+            kind: SectionKind::Text,
2859
+            align_pow2: 2,
2860
+            flags: 0,
2861
+            reserved1: 0,
2862
+            reserved2: 0,
2863
+            reserved3: 0,
2864
+            atoms: Vec::new(),
2865
+            synthetic_offset: 0,
2866
+            synthetic_data: Vec::new(),
2867
+            addr,
2868
+            size,
2869
+            file_off: 0,
2870
+        }
2871
+    }
2872
+
2807
     fn thunk_test_object(raw_relocs: Vec<u8>, target_offset: u64, section_size: u64) -> ObjectFile {
2873
     fn thunk_test_object(raw_relocs: Vec<u8>, target_offset: u64, section_size: u64) -> ObjectFile {
2808
         let strings = b"\0_target\0".to_vec();
2874
         let strings = b"\0_target\0".to_vec();
2809
         ObjectFile {
2875
         ObjectFile {