fortrangoingonforty/afs-ld / e41e4b6

Browse files

Speed linkedit export planning

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
e41e4b61a533d1f494557e235fe07b3555ba562a
Parents
1d9f402
Tree
5c52ea0

5 changed files

Status File + -
M src/lib.rs 12 0
M src/macho/writer.rs 76 18
M src/string_table.rs 9 17
M src/synth/dyld_info.rs 55 57
M tests/perf_baseline.rs 11 1
src/lib.rs modified
@@ -225,6 +225,9 @@ pub struct LinkPhaseTimings {
225225
     pub synth_linkedit_symbol_plan_globals: Duration,
226226
     pub synth_linkedit_symbol_plan_strtab: Duration,
227227
     pub synth_linkedit_dyld_info: Duration,
228
+    pub synth_linkedit_dyld_bind: Duration,
229
+    pub synth_linkedit_dyld_rebase: Duration,
230
+    pub synth_linkedit_dyld_export: Duration,
228231
     pub synth_linkedit_metadata_tables: Duration,
229232
     pub synth_linkedit_code_signature: Duration,
230233
     pub synth_unwind: Duration,
@@ -705,6 +708,9 @@ impl Linker {
705708
         let mut synth_linkedit_symbol_plan_globals = Duration::ZERO;
706709
         let mut synth_linkedit_symbol_plan_strtab = Duration::ZERO;
707710
         let mut synth_linkedit_dyld_info = Duration::ZERO;
711
+        let mut synth_linkedit_dyld_bind = Duration::ZERO;
712
+        let mut synth_linkedit_dyld_rebase = Duration::ZERO;
713
+        let mut synth_linkedit_dyld_export = Duration::ZERO;
708714
         let mut synth_linkedit_metadata_tables = Duration::ZERO;
709715
         let mut synth_linkedit_code_signature = Duration::ZERO;
710716
         let mut synth_unwind = Duration::ZERO;
@@ -724,6 +730,9 @@ impl Linker {
724730
             synth_linkedit_symbol_plan_globals += linkedit_timings.symbol_plan_globals;
725731
             synth_linkedit_symbol_plan_strtab += linkedit_timings.symbol_plan_strtab;
726732
             synth_linkedit_dyld_info += linkedit_timings.dyld_info;
733
+            synth_linkedit_dyld_bind += linkedit_timings.dyld_bind;
734
+            synth_linkedit_dyld_rebase += linkedit_timings.dyld_rebase;
735
+            synth_linkedit_dyld_export += linkedit_timings.dyld_export;
727736
             synth_linkedit_metadata_tables += linkedit_timings.metadata_tables;
728737
             synth_linkedit_code_signature += linkedit_timings.code_signature;
729738
             layout = next_layout;
@@ -748,6 +757,9 @@ impl Linker {
748757
         phases.synth_linkedit_symbol_plan_globals = synth_linkedit_symbol_plan_globals;
749758
         phases.synth_linkedit_symbol_plan_strtab = synth_linkedit_symbol_plan_strtab;
750759
         phases.synth_linkedit_dyld_info = synth_linkedit_dyld_info;
760
+        phases.synth_linkedit_dyld_bind = synth_linkedit_dyld_bind;
761
+        phases.synth_linkedit_dyld_rebase = synth_linkedit_dyld_rebase;
762
+        phases.synth_linkedit_dyld_export = synth_linkedit_dyld_export;
751763
         phases.synth_linkedit_metadata_tables = synth_linkedit_metadata_tables;
752764
         phases.synth_linkedit_code_signature = synth_linkedit_code_signature;
753765
         phases.synth_unwind = synth_unwind;
src/macho/writer.rs modified
@@ -23,7 +23,7 @@ use crate::macho::reader::{
2323
 use crate::reloc::{
2424
     parse_raw_relocs, parse_relocs, ParsedRelocCache, Referent, Reloc, RelocKind, RelocLength,
2525
 };
26
-use crate::resolve::InputId;
26
+use crate::resolve::{AtomId, InputId};
2727
 use crate::resolve::{Symbol, SymbolId, SymbolTable};
2828
 use crate::section::is_executable;
2929
 use crate::string_table::StringTableBuilder;
@@ -62,6 +62,9 @@ pub struct LinkEditBuildTimings {
6262
     pub symbol_plan_globals: Duration,
6363
     pub symbol_plan_strtab: Duration,
6464
     pub dyld_info: Duration,
65
+    pub dyld_bind: Duration,
66
+    pub dyld_rebase: Duration,
67
+    pub dyld_export: Duration,
6568
     pub metadata_tables: Duration,
6669
     pub code_signature: Duration,
6770
 }
@@ -73,6 +76,9 @@ impl std::ops::AddAssign for LinkEditBuildTimings {
7376
         self.symbol_plan_globals += rhs.symbol_plan_globals;
7477
         self.symbol_plan_strtab += rhs.symbol_plan_strtab;
7578
         self.dyld_info += rhs.dyld_info;
79
+        self.dyld_bind += rhs.dyld_bind;
80
+        self.dyld_rebase += rhs.dyld_rebase;
81
+        self.dyld_export += rhs.dyld_export;
7682
         self.metadata_tables += rhs.metadata_tables;
7783
         self.code_signature += rhs.code_signature;
7884
     }
@@ -956,14 +962,20 @@ fn build_linkedit_plan_profiled(
956962
         indirect_bytes.extend_from_slice(&index.to_le_bytes());
957963
     }
958964
 
965
+    let dyld_started = std::time::Instant::now();
959966
     let phase_started = std::time::Instant::now();
960967
     let bind_streams = build_bind_streams(layout, synthetic_plan, &import_lookup)?;
961
-    let rebase_bytes = pad_dyld_info_stream(build_rebase_stream(layout, synthetic_plan, inputs)?);
962968
     let bind_bytes = pad_dyld_info_stream(bind_streams.bind);
963969
     let weak_bind_bytes = pad_dyld_info_stream(bind_streams.weak_bind);
964970
     let lazy_bind_bytes = pad_dyld_info_stream(bind_streams.lazy_bind);
971
+    timings.dyld_bind += phase_started.elapsed();
972
+    let phase_started = std::time::Instant::now();
973
+    let rebase_bytes = pad_dyld_info_stream(build_rebase_stream(layout, synthetic_plan, inputs)?);
974
+    timings.dyld_rebase += phase_started.elapsed();
975
+    let phase_started = std::time::Instant::now();
965976
     let export_bytes = pad_dyld_info_stream(build_export_trie(&symbol_plan.exports));
966
-    timings.dyld_info += phase_started.elapsed();
977
+    timings.dyld_export += phase_started.elapsed();
978
+    timings.dyld_info += dyld_started.elapsed();
967979
 
968980
     let phase_started = std::time::Instant::now();
969981
     let loh_bytes = build_loh(
@@ -2384,6 +2396,7 @@ fn build_bind_streams(
23842396
     let weak_bind = Vec::new();
23852397
     let mut lazy_bind = OpcodeStream::new();
23862398
     let mut lazy_offsets = HashMap::new();
2399
+    let layout_index = BindLayoutIndex::build(layout)?;
23872400
 
23882401
     if let Some(tlv_bootstrap) = synthetic_plan.tlv_bootstrap_symbol {
23892402
         let segment_index = segment_index(layout, "__DATA")?;
@@ -2450,29 +2463,21 @@ fn build_bind_streams(
24502463
             .get(&entry.symbol)
24512464
             .copied()
24522465
             .ok_or(WriteError::ImportSymbolMissing(entry.symbol))?;
2453
-        let atom_addr = layout
2454
-            .atom_addr(entry.atom)
2466
+        let placement = layout_index
2467
+            .atoms
2468
+            .get(&entry.atom)
24552469
             .ok_or(WriteError::DirectBindAtomMissing(entry.atom))?;
2456
-        let section = layout
2457
-            .sections
2458
-            .iter()
2459
-            .find(|section| section.atoms.iter().any(|placed| placed.atom == entry.atom))
2460
-            .ok_or(WriteError::DirectBindSectionMissing(entry.atom))?;
2461
-        if section.segment == "__DATA" && section.name == "__thread_vars" {
2470
+        if placement.is_thread_vars {
24622471
             // `__thread_vars` starts are emitted through the dedicated
24632472
             // `__tlv_bootstrap` pass above. Descriptor tails are rewritten to
24642473
             // template offsets before write, so any generic direct bind landing
24652474
             // back in this section is stale and would override the TLV bind.
24662475
             continue;
24672476
         }
2468
-        let segment_index = segment_index(layout, &section.segment)?;
2469
-        let segment = layout
2470
-            .segment(&section.segment)
2471
-            .ok_or(WriteError::MissingSegment("__UNKNOWN"))?;
2472
-        let slot_addr = atom_addr + entry.atom_offset as u64;
2477
+        let slot_addr = placement.addr + entry.atom_offset as u64;
24732478
         bind_specs.push(BindRecordSpec {
2474
-            segment_index,
2475
-            segment_offset: slot_addr - segment.vm_addr,
2479
+            segment_index: placement.segment_index,
2480
+            segment_offset: slot_addr - placement.segment_vm_addr,
24762481
             ordinal: import.ordinal,
24772482
             name: &import.name,
24782483
             weak_import: import.weak_import,
@@ -2521,6 +2526,59 @@ fn build_bind_streams(
25212526
     })
25222527
 }
25232528
 
2529
+struct BindLayoutIndex {
2530
+    atoms: HashMap<AtomId, BindAtomPlacement>,
2531
+}
2532
+
2533
+#[derive(Clone, Copy)]
2534
+struct BindAtomPlacement {
2535
+    addr: u64,
2536
+    segment_index: u8,
2537
+    segment_vm_addr: u64,
2538
+    is_thread_vars: bool,
2539
+}
2540
+
2541
+impl BindLayoutIndex {
2542
+    fn build(layout: &Layout) -> Result<Self, WriteError> {
2543
+        let mut segment_meta = HashMap::with_capacity(layout.segments.len());
2544
+        for (idx, segment) in layout.segments.iter().enumerate() {
2545
+            segment_meta.insert(
2546
+                segment.name.as_str(),
2547
+                (
2548
+                    u8::try_from(idx).map_err(|_| WriteError::OffsetTooLarge("segment index"))?,
2549
+                    segment.vm_addr,
2550
+                ),
2551
+            );
2552
+        }
2553
+        let atom_count: usize = layout
2554
+            .sections
2555
+            .iter()
2556
+            .map(|section| section.atoms.len())
2557
+            .sum();
2558
+        let mut atoms = HashMap::with_capacity(atom_count);
2559
+        for section in &layout.sections {
2560
+            let Some((segment_index, segment_vm_addr)) =
2561
+                segment_meta.get(section.segment.as_str()).copied()
2562
+            else {
2563
+                continue;
2564
+            };
2565
+            let is_thread_vars = section.segment == "__DATA" && section.name == "__thread_vars";
2566
+            for placed in &section.atoms {
2567
+                atoms.insert(
2568
+                    placed.atom,
2569
+                    BindAtomPlacement {
2570
+                        addr: section.addr + placed.offset,
2571
+                        segment_index,
2572
+                        segment_vm_addr,
2573
+                        is_thread_vars,
2574
+                    },
2575
+                );
2576
+            }
2577
+        }
2578
+        Ok(Self { atoms })
2579
+    }
2580
+}
2581
+
25242582
 fn segment_index(layout: &Layout, name: &str) -> Result<u8, WriteError> {
25252583
     let idx = layout
25262584
         .segments
src/string_table.rs modified
@@ -99,7 +99,6 @@ impl StringTable {
9999
 #[derive(Debug, Clone, Default, PartialEq, Eq)]
100100
 pub struct StringTableBuilder {
101101
     roots: Vec<RootString>,
102
-    roots_by_last_byte: HashMap<u8, Vec<usize>>,
103102
     offsets: HashMap<String, u32>,
104103
 }
105104
 
@@ -132,17 +131,10 @@ impl StringTableBuilder {
132131
             let offset = raw.len() as u32;
133132
             raw.extend_from_slice(name.as_bytes());
134133
             raw.push(0);
135
-            let root_index = self.roots.len();
136134
             self.roots.push(RootString {
137135
                 name: name.clone(),
138136
                 offset,
139137
             });
140
-            if let Some(&last_byte) = name.as_bytes().last() {
141
-                self.roots_by_last_byte
142
-                    .entry(last_byte)
143
-                    .or_default()
144
-                    .push(root_index);
145
-            }
146138
             self.offsets.insert(name, offset);
147139
         }
148140
 
@@ -153,15 +145,15 @@ impl StringTableBuilder {
153145
     }
154146
 
155147
     fn find_suffix_offset(&self, name: &str) -> Option<u32> {
156
-        let last_byte = *name.as_bytes().last()?;
157
-        self.roots_by_last_byte
158
-            .get(&last_byte)?
159
-            .iter()
160
-            .find_map(|&idx| {
161
-                let existing = &self.roots[idx];
162
-                (existing.name.len() >= name.len() && existing.name.ends_with(name))
163
-                    .then(|| existing.offset + (existing.name.len() - name.len()) as u32)
164
-            })
148
+        if name.is_empty() {
149
+            return Some(0);
150
+        }
151
+        let insert_at = self
152
+            .roots
153
+            .partition_point(|root| reverse_suffix_order(&root.name, name).is_lt());
154
+        let existing = self.roots.get(insert_at.checked_sub(1)?)?;
155
+        (existing.name.len() >= name.len() && existing.name.ends_with(name))
156
+            .then(|| existing.offset + (existing.name.len() - name.len()) as u32)
165157
     }
166158
 }
167159
 
src/synth/dyld_info.rs modified
@@ -1,5 +1,3 @@
1
-use std::collections::BTreeMap;
2
-
31
 use crate::leb::{write_sleb, write_uleb};
42
 use crate::macho::constants::{
53
     BIND_IMMEDIATE_MASK, BIND_OPCODE_ADD_ADDR_ULEB, BIND_OPCODE_DO_BIND,
@@ -81,25 +79,9 @@ struct BindState {
8179
     pointer_type_set: bool,
8280
 }
8381
 
84
-#[derive(Debug, Clone, Default)]
85
-struct TrieNode {
86
-    terminal: Option<ExportEntry>,
87
-    children: BTreeMap<u8, TrieNode>,
88
-}
89
-
90
-impl TrieNode {
91
-    fn insert(&mut self, name: &str, entry: ExportEntry) {
92
-        let mut node = self;
93
-        for byte in name.bytes() {
94
-            node = node.children.entry(byte).or_default();
95
-        }
96
-        node.terminal = Some(entry);
97
-    }
98
-}
99
-
10082
 #[derive(Debug, Clone)]
10183
 struct FlatTrieNode {
102
-    terminal: Option<ExportEntry>,
84
+    terminal_payload: Vec<u8>,
10385
     children: Vec<(String, usize)>,
10486
 }
10587
 
@@ -108,17 +90,11 @@ pub fn build_export_trie(entries: &[ExportEntry]) -> Vec<u8> {
10890
         return Vec::new();
10991
     }
11092
 
111
-    let mut sorted = entries.to_vec();
93
+    let mut sorted: Vec<&ExportEntry> = entries.iter().collect();
11294
     sorted.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
11395
 
114
-    let mut root = TrieNode::default();
115
-    for entry in sorted {
116
-        let name = entry.name.clone();
117
-        root.insert(&name, entry);
118
-    }
119
-
12096
     let mut nodes = Vec::new();
121
-    flatten_trie(&root, &mut nodes);
97
+    flatten_sorted_export_trie(&sorted, 0, &mut nodes);
12298
 
12399
     let mut offsets = vec![0usize; nodes.len()];
124100
     loop {
@@ -149,42 +125,67 @@ pub fn build_export_trie(entries: &[ExportEntry]) -> Vec<u8> {
149125
     out
150126
 }
151127
 
152
-fn flatten_trie(node: &TrieNode, flat: &mut Vec<FlatTrieNode>) -> usize {
128
+fn flatten_sorted_export_trie(
129
+    entries: &[&ExportEntry],
130
+    prefix_len: usize,
131
+    flat: &mut Vec<FlatTrieNode>,
132
+) -> usize {
153133
     let id = flat.len();
134
+    let mut entry_idx = 0usize;
135
+    let mut terminal = None;
136
+    while entries
137
+        .get(entry_idx)
138
+        .is_some_and(|entry| entry.name.len() == prefix_len)
139
+    {
140
+        terminal = Some(entries[entry_idx]);
141
+        entry_idx += 1;
142
+    }
143
+
154144
     flat.push(FlatTrieNode {
155
-        terminal: node.terminal.clone(),
145
+        terminal_payload: terminal_payload(terminal),
156146
         children: Vec::new(),
157147
     });
158148
 
159
-    let mut children = Vec::with_capacity(node.children.len());
160
-    for (&edge, child) in &node.children {
161
-        let (label, child_id) = flatten_edge(edge, child, flat);
149
+    let mut children = Vec::new();
150
+    while entry_idx < entries.len() {
151
+        let edge = entries[entry_idx].name.as_bytes()[prefix_len];
152
+        let group_start = entry_idx;
153
+        entry_idx += 1;
154
+        while entry_idx < entries.len() && entries[entry_idx].name.as_bytes()[prefix_len] == edge {
155
+            entry_idx += 1;
156
+        }
157
+        let group = &entries[group_start..entry_idx];
158
+        let label_end = common_prefix_len(group, prefix_len + 1);
159
+        let label = String::from_utf8(group[0].name.as_bytes()[prefix_len..label_end].to_vec())
160
+            .expect("export labels should stay UTF-8");
161
+        let child_id = flatten_sorted_export_trie(group, label_end, flat);
162162
         children.push((label, child_id));
163163
     }
164164
     flat[id].children = children;
165165
     id
166166
 }
167167
 
168
-fn flatten_edge(first: u8, child: &TrieNode, flat: &mut Vec<FlatTrieNode>) -> (String, usize) {
169
-    let mut label = vec![first];
170
-    let mut node = child;
171
-    while node.terminal.is_none() && node.children.len() == 1 {
172
-        let (&next, next_child) = node
173
-            .children
174
-            .iter()
175
-            .next()
176
-            .expect("single-child trie node should expose one edge");
177
-        label.push(next);
178
-        node = next_child;
179
-    }
180
-    let label = String::from_utf8(label).expect("export labels should stay UTF-8");
181
-    let child_id = flatten_trie(node, flat);
182
-    (label, child_id)
168
+fn common_prefix_len(entries: &[&ExportEntry], start: usize) -> usize {
169
+    let first = entries
170
+        .first()
171
+        .expect("export trie child groups should be non-empty")
172
+        .name
173
+        .as_bytes();
174
+    let mut len = first.len();
175
+    for entry in &entries[1..] {
176
+        let bytes = entry.name.as_bytes();
177
+        len = len.min(bytes.len());
178
+        let mut idx = start;
179
+        while idx < len && first[idx] == bytes[idx] {
180
+            idx += 1;
181
+        }
182
+        len = idx;
183
+    }
184
+    len
183185
 }
184186
 
185187
 fn trie_node_size(node: &FlatTrieNode, offsets: &[usize]) -> usize {
186
-    let terminal = terminal_payload(node.terminal.as_ref());
187
-    let mut size = uleb_size(terminal.len() as u64) + terminal.len() + 1;
188
+    let mut size = uleb_size(node.terminal_payload.len() as u64) + node.terminal_payload.len() + 1;
188189
     for (edge, child) in &node.children {
189190
         size += edge.len() + 1 + uleb_size(offsets[*child] as u64);
190191
     }
@@ -192,10 +193,9 @@ fn trie_node_size(node: &FlatTrieNode, offsets: &[usize]) -> usize {
192193
 }
193194
 
194195
 fn emit_trie_node(node: &FlatTrieNode, offsets: &[usize], out: &mut Vec<u8>) {
195
-    let terminal = terminal_payload(node.terminal.as_ref());
196196
     let mut stream = OpcodeStream::new();
197
-    stream.uleb(terminal.len() as u64);
198
-    stream.bytes(&terminal);
197
+    stream.uleb(node.terminal_payload.len() as u64);
198
+    stream.bytes(&node.terminal_payload);
199199
     stream
200200
         .byte(u8::try_from(node.children.len()).expect("export trie node fanout should fit in u8"));
201201
     for (edge, child) in &node.children {
@@ -252,7 +252,7 @@ pub fn emit_rebase_run(out: &mut OpcodeStream, count: usize) {
252252
 pub fn emit_bind_records(specs: &[BindRecordSpec<'_>]) -> Vec<u8> {
253253
     let mut out = OpcodeStream::new();
254254
     let mut state = BindState::default();
255
-    let mut current_symbol: Option<String> = None;
255
+    let mut current_symbol: Option<&str> = None;
256256
 
257257
     let mut idx = 0usize;
258258
     while idx < specs.len() {
@@ -262,14 +262,12 @@ pub fn emit_bind_records(specs: &[BindRecordSpec<'_>]) -> Vec<u8> {
262262
             state.ordinal = Some(spec.ordinal);
263263
         }
264264
 
265
-        if current_symbol.as_deref() != Some(spec.name)
266
-            || state.weak_import != Some(spec.weak_import)
267
-        {
265
+        if current_symbol != Some(spec.name) || state.weak_import != Some(spec.weak_import) {
268266
             out.byte(
269267
                 BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | bind_symbol_flags(spec.weak_import),
270268
             );
271269
             out.string(spec.name);
272
-            current_symbol = Some(spec.name.to_string());
270
+            current_symbol = Some(spec.name);
273271
             state.weak_import = Some(spec.weak_import);
274272
         }
275273
 
tests/perf_baseline.rs modified
@@ -39,7 +39,7 @@ fn executable_opts(inputs: Vec<PathBuf>, output: PathBuf) -> LinkOptions {
3939
 
4040
 fn assert_profile_basics(name: &str, profile: &LinkProfile) {
4141
     eprintln!(
42
-        "{name}: total={:?} parse={:?} resolve={:?} atomize={:?} layout={:?} (entry={:?} dead={:?} icf={:?} synth_plan={:?} build={:?} thunks={:?}) synth={:?} (linkedit={:?}: symbols={:?} [locals={:?} globals={:?} strtab={:?}] dyld={:?} metadata={:?} codesig={:?}; unwind={:?}) reloc={:?} write={:?}",
42
+        "{name}: total={:?} parse={:?} resolve={:?} atomize={:?} layout={:?} (entry={:?} dead={:?} icf={:?} synth_plan={:?} build={:?} thunks={:?}) synth={:?} (linkedit={:?}: symbols={:?} [locals={:?} globals={:?} strtab={:?}] dyld={:?} [bind={:?} rebase={:?} export={:?}] metadata={:?} codesig={:?}; unwind={:?}) reloc={:?} write={:?}",
4343
         profile.total_wall,
4444
         profile.phases.input_parsing,
4545
         profile.phases.symbol_resolution,
@@ -58,6 +58,9 @@ fn assert_profile_basics(name: &str, profile: &LinkProfile) {
5858
         profile.phases.synth_linkedit_symbol_plan_globals,
5959
         profile.phases.synth_linkedit_symbol_plan_strtab,
6060
         profile.phases.synth_linkedit_dyld_info,
61
+        profile.phases.synth_linkedit_dyld_bind,
62
+        profile.phases.synth_linkedit_dyld_rebase,
63
+        profile.phases.synth_linkedit_dyld_export,
6164
         profile.phases.synth_linkedit_metadata_tables,
6265
         profile.phases.synth_linkedit_code_signature,
6366
         profile.phases.synth_unwind,
@@ -103,6 +106,13 @@ fn assert_profile_basics(name: &str, profile: &LinkProfile) {
103106
                 + profile.phases.synth_linkedit_symbol_plan_strtab,
104107
         "{name}: symbol-plan subphases exceeded symbol-plan total"
105108
     );
109
+    assert!(
110
+        profile.phases.synth_linkedit_dyld_info
111
+            >= profile.phases.synth_linkedit_dyld_bind
112
+                + profile.phases.synth_linkedit_dyld_rebase
113
+                + profile.phases.synth_linkedit_dyld_export,
114
+        "{name}: dyld-info subphases exceeded dyld-info total"
115
+    );
106116
 }
107117
 
108118
 #[test]