fortrangoingonforty/afs-ld / bb7169d

Browse files

Parallelize atom relocation patching

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
bb7169d27fdb113e1806783e17df550269f4b1de
Parents
47f6d74
Tree
540fd13

4 changed files

Status | File | + | -
M src/lib.rs 1 0
M src/reloc/arm64.rs 90 36
M src/resolve.rs 4 5
M tests/determinism.rs 69 0
src/lib.rs (modified)
@@ -857,6 +857,7 @@ impl Linker {
857857
                 linkedit: &linkedit,
858858
                 icf_redirects,
859859
                 parsed_relocs: &parsed_relocs,
860
+                parallel_jobs,
860861
             },
861862
         )?;
862863
         phases.reloc_apply = phase_started.elapsed();
src/reloc/arm64.rs (modified)
@@ -1,6 +1,7 @@
11
 use std::collections::HashMap;
22
 use std::fmt;
33
 use std::path::PathBuf;
4
+use std::thread;
45
 
56
 use crate::atom::{Atom, AtomSection, AtomTable};
67
 use crate::input::ObjectFile;
@@ -8,7 +9,7 @@ use crate::layout::{ExtraOutputSection, ExtraSectionAnchor, Layout, LayoutInput}
89
 use crate::macho::writer::LinkEditPlan;
910
 use crate::reloc::{ParsedRelocCache, Referent, Reloc, RelocKind, RelocLength};
1011
 use crate::resolve::{InputId, Symbol, SymbolId, SymbolTable};
11
-use crate::section::{OutputSection, SectionKind};
12
+use crate::section::{OutputAtom, OutputSection, SectionKind};
1213
 use crate::symbol::{InputSymbol, SymKind};
1314
 use crate::synth::stubs::{STUB_HELPER_ENTRY_SIZE, STUB_HELPER_HEADER_SIZE, STUB_SIZE};
1415
 use crate::synth::tlv::THREAD_VARIABLE_DESCRIPTOR_SIZE;
@@ -73,6 +74,7 @@ pub struct ApplyLayoutPlan<'a> {
7374
     pub linkedit: &'a LinkEditPlan,
7475
     pub icf_redirects: Option<&'a HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>,
7576
     pub parsed_relocs: &'a ParsedRelocCache,
77
+    pub parallel_jobs: usize,
7678
 }
7779
 
7880
 struct InputSectionResolveCtx<'a> {
@@ -82,6 +84,15 @@ struct InputSectionResolveCtx<'a> {
8284
     referent: &'a str,
8385
 }
8486
 
87
+struct RegularRelocContext<'a> {
88
+    input_map: &'a HashMap<InputId, &'a ObjectFile>,
89
+    atoms: &'a AtomTable,
90
+    resolve: &'a ResolveView<'a>,
91
+    thunk_plan: Option<&'a ThunkPlan>,
92
+    thunk_addrs: Option<&'a HashMap<usize, u64>>,
93
+    parsed_relocs: &'a ParsedRelocCache,
94
+}
95
+
8596
 const THUNK_SIZE: u64 = 12;
8697
 const BR_X16: u32 = 0xd61f_0200;
8798
 const BRANCH26_MAX_FORWARD_DELTA_BYTES: u64 = ((1u64 << 25) - 1) * 4;
@@ -232,41 +243,15 @@ pub fn apply_layout(
232243
         .thunk_plan
233244
         .map(|thunk_plan| thunk_plan.thunk_addrs(layout));
234245
 
235
-    for out_section in &mut layout.sections {
236
-        for placed in &mut out_section.atoms {
237
-            let atom = atoms.get(placed.atom);
238
-            if atom.size == 0 || placed.data.is_empty() {
239
-                continue;
240
-            }
241
-            let obj = input_map.get(&atom.origin).ok_or_else(|| {
242
-                reloc_error(
243
-                    atom,
244
-                    &PathBuf::from("<missing object>"),
245
-                    0,
246
-                    RelocKind::Unsigned,
247
-                    "object",
248
-                    "missing parsed object".to_string(),
249
-                )
250
-            })?;
251
-            patch_eh_frame_cie_pointer(&mut placed.data, atom, &resolve)?;
252
-            let relocs = plan
253
-                .parsed_relocs
254
-                .get(&(atom.origin, atom.input_section))
255
-                .map(Vec::as_slice)
256
-                .unwrap_or(&[]);
257
-            for reloc in relocs_for_atom(relocs, atom) {
258
-                apply_one(
259
-                    &mut placed.data,
260
-                    atom,
261
-                    obj,
262
-                    reloc,
263
-                    &resolve,
264
-                    plan.thunk_plan,
265
-                    thunk_addrs.as_ref(),
266
-                )?;
267
-            }
268
-        }
269
-    }
246
+    let regular_ctx = RegularRelocContext {
247
+        input_map: &input_map,
248
+        atoms,
249
+        resolve: &resolve,
250
+        thunk_plan: plan.thunk_plan,
251
+        thunk_addrs: thunk_addrs.as_ref(),
252
+        parsed_relocs: plan.parsed_relocs,
253
+    };
254
+    apply_regular_relocs(layout, &regular_ctx, plan.parallel_jobs)?;
270255
 
271256
     if let Some(thunk_plan) = plan.thunk_plan {
272257
         synthesize_thunk_section(layout, thunk_plan, &resolve)?;
@@ -290,6 +275,75 @@ pub fn apply_layout(
290275
     Ok(())
291276
 }
292277
 
278
+fn apply_regular_relocs(
279
+    layout: &mut Layout,
280
+    ctx: &RegularRelocContext<'_>,
281
+    parallel_jobs: usize,
282
+) -> Result<(), RelocError> {
283
+    let parallel_jobs = parallel_jobs.max(1);
284
+    for out_section in &mut layout.sections {
285
+        let atom_count = out_section.atoms.len();
286
+        if parallel_jobs == 1 || atom_count < 2 {
287
+            apply_regular_atom_chunk(&mut out_section.atoms, ctx)?;
288
+            continue;
289
+        }
290
+
291
+        let job_count = parallel_jobs.min(atom_count).max(1);
292
+        let chunk_size = atom_count.div_ceil(job_count);
293
+        thread::scope(|scope| {
294
+            let mut handles = Vec::new();
295
+            for chunk in out_section.atoms.chunks_mut(chunk_size) {
296
+                handles.push(scope.spawn(move || apply_regular_atom_chunk(chunk, ctx)));
297
+            }
298
+            for handle in handles {
299
+                handle.join().expect("relocation worker panicked")?;
300
+            }
301
+            Ok::<(), RelocError>(())
302
+        })?;
303
+    }
304
+    Ok(())
305
+}
306
+
307
+fn apply_regular_atom_chunk(
308
+    placed_atoms: &mut [OutputAtom],
309
+    ctx: &RegularRelocContext<'_>,
310
+) -> Result<(), RelocError> {
311
+    for placed in placed_atoms {
312
+        let atom = ctx.atoms.get(placed.atom);
313
+        if atom.size == 0 || placed.data.is_empty() {
314
+            continue;
315
+        }
316
+        let obj = ctx.input_map.get(&atom.origin).ok_or_else(|| {
317
+            reloc_error(
318
+                atom,
319
+                &PathBuf::from("<missing object>"),
320
+                0,
321
+                RelocKind::Unsigned,
322
+                "object",
323
+                "missing parsed object".to_string(),
324
+            )
325
+        })?;
326
+        patch_eh_frame_cie_pointer(&mut placed.data, atom, ctx.resolve)?;
327
+        let relocs = ctx
328
+            .parsed_relocs
329
+            .get(&(atom.origin, atom.input_section))
330
+            .map(Vec::as_slice)
331
+            .unwrap_or(&[]);
332
+        for reloc in relocs_for_atom(relocs, atom) {
333
+            apply_one(
334
+                &mut placed.data,
335
+                atom,
336
+                obj,
337
+                reloc,
338
+                ctx.resolve,
339
+                ctx.thunk_plan,
340
+                ctx.thunk_addrs,
341
+            )?;
342
+        }
343
+    }
344
+    Ok(())
345
+}
346
+
293347
 fn patch_eh_frame_cie_pointer(
294348
     bytes: &mut [u8],
295349
     atom: &Atom,
src/resolve.rs (modified)
@@ -17,7 +17,6 @@
1717
 
1818
 use std::collections::{HashMap, HashSet, VecDeque};
1919
 use std::path::{Path, PathBuf};
20
-use std::rc::Rc;
2120
 use std::sync::{mpsc, Arc, Mutex};
2221
 use std::thread;
2322
 
@@ -43,8 +42,8 @@ impl Istr {
4342
 
4443
 #[derive(Debug, Default)]
4544
 pub struct StringInterner {
46
-    strings: Vec<Rc<str>>,
47
-    index: HashMap<Rc<str>, u32>,
45
+    strings: Vec<Arc<str>>,
46
+    index: HashMap<Arc<str>, u32>,
4847
 }
4948
 
5049
 impl StringInterner {
@@ -53,12 +52,12 @@ impl StringInterner {
5352
     }
5453
 
5554
     /// Intern `s`, returning the existing handle when the string was already
56
-    /// seen. Allocates at most one `Rc<str>` per unique name.
55
+    /// seen. Allocates at most one `Arc<str>` per unique name.
5756
     pub fn intern(&mut self, s: &str) -> Istr {
5857
         if let Some(&i) = self.index.get(s) {
5958
             return Istr(i);
6059
         }
61
-        let rc: Rc<str> = Rc::from(s);
60
+        let rc: Arc<str> = Arc::from(s);
6261
         let id = self.strings.len() as u32;
6362
         self.strings.push(rc.clone());
6463
         self.index.insert(rc, id);
tests/determinism.rs (modified)
@@ -147,6 +147,75 @@ fn repeated_parallel_archive_fetches_are_byte_identical() {
147147
     let _ = fs::remove_dir_all(root);
148148
 }
149149
 
150
+#[test]
151
+fn relocation_workers_match_single_worker_for_many_atoms() {
152
+    if !have_xcrun() || !have_xcrun_tool("as") {
153
+        eprintln!("skipping: xcrun as unavailable");
154
+        return;
155
+    }
156
+
157
+    let root = unique_temp_dir("reloc-workers").expect("create relocation worker temp dir");
158
+    let text_obj = root.join("text.o");
159
+    let data_obj = root.join("data.o");
160
+
161
+    let mut asm = String::from(
162
+        "\
163
+        .section __TEXT,__text,regular,pure_instructions\n\
164
+        .globl _main\n\
165
+        _main:\n",
166
+    );
167
+    for index in 0..64 {
168
+        asm.push_str(&format!("            bl _helper_{index}\n"));
169
+    }
170
+    asm.push_str(
171
+        "\
172
+            adrp x8, _value@GOTPAGE\n\
173
+            ldr x8, [x8, _value@GOTPAGEOFF]\n\
174
+            ldr w0, [x8]\n\
175
+            ret\n\
176
+\n",
177
+    );
178
+    for index in 0..64 {
179
+        asm.push_str(&format!(
180
+            "\
181
+        .globl _helper_{index}\n\
182
+        _helper_{index}:\n\
183
+            adrp x9, _value@GOTPAGE\n\
184
+            ldr x9, [x9, _value@GOTPAGEOFF]\n\
185
+            ldr w9, [x9]\n\
186
+            ret\n\
187
+\n"
188
+        ));
189
+    }
190
+    asm.push_str("        .subsections_via_symbols\n");
191
+
192
+    assemble(&asm, &text_obj).expect("assemble relocation worker text fixture");
193
+    assemble(
194
+        "\
195
+        .section __DATA,__data\n\
196
+        .globl _value\n\
197
+        .p2align 2\n\
198
+        _value:\n\
199
+            .long 11\n\
200
+\n\
201
+        .subsections_via_symbols\n",
202
+        &data_obj,
203
+    )
204
+    .expect("assemble relocation worker data fixture");
205
+
206
+    let inputs = vec![text_obj, data_obj];
207
+    let serial =
208
+        link_once_with_jobs(&inputs, &root, "reloc-workers-serial", Some(1)).expect("serial link");
209
+    let parallel = link_once_with_jobs(&inputs, &root, "reloc-workers-parallel", Some(8))
210
+        .expect("parallel link");
211
+    assert_eq!(
212
+        parallel, serial,
213
+        "parallel relocation workers changed final output bytes"
214
+    );
215
+
216
+    let _ = fs::remove_dir_all(root);
217
+}
218
+
150219
 fn assert_repeated_links_identical(inputs: Vec<PathBuf>, root: &Path, label: &str) {
151220
     let baseline = link_once(&inputs, root, &format!("{label}-baseline"))
152221
         .expect("baseline deterministic link");