Parallelize atom relocation patching
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: bb7169d27fdb113e1806783e17df550269f4b1de
- Parents: 47f6d74
- Tree: 540fd13

| Status | File | + | - |
|---|---|---|---|
| M | src/lib.rs | 1 | 0 |
| M | src/reloc/arm64.rs | 90 | 36 |
| M | src/resolve.rs | 4 | 5 |
| M | tests/determinism.rs | 69 | 0 |
src/lib.rs (modified)
@@ -857,6 +857,7 @@ impl Linker { | ||
| 857 | 857 | linkedit: &linkedit, |
| 858 | 858 | icf_redirects, |
| 859 | 859 | parsed_relocs: &parsed_relocs, |
| 860 | + parallel_jobs, | |
| 860 | 861 | }, |
| 861 | 862 | )?; |
| 862 | 863 | phases.reloc_apply = phase_started.elapsed(); |
src/reloc/arm64.rs (modified)
@@ -1,6 +1,7 @@ | ||
| 1 | 1 | use std::collections::HashMap; |
| 2 | 2 | use std::fmt; |
| 3 | 3 | use std::path::PathBuf; |
| 4 | +use std::thread; | |
| 4 | 5 | |
| 5 | 6 | use crate::atom::{Atom, AtomSection, AtomTable}; |
| 6 | 7 | use crate::input::ObjectFile; |
@@ -8,7 +9,7 @@ use crate::layout::{ExtraOutputSection, ExtraSectionAnchor, Layout, LayoutInput} | ||
| 8 | 9 | use crate::macho::writer::LinkEditPlan; |
| 9 | 10 | use crate::reloc::{ParsedRelocCache, Referent, Reloc, RelocKind, RelocLength}; |
| 10 | 11 | use crate::resolve::{InputId, Symbol, SymbolId, SymbolTable}; |
| 11 | -use crate::section::{OutputSection, SectionKind}; | |
| 12 | +use crate::section::{OutputAtom, OutputSection, SectionKind}; | |
| 12 | 13 | use crate::symbol::{InputSymbol, SymKind}; |
| 13 | 14 | use crate::synth::stubs::{STUB_HELPER_ENTRY_SIZE, STUB_HELPER_HEADER_SIZE, STUB_SIZE}; |
| 14 | 15 | use crate::synth::tlv::THREAD_VARIABLE_DESCRIPTOR_SIZE; |
@@ -73,6 +74,7 @@ pub struct ApplyLayoutPlan<'a> { | ||
| 73 | 74 | pub linkedit: &'a LinkEditPlan, |
| 74 | 75 | pub icf_redirects: Option<&'a HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>, |
| 75 | 76 | pub parsed_relocs: &'a ParsedRelocCache, |
| 77 | + pub parallel_jobs: usize, | |
| 76 | 78 | } |
| 77 | 79 | |
| 78 | 80 | struct InputSectionResolveCtx<'a> { |
@@ -82,6 +84,15 @@ struct InputSectionResolveCtx<'a> { | ||
| 82 | 84 | referent: &'a str, |
| 83 | 85 | } |
| 84 | 86 | |
| 87 | +struct RegularRelocContext<'a> { | |
| 88 | + input_map: &'a HashMap<InputId, &'a ObjectFile>, | |
| 89 | + atoms: &'a AtomTable, | |
| 90 | + resolve: &'a ResolveView<'a>, | |
| 91 | + thunk_plan: Option<&'a ThunkPlan>, | |
| 92 | + thunk_addrs: Option<&'a HashMap<usize, u64>>, | |
| 93 | + parsed_relocs: &'a ParsedRelocCache, | |
| 94 | +} | |
| 95 | + | |
| 85 | 96 | const THUNK_SIZE: u64 = 12; |
| 86 | 97 | const BR_X16: u32 = 0xd61f_0200; |
| 87 | 98 | const BRANCH26_MAX_FORWARD_DELTA_BYTES: u64 = ((1u64 << 25) - 1) * 4; |
@@ -232,41 +243,15 @@ pub fn apply_layout( | ||
| 232 | 243 | .thunk_plan |
| 233 | 244 | .map(|thunk_plan| thunk_plan.thunk_addrs(layout)); |
| 234 | 245 | |
| 235 | - for out_section in &mut layout.sections { | |
| 236 | - for placed in &mut out_section.atoms { | |
| 237 | - let atom = atoms.get(placed.atom); | |
| 238 | - if atom.size == 0 || placed.data.is_empty() { | |
| 239 | - continue; | |
| 240 | - } | |
| 241 | - let obj = input_map.get(&atom.origin).ok_or_else(|| { | |
| 242 | - reloc_error( | |
| 243 | - atom, | |
| 244 | - &PathBuf::from("<missing object>"), | |
| 245 | - 0, | |
| 246 | - RelocKind::Unsigned, | |
| 247 | - "object", | |
| 248 | - "missing parsed object".to_string(), | |
| 249 | - ) | |
| 250 | - })?; | |
| 251 | - patch_eh_frame_cie_pointer(&mut placed.data, atom, &resolve)?; | |
| 252 | - let relocs = plan | |
| 253 | - .parsed_relocs | |
| 254 | - .get(&(atom.origin, atom.input_section)) | |
| 255 | - .map(Vec::as_slice) | |
| 256 | - .unwrap_or(&[]); | |
| 257 | - for reloc in relocs_for_atom(relocs, atom) { | |
| 258 | - apply_one( | |
| 259 | - &mut placed.data, | |
| 260 | - atom, | |
| 261 | - obj, | |
| 262 | - reloc, | |
| 263 | - &resolve, | |
| 264 | - plan.thunk_plan, | |
| 265 | - thunk_addrs.as_ref(), | |
| 266 | - )?; | |
| 267 | - } | |
| 268 | - } | |
| 269 | - } | |
| 246 | + let regular_ctx = RegularRelocContext { | |
| 247 | + input_map: &input_map, | |
| 248 | + atoms, | |
| 249 | + resolve: &resolve, | |
| 250 | + thunk_plan: plan.thunk_plan, | |
| 251 | + thunk_addrs: thunk_addrs.as_ref(), | |
| 252 | + parsed_relocs: plan.parsed_relocs, | |
| 253 | + }; | |
| 254 | + apply_regular_relocs(layout, &regular_ctx, plan.parallel_jobs)?; | |
| 270 | 255 | |
| 271 | 256 | if let Some(thunk_plan) = plan.thunk_plan { |
| 272 | 257 | synthesize_thunk_section(layout, thunk_plan, &resolve)?; |
@@ -290,6 +275,75 @@ pub fn apply_layout( | ||
| 290 | 275 | Ok(()) |
| 291 | 276 | } |
| 292 | 277 | |
| 278 | +fn apply_regular_relocs( | |
| 279 | + layout: &mut Layout, | |
| 280 | + ctx: &RegularRelocContext<'_>, | |
| 281 | + parallel_jobs: usize, | |
| 282 | +) -> Result<(), RelocError> { | |
| 283 | + let parallel_jobs = parallel_jobs.max(1); | |
| 284 | + for out_section in &mut layout.sections { | |
| 285 | + let atom_count = out_section.atoms.len(); | |
| 286 | + if parallel_jobs == 1 || atom_count < 2 { | |
| 287 | + apply_regular_atom_chunk(&mut out_section.atoms, ctx)?; | |
| 288 | + continue; | |
| 289 | + } | |
| 290 | + | |
| 291 | + let job_count = parallel_jobs.min(atom_count).max(1); | |
| 292 | + let chunk_size = atom_count.div_ceil(job_count); | |
| 293 | + thread::scope(|scope| { | |
| 294 | + let mut handles = Vec::new(); | |
| 295 | + for chunk in out_section.atoms.chunks_mut(chunk_size) { | |
| 296 | + handles.push(scope.spawn(move || apply_regular_atom_chunk(chunk, ctx))); | |
| 297 | + } | |
| 298 | + for handle in handles { | |
| 299 | + handle.join().expect("relocation worker panicked")?; | |
| 300 | + } | |
| 301 | + Ok::<(), RelocError>(()) | |
| 302 | + })?; | |
| 303 | + } | |
| 304 | + Ok(()) | |
| 305 | +} | |
| 306 | + | |
| 307 | +fn apply_regular_atom_chunk( | |
| 308 | + placed_atoms: &mut [OutputAtom], | |
| 309 | + ctx: &RegularRelocContext<'_>, | |
| 310 | +) -> Result<(), RelocError> { | |
| 311 | + for placed in placed_atoms { | |
| 312 | + let atom = ctx.atoms.get(placed.atom); | |
| 313 | + if atom.size == 0 || placed.data.is_empty() { | |
| 314 | + continue; | |
| 315 | + } | |
| 316 | + let obj = ctx.input_map.get(&atom.origin).ok_or_else(|| { | |
| 317 | + reloc_error( | |
| 318 | + atom, | |
| 319 | + &PathBuf::from("<missing object>"), | |
| 320 | + 0, | |
| 321 | + RelocKind::Unsigned, | |
| 322 | + "object", | |
| 323 | + "missing parsed object".to_string(), | |
| 324 | + ) | |
| 325 | + })?; | |
| 326 | + patch_eh_frame_cie_pointer(&mut placed.data, atom, ctx.resolve)?; | |
| 327 | + let relocs = ctx | |
| 328 | + .parsed_relocs | |
| 329 | + .get(&(atom.origin, atom.input_section)) | |
| 330 | + .map(Vec::as_slice) | |
| 331 | + .unwrap_or(&[]); | |
| 332 | + for reloc in relocs_for_atom(relocs, atom) { | |
| 333 | + apply_one( | |
| 334 | + &mut placed.data, | |
| 335 | + atom, | |
| 336 | + obj, | |
| 337 | + reloc, | |
| 338 | + ctx.resolve, | |
| 339 | + ctx.thunk_plan, | |
| 340 | + ctx.thunk_addrs, | |
| 341 | + )?; | |
| 342 | + } | |
| 343 | + } | |
| 344 | + Ok(()) | |
| 345 | +} | |
| 346 | + | |
| 293 | 347 | fn patch_eh_frame_cie_pointer( |
| 294 | 348 | bytes: &mut [u8], |
| 295 | 349 | atom: &Atom, |
src/resolve.rs (modified)
@@ -17,7 +17,6 @@ | ||
| 17 | 17 | |
| 18 | 18 | use std::collections::{HashMap, HashSet, VecDeque}; |
| 19 | 19 | use std::path::{Path, PathBuf}; |
| 20 | -use std::rc::Rc; | |
| 21 | 20 | use std::sync::{mpsc, Arc, Mutex}; |
| 22 | 21 | use std::thread; |
| 23 | 22 | |
@@ -43,8 +42,8 @@ impl Istr { | ||
| 43 | 42 | |
| 44 | 43 | #[derive(Debug, Default)] |
| 45 | 44 | pub struct StringInterner { |
| 46 | - strings: Vec<Rc<str>>, | |
| 47 | - index: HashMap<Rc<str>, u32>, | |
| 45 | + strings: Vec<Arc<str>>, | |
| 46 | + index: HashMap<Arc<str>, u32>, | |
| 48 | 47 | } |
| 49 | 48 | |
| 50 | 49 | impl StringInterner { |
@@ -53,12 +52,12 @@ impl StringInterner { | ||
| 53 | 52 | } |
| 54 | 53 | |
| 55 | 54 | /// Intern `s`, returning the existing handle when the string was already |
| 56 | - /// seen. Allocates at most one `Rc<str>` per unique name. | |
| 55 | + /// seen. Allocates at most one `Arc<str>` per unique name. | |
| 57 | 56 | pub fn intern(&mut self, s: &str) -> Istr { |
| 58 | 57 | if let Some(&i) = self.index.get(s) { |
| 59 | 58 | return Istr(i); |
| 60 | 59 | } |
| 61 | - let rc: Rc<str> = Rc::from(s); | |
| 60 | + let rc: Arc<str> = Arc::from(s); | |
| 62 | 61 | let id = self.strings.len() as u32; |
| 63 | 62 | self.strings.push(rc.clone()); |
| 64 | 63 | self.index.insert(rc, id); |
tests/determinism.rs (modified)
@@ -147,6 +147,75 @@ fn repeated_parallel_archive_fetches_are_byte_identical() { | ||
| 147 | 147 | let _ = fs::remove_dir_all(root); |
| 148 | 148 | } |
| 149 | 149 | |
| 150 | +#[test] | |
| 151 | +fn relocation_workers_match_single_worker_for_many_atoms() { | |
| 152 | + if !have_xcrun() || !have_xcrun_tool("as") { | |
| 153 | + eprintln!("skipping: xcrun as unavailable"); | |
| 154 | + return; | |
| 155 | + } | |
| 156 | + | |
| 157 | + let root = unique_temp_dir("reloc-workers").expect("create relocation worker temp dir"); | |
| 158 | + let text_obj = root.join("text.o"); | |
| 159 | + let data_obj = root.join("data.o"); | |
| 160 | + | |
| 161 | + let mut asm = String::from( | |
| 162 | + "\ | |
| 163 | + .section __TEXT,__text,regular,pure_instructions\n\ | |
| 164 | + .globl _main\n\ | |
| 165 | + _main:\n", | |
| 166 | + ); | |
| 167 | + for index in 0..64 { | |
| 168 | + asm.push_str(&format!(" bl _helper_{index}\n")); | |
| 169 | + } | |
| 170 | + asm.push_str( | |
| 171 | + "\ | |
| 172 | + adrp x8, _value@GOTPAGE\n\ | |
| 173 | + ldr x8, [x8, _value@GOTPAGEOFF]\n\ | |
| 174 | + ldr w0, [x8]\n\ | |
| 175 | + ret\n\ | |
| 176 | +\n", | |
| 177 | + ); | |
| 178 | + for index in 0..64 { | |
| 179 | + asm.push_str(&format!( | |
| 180 | + "\ | |
| 181 | + .globl _helper_{index}\n\ | |
| 182 | + _helper_{index}:\n\ | |
| 183 | + adrp x9, _value@GOTPAGE\n\ | |
| 184 | + ldr x9, [x9, _value@GOTPAGEOFF]\n\ | |
| 185 | + ldr w9, [x9]\n\ | |
| 186 | + ret\n\ | |
| 187 | +\n" | |
| 188 | + )); | |
| 189 | + } | |
| 190 | + asm.push_str(" .subsections_via_symbols\n"); | |
| 191 | + | |
| 192 | + assemble(&asm, &text_obj).expect("assemble relocation worker text fixture"); | |
| 193 | + assemble( | |
| 194 | + "\ | |
| 195 | + .section __DATA,__data\n\ | |
| 196 | + .globl _value\n\ | |
| 197 | + .p2align 2\n\ | |
| 198 | + _value:\n\ | |
| 199 | + .long 11\n\ | |
| 200 | +\n\ | |
| 201 | + .subsections_via_symbols\n", | |
| 202 | + &data_obj, | |
| 203 | + ) | |
| 204 | + .expect("assemble relocation worker data fixture"); | |
| 205 | + | |
| 206 | + let inputs = vec![text_obj, data_obj]; | |
| 207 | + let serial = | |
| 208 | + link_once_with_jobs(&inputs, &root, "reloc-workers-serial", Some(1)).expect("serial link"); | |
| 209 | + let parallel = link_once_with_jobs(&inputs, &root, "reloc-workers-parallel", Some(8)) | |
| 210 | + .expect("parallel link"); | |
| 211 | + assert_eq!( | |
| 212 | + parallel, serial, | |
| 213 | + "parallel relocation workers changed final output bytes" | |
| 214 | + ); | |
| 215 | + | |
| 216 | + let _ = fs::remove_dir_all(root); | |
| 217 | +} | |
| 218 | + | |
| 150 | 219 | fn assert_repeated_links_identical(inputs: Vec<PathBuf>, root: &Path, label: &str) { |
| 151 | 220 | let baseline = link_once(&inputs, root, &format!("{label}-baseline")) |
| 152 | 221 | .expect("baseline deterministic link"); |