Rust · 15492 bytes Raw Blame History
1 use std::collections::{HashMap, HashSet};
2 use std::fmt;
3
4 use crate::atom::{Atom, AtomFlags, AtomSection, AtomTable};
5 use crate::layout::LayoutInput;
6 use crate::reloc::{parse_raw_relocs, parse_relocs, Referent, Reloc, RelocKind, RelocLength};
7 use crate::resolve::{AtomId, InputId, Symbol, SymbolId, SymbolTable};
8
9 #[derive(Debug, Clone, Default)]
10 pub struct IcfPlan {
11 kept_atoms: HashSet<AtomId>,
12 redirects: HashMap<AtomId, AtomId>,
13 }
14
15 #[derive(Debug, Clone, PartialEq, Eq)]
16 pub struct FoldedSymbol {
17 pub name: String,
18 pub winner: String,
19 pub file_index: usize,
20 }
21
22 impl IcfPlan {
23 pub fn kept_atoms(&self) -> &HashSet<AtomId> {
24 &self.kept_atoms
25 }
26
27 pub fn redirects(&self) -> &HashMap<AtomId, AtomId> {
28 &self.redirects
29 }
30
31 pub fn folded_symbols(
32 &self,
33 atom_table: &AtomTable,
34 sym_table: &SymbolTable,
35 layout_inputs: &[LayoutInput<'_>],
36 ) -> Vec<FoldedSymbol> {
37 let file_index_by_input: HashMap<InputId, usize> = layout_inputs
38 .iter()
39 .enumerate()
40 .map(|(idx, input)| (input.id, idx + 1))
41 .collect();
42 let mut out = Vec::new();
43 for (&loser, &winner) in &self.redirects {
44 let winner = canonical_atom(winner, &self.redirects);
45 let Some(winner_symbol) = representative_symbol(atom_table.get(winner)) else {
46 continue;
47 };
48 let winner_name = symbol_name(sym_table, winner_symbol);
49 for symbol_id in atom_symbols(atom_table.get(loser)) {
50 let Symbol::Defined { origin, .. } = sym_table.get(symbol_id) else {
51 continue;
52 };
53 let name = symbol_name(sym_table, symbol_id);
54 if name == winner_name {
55 continue;
56 }
57 out.push(FoldedSymbol {
58 name,
59 winner: winner_name.clone(),
60 file_index: file_index_by_input.get(origin).copied().unwrap_or(0),
61 });
62 }
63 }
64 out.sort_by(|lhs, rhs| {
65 lhs.name
66 .cmp(&rhs.name)
67 .then_with(|| lhs.winner.cmp(&rhs.winner))
68 .then_with(|| lhs.file_index.cmp(&rhs.file_index))
69 });
70 out.dedup_by(|lhs, rhs| {
71 lhs.name == rhs.name && lhs.winner == rhs.winner && lhs.file_index == rhs.file_index
72 });
73 out
74 }
75 }
76
77 #[derive(Debug, Clone)]
78 pub struct IcfError(String);
79
80 impl fmt::Display for IcfError {
81 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82 write!(f, "ICF error: {}", self.0)
83 }
84 }
85
86 impl std::error::Error for IcfError {}
87
88 pub fn fold_safe(
89 layout_inputs: &[LayoutInput<'_>],
90 atom_table: &mut AtomTable,
91 sym_table: &mut SymbolTable,
92 live_atoms: Option<&HashSet<AtomId>>,
93 ) -> Result<IcfPlan, IcfError> {
94 let resolved_by_name = resolved_symbol_map(sym_table);
95 let reloc_cache = reloc_cache(layout_inputs)?;
96
97 mark_address_taken(
98 layout_inputs,
99 atom_table,
100 sym_table,
101 &resolved_by_name,
102 &reloc_cache,
103 );
104
105 let mut kept_atoms = live_atoms.cloned().unwrap_or_else(|| {
106 atom_table
107 .iter()
108 .map(|(atom_id, _)| atom_id)
109 .collect::<HashSet<_>>()
110 });
111 let mut redirects = HashMap::new();
112
113 let order_by_input: HashMap<InputId, (usize, Option<u32>)> = layout_inputs
114 .iter()
115 .map(|input| (input.id, (input.load_order, input.archive_member_offset)))
116 .collect();
117
118 loop {
119 let mut buckets: HashMap<FoldKey, Vec<AtomId>> = HashMap::new();
120 for (atom_id, atom) in atom_table.iter() {
121 if !kept_atoms.contains(&atom_id) {
122 continue;
123 }
124 if !is_foldable_atom(atom, sym_table) {
125 continue;
126 }
127 let relocs = reloc_cache
128 .get(&(atom.origin, atom.input_section))
129 .map(Vec::as_slice)
130 .unwrap_or(&[]);
131 let Some(reloc_sig) = reloc_signature_for_atom(
132 atom,
133 relocs,
134 layout_inputs,
135 sym_table,
136 &resolved_by_name,
137 &redirects,
138 ) else {
139 continue;
140 };
141 buckets
142 .entry(FoldKey::from_atom(atom, reloc_sig))
143 .or_default()
144 .push(atom_id);
145 }
146
147 let mut changed = false;
148 for atom_ids in buckets.into_values() {
149 if atom_ids.len() < 2 {
150 continue;
151 }
152 let winner = *atom_ids
153 .iter()
154 .min_by_key(|atom_id| {
155 fold_order_key(atom_table.get(**atom_id), &order_by_input, **atom_id)
156 })
157 .expect("bucket is non-empty");
158 for loser in atom_ids {
159 if loser == winner {
160 continue;
161 }
162 redirects.insert(loser, winner);
163 kept_atoms.remove(&loser);
164 rebind_folded_symbols(sym_table, atom_table.get(loser), winner);
165 changed = true;
166 }
167 }
168
169 if !changed {
170 break;
171 }
172 }
173
174 rebind_symbols_to_canonical_winners(sym_table, &redirects);
175
176 Ok(IcfPlan {
177 kept_atoms,
178 redirects,
179 })
180 }
181
182 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
183 struct FoldKey {
184 section: AtomSection,
185 size: u32,
186 align_pow2: u8,
187 flags: u32,
188 data: Vec<u8>,
189 relocs: Vec<FoldReloc>,
190 }
191
192 impl FoldKey {
193 fn from_atom(atom: &Atom, relocs: Vec<FoldReloc>) -> Self {
194 Self {
195 section: atom.section,
196 size: atom.size,
197 align_pow2: atom.align_pow2,
198 flags: atom.flags.bits() & !AtomFlags::ADDRESS_TAKEN,
199 data: atom.data.clone(),
200 relocs,
201 }
202 }
203 }
204
205 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
206 struct FoldReloc {
207 offset: u32,
208 kind: RelocKind,
209 length: RelocLength,
210 pcrel: bool,
211 referent: FoldReferent,
212 addend: i64,
213 subtrahend: Option<FoldReferent>,
214 }
215
216 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
217 enum FoldReferent {
218 Atom(AtomId),
219 Symbol(SymbolId),
220 Section(u8),
221 }
222
223 fn fold_order_key(
224 atom: &Atom,
225 order_by_input: &HashMap<InputId, (usize, Option<u32>)>,
226 atom_id: AtomId,
227 ) -> (usize, u32, u32, u32) {
228 let (load_order, archive_member_offset) = order_by_input
229 .get(&atom.origin)
230 .copied()
231 .unwrap_or((usize::MAX, None));
232 (
233 load_order,
234 archive_member_offset.unwrap_or(0),
235 atom.input_offset,
236 atom_id.0,
237 )
238 }
239
240 fn is_foldable_atom(atom: &Atom, sym_table: &SymbolTable) -> bool {
241 if !matches!(
242 atom.section,
243 AtomSection::Text
244 | AtomSection::ConstData
245 | AtomSection::CStringLiterals
246 | AtomSection::Literal16
247 ) {
248 return false;
249 }
250 if atom.flags.has(AtomFlags::NO_DEAD_STRIP) || atom.flags.has(AtomFlags::ADDRESS_TAKEN) {
251 return false;
252 }
253 !matches!(
254 atom.owner.map(|owner| sym_table.get(owner)),
255 Some(Symbol::Defined {
256 private_extern: false,
257 ..
258 })
259 )
260 }
261
262 fn rebind_folded_symbols(sym_table: &mut SymbolTable, atom: &Atom, winner: AtomId) {
263 if let Some(owner) = atom.owner {
264 let value = match sym_table.get(owner) {
265 Symbol::Defined { value, .. } => *value,
266 _ => 0,
267 };
268 sym_table.bind_atom(owner, winner, value);
269 }
270 for alt in &atom.alt_entries {
271 let value = match sym_table.get(alt.symbol) {
272 Symbol::Defined { value, .. } => *value,
273 _ => alt.offset_within_atom as u64,
274 };
275 sym_table.bind_atom(alt.symbol, winner, value);
276 }
277 }
278
279 fn rebind_symbols_to_canonical_winners(
280 sym_table: &mut SymbolTable,
281 redirects: &HashMap<AtomId, AtomId>,
282 ) {
283 let updates: Vec<(SymbolId, AtomId, u64)> = sym_table
284 .iter()
285 .filter_map(|(symbol_id, symbol)| match symbol {
286 Symbol::Defined { atom, value, .. } if atom.0 != 0 => {
287 let canonical = canonical_atom(*atom, redirects);
288 (canonical != *atom).then_some((symbol_id, canonical, *value))
289 }
290 _ => None,
291 })
292 .collect();
293 for (symbol_id, atom, value) in updates {
294 sym_table.bind_atom(symbol_id, atom, value);
295 }
296 }
297
298 fn resolved_symbol_map(sym_table: &SymbolTable) -> HashMap<String, SymbolId> {
299 let mut out = HashMap::new();
300 for (symbol_id, symbol) in sym_table.iter() {
301 out.insert(
302 sym_table.interner.resolve(symbol.name()).to_string(),
303 symbol_id,
304 );
305 }
306 out
307 }
308
309 fn reloc_cache(
310 layout_inputs: &[LayoutInput<'_>],
311 ) -> Result<HashMap<(InputId, u8), Vec<Reloc>>, IcfError> {
312 let mut out = HashMap::new();
313 for input in layout_inputs {
314 for (section_idx_zero, section) in input.object.sections.iter().enumerate() {
315 if section.raw_relocs.is_empty() {
316 continue;
317 }
318 let raws = parse_raw_relocs(&section.raw_relocs, 0, section.nreloc)
319 .map_err(|err| IcfError(format!("{}: {err}", input.object.path.display())))?;
320 let relocs = parse_relocs(&raws)
321 .map_err(|err| IcfError(format!("{}: {err}", input.object.path.display())))?;
322 out.insert((input.id, (section_idx_zero + 1) as u8), relocs);
323 }
324 }
325 Ok(out)
326 }
327
328 #[allow(clippy::too_many_arguments)]
329 fn mark_address_taken(
330 layout_inputs: &[LayoutInput<'_>],
331 atom_table: &mut AtomTable,
332 sym_table: &SymbolTable,
333 resolved_by_name: &HashMap<String, SymbolId>,
334 reloc_cache: &HashMap<(InputId, u8), Vec<Reloc>>,
335 ) {
336 for input in layout_inputs {
337 for (section_idx_zero, _section) in input.object.sections.iter().enumerate() {
338 let input_section = (section_idx_zero + 1) as u8;
339 let Some(relocs) = reloc_cache.get(&(input.id, input_section)) else {
340 continue;
341 };
342 for reloc in relocs {
343 if !marks_address_taken(reloc.kind) {
344 continue;
345 }
346 for target_atom in target_atoms_for_reloc(
347 input.object,
348 reloc.referent,
349 sym_table,
350 resolved_by_name,
351 ) {
352 atom_table
353 .get_mut(target_atom)
354 .flags
355 .set(AtomFlags::ADDRESS_TAKEN);
356 }
357 }
358 }
359 }
360 }
361
362 fn marks_address_taken(kind: RelocKind) -> bool {
363 matches!(
364 kind,
365 RelocKind::Unsigned
366 | RelocKind::Page21
367 | RelocKind::PageOff12
368 | RelocKind::PointerToGot
369 | RelocKind::GotLoadPage21
370 | RelocKind::GotLoadPageOff12
371 | RelocKind::TlvpLoadPage21
372 | RelocKind::TlvpLoadPageOff12
373 )
374 }
375
376 fn relocs_for_atom<'a>(relocs: &'a [Reloc], atom: &Atom) -> impl Iterator<Item = Reloc> + 'a {
377 let start = atom.input_offset;
378 let end = atom.input_offset.saturating_add(atom.size);
379 relocs.iter().copied().filter(move |reloc| {
380 let reloc_end = reloc
381 .offset
382 .saturating_add(reloc.length.byte_width() as u32);
383 reloc.offset >= start && reloc_end <= end
384 })
385 }
386
387 fn reloc_signature_for_atom(
388 atom: &Atom,
389 relocs: &[Reloc],
390 layout_inputs: &[LayoutInput<'_>],
391 sym_table: &SymbolTable,
392 resolved_by_name: &HashMap<String, SymbolId>,
393 redirects: &HashMap<AtomId, AtomId>,
394 ) -> Option<Vec<FoldReloc>> {
395 let objects_by_input: HashMap<InputId, &crate::input::ObjectFile> = layout_inputs
396 .iter()
397 .map(|input| (input.id, input.object))
398 .collect();
399 let object = objects_by_input.get(&atom.origin)?;
400 relocs_for_atom(relocs, atom)
401 .map(|reloc| {
402 Some(FoldReloc {
403 offset: reloc.offset.saturating_sub(atom.input_offset),
404 kind: reloc.kind,
405 length: reloc.length,
406 pcrel: reloc.pcrel,
407 referent: normalize_referent(
408 object,
409 reloc.referent,
410 sym_table,
411 resolved_by_name,
412 redirects,
413 )?,
414 addend: reloc.addend,
415 subtrahend: match reloc.subtrahend {
416 Some(referent) => Some(normalize_referent(
417 object,
418 referent,
419 sym_table,
420 resolved_by_name,
421 redirects,
422 )?),
423 None => None,
424 },
425 })
426 })
427 .collect()
428 }
429
430 fn normalize_referent(
431 object: &crate::input::ObjectFile,
432 referent: Referent,
433 sym_table: &SymbolTable,
434 resolved_by_name: &HashMap<String, SymbolId>,
435 redirects: &HashMap<AtomId, AtomId>,
436 ) -> Option<FoldReferent> {
437 match referent {
438 Referent::Symbol(sym_idx) => {
439 let input_sym = object.symbols.get(sym_idx as usize)?;
440 let name = object.symbol_name(input_sym).ok()?;
441 let &symbol_id = resolved_by_name.get(name)?;
442 match sym_table.get(symbol_id) {
443 Symbol::Defined { atom, .. } if atom.0 != 0 => {
444 Some(FoldReferent::Atom(canonical_atom(*atom, redirects)))
445 }
446 _ => Some(FoldReferent::Symbol(symbol_id)),
447 }
448 }
449 Referent::Section(section) => Some(FoldReferent::Section(section)),
450 }
451 }
452
453 fn canonical_atom(atom_id: AtomId, redirects: &HashMap<AtomId, AtomId>) -> AtomId {
454 let mut current = atom_id;
455 while let Some(&next) = redirects.get(&current) {
456 if next == current {
457 break;
458 }
459 current = next;
460 }
461 current
462 }
463
464 fn representative_symbol(atom: &Atom) -> Option<SymbolId> {
465 atom.owner
466 .or_else(|| atom.alt_entries.first().map(|alt| alt.symbol))
467 }
468
469 fn atom_symbols(atom: &Atom) -> impl Iterator<Item = SymbolId> + '_ {
470 atom.owner
471 .into_iter()
472 .chain(atom.alt_entries.iter().map(|alt| alt.symbol))
473 }
474
475 fn symbol_name(sym_table: &SymbolTable, symbol_id: SymbolId) -> String {
476 sym_table
477 .interner
478 .resolve(sym_table.get(symbol_id).name())
479 .to_string()
480 }
481
482 fn target_atoms_for_reloc(
483 object: &crate::input::ObjectFile,
484 referent: Referent,
485 sym_table: &SymbolTable,
486 resolved_by_name: &HashMap<String, SymbolId>,
487 ) -> Vec<AtomId> {
488 match referent {
489 Referent::Symbol(sym_idx) => {
490 let Some(input_sym) = object.symbols.get(sym_idx as usize) else {
491 return Vec::new();
492 };
493 let Some(name) = object.symbol_name(input_sym).ok() else {
494 return Vec::new();
495 };
496 let Some(&symbol_id) = resolved_by_name.get(name) else {
497 return Vec::new();
498 };
499 match sym_table.get(symbol_id) {
500 Symbol::Defined { atom, .. } if atom.0 != 0 => vec![*atom],
501 _ => Vec::new(),
502 }
503 }
504 Referent::Section(_) => Vec::new(),
505 }
506 }
507