Preserve LOH payloads
- SHA: 1e809169ea3a69ab9fa66d3256729086813a6b50 (short: 1e80916)
- Parents: 1eae32b
- Tree: fead290

| Status | File | + | - |
|---|---|---|---|
| M | src/input.rs | 138 | 0 |
| M | src/layout.rs | 8 | 0 |
| M | src/lib.rs | 1 | 0 |
| A | src/loh.rs | 104 | 0 |
| M | src/macho/writer.rs | 168 | 1 |
| M | src/synth/mod.rs | 3 | 0 |
| M | tests/linker_run.rs | 75 | 6 |
src/input.rsmodified@@ -7,6 +7,7 @@ | ||
| 7 | 7 | |
| 8 | 8 | use std::path::PathBuf; |
| 9 | 9 | |
| 10 | +use crate::loh::{parse_loh_blob, LohEntry}; | |
| 10 | 11 | use crate::macho::constants::LC_DATA_IN_CODE; |
| 11 | 12 | use crate::macho::reader::{ |
| 12 | 13 | parse_commands, parse_header, DysymtabCmd, LinkEditDataCmd, LoadCommand, MachHeader64, |
@@ -28,6 +29,7 @@ pub struct ObjectFile { | ||
| 28 | 29 | pub strings: StringTable, |
| 29 | 30 | pub symtab: Option<SymtabCmd>, |
| 30 | 31 | pub dysymtab: Option<DysymtabCmd>, |
| 32 | + pub loh: Vec<LohEntry>, | |
| 31 | 33 | pub data_in_code: Vec<DataInCodeEntry>, |
| 32 | 34 | } |
| 33 | 35 | |
@@ -90,6 +92,7 @@ impl ObjectFile { | ||
| 90 | 92 | ), |
| 91 | 93 | None => (Vec::new(), StringTable::from_bytes(Vec::new())), |
| 92 | 94 | }; |
| 95 | + let loh = parse_loh(&commands, file_bytes)?; | |
| 93 | 96 | let data_in_code = parse_data_in_code(&commands, file_bytes)?; |
| 94 | 97 | |
| 95 | 98 | Ok(ObjectFile { |
@@ -101,6 +104,7 @@ impl ObjectFile { | ||
| 101 | 104 | strings, |
| 102 | 105 | symtab, |
| 103 | 106 | dysymtab, |
| 107 | + loh, | |
| 104 | 108 | data_in_code, |
| 105 | 109 | }) |
| 106 | 110 | } |
@@ -132,6 +136,32 @@ impl ObjectFile { | ||
| 132 | 136 | } |
| 133 | 137 | } |
| 134 | 138 | |
| 139 | +fn parse_loh(commands: &[LoadCommand], file_bytes: &[u8]) -> Result<Vec<LohEntry>, ReadError> { | |
| 140 | + let mut out = Vec::new(); | |
| 141 | + for command in commands { | |
| 142 | + let LoadCommand::LinkerOptimizationHint(linkedit) = command else { | |
| 143 | + continue; | |
| 144 | + }; | |
| 145 | + let start = linkedit.dataoff as usize; | |
| 146 | + let end = start | |
| 147 | + .checked_add(linkedit.datasize as usize) | |
| 148 | + .ok_or(ReadError::Truncated { | |
| 149 | + need: usize::MAX, | |
| 150 | + have: file_bytes.len(), | |
| 151 | + context: "LC_LINKER_OPTIMIZATION_HINT payload (offset + size overflows)", | |
| 152 | + })?; | |
| 153 | + if end > file_bytes.len() { | |
| 154 | + return Err(ReadError::Truncated { | |
| 155 | + need: end, | |
| 156 | + have: file_bytes.len(), | |
| 157 | + context: "LC_LINKER_OPTIMIZATION_HINT payload", | |
| 158 | + }); | |
| 159 | + } | |
| 160 | + out.extend(parse_loh_blob(&file_bytes[start..end])?); | |
| 161 | + } | |
| 162 | + Ok(out) | |
| 163 | +} | |
| 164 | + | |
| 135 | 165 | fn parse_data_in_code( |
| 136 | 166 | commands: &[LoadCommand], |
| 137 | 167 | file_bytes: &[u8], |
@@ -182,6 +212,7 @@ pub fn header_and_cmds_end(header: &MachHeader64) -> usize { | ||
| 182 | 212 | #[cfg(test)] |
| 183 | 213 | mod tests { |
| 184 | 214 | use super::*; |
| 215 | + use crate::loh::{write_loh_blob, LOH_ARM64_ADRP_ADD}; | |
| 185 | 216 | use crate::macho::constants::*; |
| 186 | 217 | use crate::macho::reader::{ |
| 187 | 218 | write_commands, write_header, LinkEditDataCmd, LoadCommand, Section64Header, Segment64, |
@@ -389,6 +420,99 @@ mod tests { | ||
| 389 | 420 | image |
| 390 | 421 | } |
| 391 | 422 | |
| 423 | + fn synth_image_with_loh() -> Vec<u8> { | |
| 424 | + let text_sect = Section64Header { | |
| 425 | + sectname: name16("__text"), | |
| 426 | + segname: name16("__TEXT"), | |
| 427 | + addr: 0, | |
| 428 | + size: 8, | |
| 429 | + offset: 0, | |
| 430 | + align: 2, | |
| 431 | + reloff: 0, | |
| 432 | + nreloc: 0, | |
| 433 | + flags: S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS, | |
| 434 | + reserved1: 0, | |
| 435 | + reserved2: 0, | |
| 436 | + reserved3: 0, | |
| 437 | + }; | |
| 438 | + let seg = Segment64 { | |
| 439 | + segname: name16(""), | |
| 440 | + vmaddr: 0, | |
| 441 | + vmsize: 8, | |
| 442 | + fileoff: 0, | |
| 443 | + filesize: 8, | |
| 444 | + maxprot: 7, | |
| 445 | + initprot: 7, | |
| 446 | + flags: 0, | |
| 447 | + sections: vec![text_sect], | |
| 448 | + }; | |
| 449 | + let strtab = b"\0_main\0"; | |
| 450 | + let nsyms = 1u32; | |
| 451 | + let sym = RawNlist { | |
| 452 | + strx: 1, | |
| 453 | + n_type: N_SECT | N_EXT, | |
| 454 | + n_sect: 1, | |
| 455 | + n_desc: 0, | |
| 456 | + n_value: 0, | |
| 457 | + }; | |
| 458 | + let loh_blob = write_loh_blob(&[LohEntry { | |
| 459 | + kind: LOH_ARM64_ADRP_ADD, | |
| 460 | + args: vec![0, 4], | |
| 461 | + }]); | |
| 462 | + let hdr_size = HEADER_SIZE; | |
| 463 | + let seg_size = seg.wire_size() as usize; | |
| 464 | + let loh_size = LinkEditDataCmd::WIRE_SIZE as usize; | |
| 465 | + let symtab_size = SymtabCmd::WIRE_SIZE as usize; | |
| 466 | + let sizeofcmds = (seg_size + loh_size + symtab_size) as u32; | |
| 467 | + | |
| 468 | + let section_offset = (hdr_size + sizeofcmds as usize) as u32; | |
| 469 | + let loh_off = section_offset + 8; | |
| 470 | + let symoff = loh_off + loh_blob.len() as u32; | |
| 471 | + let stroff = symoff + NLIST_SIZE as u32 * nsyms; | |
| 472 | + let seg = Segment64 { | |
| 473 | + sections: vec![Section64Header { | |
| 474 | + offset: section_offset, | |
| 475 | + ..seg.sections[0] | |
| 476 | + }], | |
| 477 | + fileoff: section_offset as u64, | |
| 478 | + ..seg | |
| 479 | + }; | |
| 480 | + let header = MachHeader64 { | |
| 481 | + magic: MH_MAGIC_64, | |
| 482 | + cputype: CPU_TYPE_ARM64, | |
| 483 | + cpusubtype: 0, | |
| 484 | + filetype: MH_OBJECT, | |
| 485 | + ncmds: 3, | |
| 486 | + sizeofcmds, | |
| 487 | + flags: MH_SUBSECTIONS_VIA_SYMBOLS, | |
| 488 | + reserved: 0, | |
| 489 | + }; | |
| 490 | + let symtab_cmd = SymtabCmd { | |
| 491 | + symoff, | |
| 492 | + nsyms, | |
| 493 | + stroff, | |
| 494 | + strsize: strtab.len() as u32, | |
| 495 | + }; | |
| 496 | + let loh_cmd = LoadCommand::LinkerOptimizationHint(LinkEditDataCmd { | |
| 497 | + dataoff: loh_off, | |
| 498 | + datasize: loh_blob.len() as u32, | |
| 499 | + }); | |
| 500 | + | |
| 501 | + let mut image = Vec::new(); | |
| 502 | + write_header(&header, &mut image); | |
| 503 | + let cmds = vec![ | |
| 504 | + LoadCommand::Segment64(seg), | |
| 505 | + loh_cmd, | |
| 506 | + LoadCommand::Symtab(symtab_cmd), | |
| 507 | + ]; | |
| 508 | + write_commands(&cmds, &mut image); | |
| 509 | + image.extend_from_slice(&[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22]); | |
| 510 | + image.extend_from_slice(&loh_blob); | |
| 511 | + sym.write(&mut image); | |
| 512 | + image.extend_from_slice(strtab); | |
| 513 | + image | |
| 514 | + } | |
| 515 | + | |
| 392 | 516 | #[test] |
| 393 | 517 | fn parse_synth_object_end_to_end() { |
| 394 | 518 | let image = synth_image(); |
@@ -425,6 +549,19 @@ mod tests { | ||
| 425 | 549 | ); |
| 426 | 550 | } |
| 427 | 551 | |
/// Parsing the synthetic LOH object must surface its single AdrpAdd record.
#[test]
fn parse_preserves_loh_entries() {
    let expected = vec![LohEntry {
        kind: LOH_ARM64_ADRP_ADD,
        args: vec![0, 4],
    }];

    let image = synth_image_with_loh();
    let parsed = ObjectFile::parse("/tmp/synth-loh.o", &image).unwrap();

    assert_eq!(parsed.loh, expected);
}
| 564 | + | |
| 428 | 565 | #[test] |
| 429 | 566 | fn indirect_target_name_resolves() { |
| 430 | 567 | // Build a minimal strtab with "\0_alias\0_target\0" and a RawNlist |
@@ -448,6 +585,7 @@ mod tests { | ||
| 448 | 585 | strings: strtab, |
| 449 | 586 | symtab: None, |
| 450 | 587 | dysymtab: None, |
| 588 | + loh: Vec::new(), | |
| 451 | 589 | data_in_code: Vec::new(), |
| 452 | 590 | }; |
| 453 | 591 | let alias = InputSymbol::from_raw(RawNlist { |
src/layout.rsmodified@@ -668,6 +668,7 @@ mod tests { | ||
| 668 | 668 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 669 | 669 | symtab: None, |
| 670 | 670 | dysymtab: None, |
| 671 | + loh: Vec::new(), | |
| 671 | 672 | data_in_code: Vec::new(), |
| 672 | 673 | }; |
| 673 | 674 | |
@@ -755,6 +756,7 @@ mod tests { | ||
| 755 | 756 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 756 | 757 | symtab: None, |
| 757 | 758 | dysymtab: None, |
| 759 | + loh: Vec::new(), | |
| 758 | 760 | data_in_code: Vec::new(), |
| 759 | 761 | }; |
| 760 | 762 | |
@@ -817,6 +819,7 @@ mod tests { | ||
| 817 | 819 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 818 | 820 | symtab: None, |
| 819 | 821 | dysymtab: None, |
| 822 | + loh: Vec::new(), | |
| 820 | 823 | data_in_code: Vec::new(), |
| 821 | 824 | }; |
| 822 | 825 | |
@@ -875,6 +878,7 @@ mod tests { | ||
| 875 | 878 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 876 | 879 | symtab: None, |
| 877 | 880 | dysymtab: None, |
| 881 | + loh: Vec::new(), | |
| 878 | 882 | data_in_code: Vec::new(), |
| 879 | 883 | }; |
| 880 | 884 | |
@@ -947,6 +951,7 @@ mod tests { | ||
| 947 | 951 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 948 | 952 | symtab: None, |
| 949 | 953 | dysymtab: None, |
| 954 | + loh: Vec::new(), | |
| 950 | 955 | data_in_code: Vec::new(), |
| 951 | 956 | }; |
| 952 | 957 | |
@@ -1008,6 +1013,7 @@ mod tests { | ||
| 1008 | 1013 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 1009 | 1014 | symtab: None, |
| 1010 | 1015 | dysymtab: None, |
| 1016 | + loh: Vec::new(), | |
| 1011 | 1017 | data_in_code: Vec::new(), |
| 1012 | 1018 | }; |
| 1013 | 1019 | |
@@ -1143,6 +1149,7 @@ mod tests { | ||
| 1143 | 1149 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 1144 | 1150 | symtab: None, |
| 1145 | 1151 | dysymtab: None, |
| 1152 | + loh: Vec::new(), | |
| 1146 | 1153 | data_in_code: Vec::new(), |
| 1147 | 1154 | }; |
| 1148 | 1155 | |
@@ -1230,6 +1237,7 @@ mod tests { | ||
| 1230 | 1237 | strings: crate::string_table::StringTable::from_bytes(vec![0]), |
| 1231 | 1238 | symtab: None, |
| 1232 | 1239 | dysymtab: None, |
| 1240 | + loh: Vec::new(), | |
| 1233 | 1241 | data_in_code: Vec::new(), |
| 1234 | 1242 | }; |
| 1235 | 1243 | |
src/lib.rsmodified@@ -14,6 +14,7 @@ pub mod input; | ||
| 14 | 14 | pub mod layout; |
| 15 | 15 | pub mod leb; |
| 16 | 16 | pub mod link_map; |
| 17 | +pub mod loh; | |
| 17 | 18 | pub mod macho; |
| 18 | 19 | pub mod reloc; |
| 19 | 20 | pub mod resolve; |
src/loh.rsadded@@ -0,0 +1,104 @@ | ||
| 1 | +//! ARM64 Linker Optimization Hints (LOH). | |
| 2 | +//! | |
| 3 | +//! `LC_LINKER_OPTIMIZATION_HINT` stores a ULEB128 stream of `(kind, argc, | |
| 4 | +//! args...)` records. The args are file offsets of the participating | |
| 5 | +//! instructions. | |
| 6 | + | |
| 7 | +use crate::leb::{read_uleb, write_uleb}; | |
| 8 | +use crate::macho::reader::ReadError; | |
| 9 | + | |
| 10 | +pub const LOH_ARM64_ADRP_LDR: u32 = 2; | |
| 11 | +pub const LOH_ARM64_ADRP_LDR_GOT_LDR: u32 = 4; | |
| 12 | +pub const LOH_ARM64_ADRP_ADD: u32 = 7; | |
| 13 | +pub const LOH_ARM64_ADRP_LDR_GOT: u32 = 8; | |
| 14 | + | |
/// One decoded `(kind, argc, args...)` record from an LOH stream.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LohEntry {
    /// Hint kind as stored in the stream, e.g. one of the `LOH_ARM64_*`
    /// constants; the parser does not restrict it to known kinds.
    pub kind: u32,
    /// Locations of the participating instructions, in stream order.
    pub args: Vec<u32>,
}
| 20 | + | |
| 21 | +pub fn parse_loh_blob(bytes: &[u8]) -> Result<Vec<LohEntry>, ReadError> { | |
| 22 | + let mut out = Vec::new(); | |
| 23 | + let mut cursor = 0usize; | |
| 24 | + while cursor < bytes.len() { | |
| 25 | + if bytes[cursor..].iter().all(|&byte| byte == 0) { | |
| 26 | + break; | |
| 27 | + } | |
| 28 | + let at_offset = cursor as u32; | |
| 29 | + let (kind, used) = read_uleb(&bytes[cursor..])?; | |
| 30 | + cursor += used; | |
| 31 | + let (argc, used) = read_uleb(&bytes[cursor..])?; | |
| 32 | + cursor += used; | |
| 33 | + let kind = u32::try_from(kind).map_err(|_| ReadError::BadRelocation { | |
| 34 | + at_offset, | |
| 35 | + reason: "LOH kind overflows u32", | |
| 36 | + })?; | |
| 37 | + let argc = usize::try_from(argc).map_err(|_| ReadError::BadRelocation { | |
| 38 | + at_offset, | |
| 39 | + reason: "LOH argcount overflows usize", | |
| 40 | + })?; | |
| 41 | + let mut args = Vec::with_capacity(argc); | |
| 42 | + for _ in 0..argc { | |
| 43 | + let (arg, used) = read_uleb(&bytes[cursor..])?; | |
| 44 | + cursor += used; | |
| 45 | + args.push(u32::try_from(arg).map_err(|_| ReadError::BadRelocation { | |
| 46 | + at_offset, | |
| 47 | + reason: "LOH arg overflows u32", | |
| 48 | + })?); | |
| 49 | + } | |
| 50 | + out.push(LohEntry { kind, args }); | |
| 51 | + } | |
| 52 | + Ok(out) | |
| 53 | +} | |
| 54 | + | |
| 55 | +pub fn write_loh_blob(entries: &[LohEntry]) -> Vec<u8> { | |
| 56 | + let mut out = Vec::new(); | |
| 57 | + for entry in entries { | |
| 58 | + write_uleb(entry.kind as u64, &mut out); | |
| 59 | + write_uleb(entry.args.len() as u64, &mut out); | |
| 60 | + for &arg in &entry.args { | |
| 61 | + write_uleb(arg as u64, &mut out); | |
| 62 | + } | |
| 63 | + } | |
| 64 | + out | |
| 65 | +} | |
| 66 | + | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the AdrpAdd record used by several assertions.
    fn adrp_add() -> LohEntry {
        LohEntry {
            kind: LOH_ARM64_ADRP_ADD,
            args: vec![0, 4],
        }
    }

    /// Writing and re-parsing must reproduce the entries unchanged.
    #[test]
    fn loh_blob_round_trips() {
        let entries = vec![
            adrp_add(),
            LohEntry {
                kind: LOH_ARM64_ADRP_LDR_GOT_LDR,
                args: vec![8, 12, 16],
            },
        ];
        let decoded = parse_loh_blob(&write_loh_blob(&entries)).unwrap();
        assert_eq!(decoded, entries);
    }

    /// Zero bytes appended for 8-byte alignment must not produce entries.
    #[test]
    fn loh_blob_ignores_trailing_zero_padding() {
        let mut blob = write_loh_blob(&[adrp_add()]);
        let padded_len = blob.len().next_multiple_of(8);
        blob.resize(padded_len, 0);

        assert_eq!(parse_loh_blob(&blob).unwrap(), vec![adrp_add()]);
    }
}
src/macho/writer.rsmodified@@ -11,6 +11,7 @@ use crate::atom::AtomTable; | ||
| 11 | 11 | use crate::input::{DataInCodeEntry, ObjectFile}; |
| 12 | 12 | use crate::layout::{Layout, LayoutInput, PAGE_SIZE}; |
| 13 | 13 | use crate::leb::write_uleb; |
| 14 | +use crate::loh::{write_loh_blob, LohEntry}; | |
| 14 | 15 | use crate::macho::constants::*; |
| 15 | 16 | use crate::macho::dylib::DylibDependency; |
| 16 | 17 | use crate::macho::exports::{ExportEntry, ExportKind}; |
@@ -71,6 +72,7 @@ pub enum WriteError { | ||
| 71 | 72 | ImportSymbolMissing(SymbolId), |
| 72 | 73 | ImportSymbolWrongKind(SymbolId), |
| 73 | 74 | MalformedRelocations(PathBuf, u8, String), |
| 75 | + MalformedLoh(PathBuf, String), | |
| 74 | 76 | MalformedDataInCode(PathBuf, String), |
| 75 | 77 | SymbolListRead(PathBuf, String), |
| 76 | 78 | } |
@@ -125,6 +127,13 @@ impl fmt::Display for WriteError { | ||
| 125 | 127 | path.display(), |
| 126 | 128 | section |
| 127 | 129 | ), |
| 130 | + WriteError::MalformedLoh(path, detail) => { | |
| 131 | + write!( | |
| 132 | + f, | |
| 133 | + "failed to remap LC_LINKER_OPTIMIZATION_HINT in {}: {detail}", | |
| 134 | + path.display() | |
| 135 | + ) | |
| 136 | + } | |
| 128 | 137 | WriteError::MalformedDataInCode(path, detail) => { |
| 129 | 138 | write!( |
| 130 | 139 | f, |
@@ -289,6 +298,7 @@ pub fn write_finalized_with_linkedit( | ||
| 289 | 298 | let weak_bind_off = linkedit_plan.dyld_info.weak_bind_off as usize; |
| 290 | 299 | let lazy_bind_off = linkedit_plan.dyld_info.lazy_bind_off as usize; |
| 291 | 300 | let export_off = linkedit_plan.dyld_info.export_off as usize; |
| 301 | + let loh_off = linkedit_plan.loh.map(|loh| loh.dataoff as usize); | |
| 292 | 302 | let function_starts_off = linkedit_plan.function_starts.dataoff as usize; |
| 293 | 303 | let data_in_code_off = linkedit_plan.data_in_code.dataoff as usize; |
| 294 | 304 | let stroff = linkedit_plan.symtab.stroff as usize; |
@@ -320,6 +330,12 @@ pub fn write_finalized_with_linkedit( | ||
| 320 | 330 | let end = export_off + linkedit_plan.export_bytes.len(); |
| 321 | 331 | out[export_off..end].copy_from_slice(&linkedit_plan.export_bytes); |
| 322 | 332 | } |
| 333 | + if let Some(loh_off) = loh_off { | |
| 334 | + if !linkedit_plan.loh_bytes.is_empty() { | |
| 335 | + let end = loh_off + linkedit_plan.loh_bytes.len(); | |
| 336 | + out[loh_off..end].copy_from_slice(&linkedit_plan.loh_bytes); | |
| 337 | + } | |
| 338 | + } | |
| 323 | 339 | if !linkedit_plan.function_starts_bytes.is_empty() { |
| 324 | 340 | let end = function_starts_off + linkedit_plan.function_starts_bytes.len(); |
| 325 | 341 | out[function_starts_off..end].copy_from_slice(&linkedit_plan.function_starts_bytes); |
@@ -400,6 +416,13 @@ fn build_commands( | ||
| 400 | 416 | })); |
| 401 | 417 | } |
| 402 | 418 | |
| 419 | + if let Some(loh) = linkedit.loh { | |
| 420 | + commands.push(raw_linkedit_command( | |
| 421 | + LC_LINKER_OPTIMIZATION_HINT, | |
| 422 | + loh.dataoff, | |
| 423 | + loh.datasize, | |
| 424 | + )); | |
| 425 | + } | |
| 403 | 426 | commands.push(raw_linkedit_command( |
| 404 | 427 | LC_FUNCTION_STARTS, |
| 405 | 428 | linkedit.function_starts.dataoff, |
@@ -433,7 +456,7 @@ fn estimate_header_size( | ||
| 433 | 456 | kind: OutputKind, |
| 434 | 457 | opts: &LinkOptions, |
| 435 | 458 | dylibs: &[DylibDependency], |
| 436 | - _linkedit: &LinkEditPlan, | |
| 459 | + linkedit: &LinkEditPlan, | |
| 437 | 460 | ) -> u64 { |
| 438 | 461 | let mut size = HEADER_SIZE as u64; |
| 439 | 462 | for segment in &layout.segments { |
@@ -477,6 +500,9 @@ fn estimate_header_size( | ||
| 477 | 500 | size += SymtabCmd::WIRE_SIZE as u64; |
| 478 | 501 | size += DysymtabCmd::WIRE_SIZE as u64; |
| 479 | 502 | size += 16 * 3; |
| 503 | + if linkedit.loh.is_some() { | |
| 504 | + size += 16; | |
| 505 | + } | |
| 480 | 506 | size += DyldInfoCmd::WIRE_SIZE as u64; |
| 481 | 507 | size |
| 482 | 508 | } |
@@ -683,6 +709,7 @@ pub struct LinkEditPlan { | ||
| 683 | 709 | pub symtab: SymtabCmd, |
| 684 | 710 | pub dysymtab: DysymtabCmd, |
| 685 | 711 | pub dyld_info: DyldInfoCmd, |
| 712 | + pub loh: Option<LinkEditDataCmd>, | |
| 686 | 713 | pub function_starts: LinkEditDataCmd, |
| 687 | 714 | pub data_in_code: LinkEditDataCmd, |
| 688 | 715 | pub symtab_bytes: Vec<u8>, |
@@ -692,6 +719,7 @@ pub struct LinkEditPlan { | ||
| 692 | 719 | weak_bind_bytes: Vec<u8>, |
| 693 | 720 | lazy_bind_bytes: Vec<u8>, |
| 694 | 721 | export_bytes: Vec<u8>, |
| 722 | + loh_bytes: Vec<u8>, | |
| 695 | 723 | function_starts_bytes: Vec<u8>, |
| 696 | 724 | data_in_code_bytes: Vec<u8>, |
| 697 | 725 | pub strtab_bytes: Vec<u8>, |
@@ -745,6 +773,7 @@ fn build_linkedit_plan( | ||
| 745 | 773 | }, |
| 746 | 774 | dysymtab: DysymtabCmd::default(), |
| 747 | 775 | dyld_info: DyldInfoCmd::default(), |
| 776 | + loh: None, | |
| 748 | 777 | function_starts: LinkEditDataCmd { |
| 749 | 778 | dataoff: base_off, |
| 750 | 779 | datasize: 0, |
@@ -760,6 +789,7 @@ fn build_linkedit_plan( | ||
| 760 | 789 | weak_bind_bytes: Vec::new(), |
| 761 | 790 | lazy_bind_bytes: Vec::new(), |
| 762 | 791 | export_bytes: Vec::new(), |
| 792 | + loh_bytes: Vec::new(), | |
| 763 | 793 | function_starts_bytes: Vec::new(), |
| 764 | 794 | data_in_code_bytes: Vec::new(), |
| 765 | 795 | strtab_bytes: vec![0; 8], |
@@ -833,6 +863,12 @@ fn build_linkedit_plan( | ||
| 833 | 863 | let weak_bind_bytes = pad_dyld_info_stream(bind_streams.weak_bind); |
| 834 | 864 | let lazy_bind_bytes = pad_dyld_info_stream(bind_streams.lazy_bind); |
| 835 | 865 | let export_bytes = pad_dyld_info_stream(build_export_trie(&symbol_plan.exports)); |
| 866 | + let loh_bytes = build_loh( | |
| 867 | + layout, | |
| 868 | + inputs.0.layout_inputs, | |
| 869 | + inputs.0.atom_table, | |
| 870 | + inputs.0.icf_redirects, | |
| 871 | + )?; | |
| 836 | 872 | let function_starts_bytes = |
| 837 | 873 | build_function_starts(layout, inputs.0.layout_inputs, inputs.0.atom_table)?; |
| 838 | 874 | let data_in_code_bytes = build_data_in_code( |
@@ -856,6 +892,7 @@ fn build_linkedit_plan( | ||
| 856 | 892 | "lazy bind stream offset", |
| 857 | 893 | )?; |
| 858 | 894 | let export_off = place_optional_block(&mut cursor, export_bytes.len(), "export trie offset")?; |
| 895 | + let loh = place_optional_linkedit_data_block(&mut cursor, loh_bytes.len(), "LOH offset")?; | |
| 859 | 896 | let function_starts = place_linkedit_data_block( |
| 860 | 897 | &mut cursor, |
| 861 | 898 | function_starts_bytes.len(), |
@@ -900,6 +937,7 @@ fn build_linkedit_plan( | ||
| 900 | 937 | export_off, |
| 901 | 938 | export_size: export_bytes.len() as u32, |
| 902 | 939 | }, |
| 940 | + loh, | |
| 903 | 941 | function_starts, |
| 904 | 942 | data_in_code, |
| 905 | 943 | symtab_bytes, |
@@ -909,6 +947,7 @@ fn build_linkedit_plan( | ||
| 909 | 947 | weak_bind_bytes, |
| 910 | 948 | lazy_bind_bytes, |
| 911 | 949 | export_bytes, |
| 950 | + loh_bytes, | |
| 912 | 951 | function_starts_bytes, |
| 913 | 952 | data_in_code_bytes, |
| 914 | 953 | strtab_bytes: symbol_plan.strtab_bytes, |
@@ -1420,6 +1459,83 @@ fn build_data_in_code( | ||
| 1420 | 1459 | Ok(out) |
| 1421 | 1460 | } |
| 1422 | 1461 | |
| 1462 | +fn build_loh( | |
| 1463 | + layout: &Layout, | |
| 1464 | + inputs: &[LayoutInput<'_>], | |
| 1465 | + atom_table: &AtomTable, | |
| 1466 | + icf_redirects: Option<&HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>, | |
| 1467 | +) -> Result<Vec<u8>, WriteError> { | |
| 1468 | + #[derive(Clone)] | |
| 1469 | + struct RemappedEntry { | |
| 1470 | + input_order: usize, | |
| 1471 | + input_entry_index: usize, | |
| 1472 | + first_arg: u32, | |
| 1473 | + entry: LohEntry, | |
| 1474 | + } | |
| 1475 | + | |
| 1476 | + let atoms_by_input_section = atom_table.by_input_section(); | |
| 1477 | + let mut remapped = Vec::new(); | |
| 1478 | + for (input_order, input) in inputs.iter().enumerate() { | |
| 1479 | + for (input_entry_index, entry) in input.object.loh.iter().cloned().enumerate() { | |
| 1480 | + let mut args = Vec::with_capacity(entry.args.len()); | |
| 1481 | + for input_offset in entry.args { | |
| 1482 | + let (section_index, section_relative) = | |
| 1483 | + remap_loh_to_section(input.object, input_offset)?; | |
| 1484 | + let (atom_id, atom_delta) = find_containing_atom_range( | |
| 1485 | + atom_table, | |
| 1486 | + &atoms_by_input_section, | |
| 1487 | + input.id, | |
| 1488 | + section_index, | |
| 1489 | + section_relative, | |
| 1490 | + 4, | |
| 1491 | + icf_redirects, | |
| 1492 | + ) | |
| 1493 | + .ok_or_else(|| { | |
| 1494 | + WriteError::MalformedLoh( | |
| 1495 | + input.object.path.clone(), | |
| 1496 | + format!( | |
| 1497 | + "instruction at file offset {} did not land inside any atom", | |
| 1498 | + input_offset | |
| 1499 | + ), | |
| 1500 | + ) | |
| 1501 | + })?; | |
| 1502 | + let output_offset = layout.atom_file_offset(atom_id).ok_or_else(|| { | |
| 1503 | + WriteError::MalformedLoh( | |
| 1504 | + input.object.path.clone(), | |
| 1505 | + format!( | |
| 1506 | + "atom {:?} for instruction at file offset {} is missing from final layout", | |
| 1507 | + atom_id, input_offset | |
| 1508 | + ), | |
| 1509 | + ) | |
| 1510 | + })? + atom_delta as u64; | |
| 1511 | + args.push(u32_fit(output_offset, "LOH output offset")?); | |
| 1512 | + } | |
| 1513 | + remapped.push(RemappedEntry { | |
| 1514 | + input_order, | |
| 1515 | + input_entry_index, | |
| 1516 | + first_arg: args.first().copied().unwrap_or(0), | |
| 1517 | + entry: LohEntry { | |
| 1518 | + kind: entry.kind, | |
| 1519 | + args, | |
| 1520 | + }, | |
| 1521 | + }); | |
| 1522 | + } | |
| 1523 | + } | |
| 1524 | + | |
| 1525 | + remapped.sort_by(|a, b| { | |
| 1526 | + a.first_arg | |
| 1527 | + .cmp(&b.first_arg) | |
| 1528 | + .then_with(|| a.input_order.cmp(&b.input_order)) | |
| 1529 | + .then_with(|| a.input_entry_index.cmp(&b.input_entry_index)) | |
| 1530 | + }); | |
| 1531 | + Ok(write_loh_blob( | |
| 1532 | + &remapped | |
| 1533 | + .into_iter() | |
| 1534 | + .map(|entry| entry.entry) | |
| 1535 | + .collect::<Vec<_>>(), | |
| 1536 | + )) | |
| 1537 | +} | |
| 1538 | + | |
| 1423 | 1539 | fn remap_data_in_code_to_section( |
| 1424 | 1540 | object: &ObjectFile, |
| 1425 | 1541 | entry: DataInCodeEntry, |
@@ -1468,6 +1584,46 @@ fn remap_data_in_code_to_section( | ||
| 1468 | 1584 | )) |
| 1469 | 1585 | } |
| 1470 | 1586 | |
| 1587 | +fn remap_loh_to_section(object: &ObjectFile, input_offset: u32) -> Result<(u8, u32), WriteError> { | |
| 1588 | + let instruction_start = input_offset as u64; | |
| 1589 | + let instruction_end = instruction_start.checked_add(4).ok_or_else(|| { | |
| 1590 | + WriteError::MalformedLoh( | |
| 1591 | + object.path.clone(), | |
| 1592 | + format!("instruction at input offset {} overflows u64", input_offset), | |
| 1593 | + ) | |
| 1594 | + })?; | |
| 1595 | + let mut matches = object | |
| 1596 | + .sections | |
| 1597 | + .iter() | |
| 1598 | + .enumerate() | |
| 1599 | + .filter(|(_, section)| !section.data.is_empty() && is_executable(section.kind)) | |
| 1600 | + .filter_map(|(idx, section)| { | |
| 1601 | + let section_start = section.addr; | |
| 1602 | + let section_end = section.addr.checked_add(section.size)?; | |
| 1603 | + (section_start <= instruction_start && instruction_end <= section_end) | |
| 1604 | + .then_some(((idx + 1) as u8, (instruction_start - section_start) as u32)) | |
| 1605 | + }); | |
| 1606 | + if let Some(mapped) = matches.next() { | |
| 1607 | + if matches.next().is_none() { | |
| 1608 | + return Ok(mapped); | |
| 1609 | + } | |
| 1610 | + return Err(WriteError::MalformedLoh( | |
| 1611 | + object.path.clone(), | |
| 1612 | + format!( | |
| 1613 | + "instruction at input offset {} ambiguously matches multiple executable input sections", | |
| 1614 | + input_offset | |
| 1615 | + ), | |
| 1616 | + )); | |
| 1617 | + } | |
| 1618 | + Err(WriteError::MalformedLoh( | |
| 1619 | + object.path.clone(), | |
| 1620 | + format!( | |
| 1621 | + "instruction at input offset {} does not map to any executable input section range", | |
| 1622 | + input_offset | |
| 1623 | + ), | |
| 1624 | + )) | |
| 1625 | +} | |
| 1626 | + | |
| 1471 | 1627 | fn collect_imports( |
| 1472 | 1628 | sym_table: &SymbolTable, |
| 1473 | 1629 | synthetic_plan: &SyntheticPlan, |
@@ -2133,6 +2289,17 @@ fn place_linkedit_data_block( | ||
| 2133 | 2289 | }) |
| 2134 | 2290 | } |
| 2135 | 2291 | |
| 2292 | +fn place_optional_linkedit_data_block( | |
| 2293 | + cursor: &mut u64, | |
| 2294 | + size: usize, | |
| 2295 | + context: &'static str, | |
| 2296 | +) -> Result<Option<LinkEditDataCmd>, WriteError> { | |
| 2297 | + if size == 0 { | |
| 2298 | + return Ok(None); | |
| 2299 | + } | |
| 2300 | + Ok(Some(place_linkedit_data_block(cursor, size, context)?)) | |
| 2301 | +} | |
| 2302 | + | |
| 2136 | 2303 | fn push_indirect_section( |
| 2137 | 2304 | indirect_symbols: &mut Vec<u32>, |
| 2138 | 2305 | indirect_starts: &mut HashMap<(String, String), u32>, |
src/synth/mod.rsmodified@@ -1203,6 +1203,7 @@ mod tests { | ||
| 1203 | 1203 | strings: StringTable::from_bytes(strings), |
| 1204 | 1204 | symtab: None, |
| 1205 | 1205 | dysymtab: None, |
| 1206 | + loh: Vec::new(), | |
| 1206 | 1207 | data_in_code: Vec::new(), |
| 1207 | 1208 | } |
| 1208 | 1209 | } |
@@ -1288,6 +1289,7 @@ mod tests { | ||
| 1288 | 1289 | strings: StringTable::from_bytes(strings), |
| 1289 | 1290 | symtab: None, |
| 1290 | 1291 | dysymtab: None, |
| 1292 | + loh: Vec::new(), | |
| 1291 | 1293 | data_in_code: Vec::new(), |
| 1292 | 1294 | } |
| 1293 | 1295 | } |
@@ -1337,6 +1339,7 @@ mod tests { | ||
| 1337 | 1339 | strings: StringTable::from_bytes(strings), |
| 1338 | 1340 | symtab: None, |
| 1339 | 1341 | dysymtab: None, |
| 1342 | + loh: Vec::new(), | |
| 1340 | 1343 | data_in_code: Vec::new(), |
| 1341 | 1344 | } |
| 1342 | 1345 | } |
tests/linker_run.rsmodified@@ -9,6 +9,7 @@ use std::process::Command; | ||
| 9 | 9 | mod common; |
| 10 | 10 | |
| 11 | 11 | use afs_ld::leb::read_uleb; |
| 12 | +use afs_ld::loh::{parse_loh_blob, LOH_ARM64_ADRP_ADD}; | |
| 12 | 13 | use afs_ld::macho::constants::{ |
| 13 | 14 | BIND_IMMEDIATE_MASK, BIND_OPCODE_ADD_ADDR_ULEB, BIND_OPCODE_DONE, BIND_OPCODE_DO_BIND, |
| 14 | 15 | BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB, |
@@ -18,9 +19,9 @@ use afs_ld::macho::constants::{ | ||
| 18 | 19 | BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, BIND_OPCODE_SET_TYPE_IMM, |
| 19 | 20 | BIND_SYMBOL_FLAGS_WEAK_IMPORT, DICE_KIND_JUMP_TABLE32, INDIRECT_SYMBOL_ABS, |
| 20 | 21 | INDIRECT_SYMBOL_LOCAL, LC_BUILD_VERSION, LC_DATA_IN_CODE, LC_DYLD_INFO_ONLY, LC_DYSYMTAB, |
| 21 | - LC_FUNCTION_STARTS, LC_SEGMENT_64, LC_SYMTAB, N_PEXT, REBASE_IMMEDIATE_MASK, | |
| 22 | - REBASE_OPCODE_ADD_ADDR_IMM_SCALED, REBASE_OPCODE_ADD_ADDR_ULEB, REBASE_OPCODE_DONE, | |
| 23 | - REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB, REBASE_OPCODE_DO_REBASE_IMM_TIMES, | |
| 22 | + LC_FUNCTION_STARTS, LC_LINKER_OPTIMIZATION_HINT, LC_SEGMENT_64, LC_SYMTAB, N_PEXT, | |
| 23 | + REBASE_IMMEDIATE_MASK, REBASE_OPCODE_ADD_ADDR_IMM_SCALED, REBASE_OPCODE_ADD_ADDR_ULEB, | |
| 24 | + REBASE_OPCODE_DONE, REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB, REBASE_OPCODE_DO_REBASE_IMM_TIMES, | |
| 24 | 25 | REBASE_OPCODE_DO_REBASE_ULEB_TIMES, REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, |
| 25 | 26 | REBASE_OPCODE_MASK, REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, REBASE_OPCODE_SET_TYPE_IMM, |
| 26 | 27 | REBASE_TYPE_POINTER, SG_READ_ONLY, |
@@ -516,10 +517,16 @@ fn raw_linkedit_data_cmd(bytes: &[u8], expected_cmd: u32) -> (u32, u32) { | ||
| 516 | 517 | let header = parse_header(bytes).unwrap(); |
| 517 | 518 | let commands = parse_commands(&header, bytes).unwrap(); |
| 518 | 519 | for cmd in commands { |
| 519 | - if let LoadCommand::Raw { cmd, data, .. } = cmd { | |
| 520 | - if cmd == expected_cmd { | |
| 520 | + match cmd { | |
| 521 | + LoadCommand::Raw { cmd, data, .. } if cmd == expected_cmd => { | |
| 521 | 522 | return (u32_le(&data[0..4]), u32_le(&data[4..8])); |
| 522 | 523 | } |
| 524 | + LoadCommand::LinkerOptimizationHint(linkedit) | |
| 525 | + if expected_cmd == LC_LINKER_OPTIMIZATION_HINT => | |
| 526 | + { | |
| 527 | + return (linkedit.dataoff, linkedit.datasize); | |
| 528 | + } | |
| 529 | + _ => {} | |
| 523 | 530 | } |
| 524 | 531 | } |
| 525 | 532 | panic!("missing raw linkedit command 0x{expected_cmd:x}"); |
@@ -772,6 +779,10 @@ fn canonical_data_in_code(bytes: &[u8]) -> Vec<DataInCodeRecord> { | ||
| 772 | 779 | .collect() |
| 773 | 780 | } |
| 774 | 781 | |
| 782 | +fn decode_loh(bytes: &[u8]) -> Vec<afs_ld::loh::LohEntry> { | |
| 783 | + parse_loh_blob(&linkedit_payload(bytes, LC_LINKER_OPTIMIZATION_HINT)).unwrap() | |
| 784 | +} | |
| 785 | + | |
| 775 | 786 | fn assert_strtab_within_five_percent(ours: &[u8], apple: &[u8]) { |
| 776 | 787 | let delta = ours.len().abs_diff(apple.len()); |
| 777 | 788 | assert!( |
@@ -1918,6 +1929,62 @@ fn linker_run_emits_non_empty_executable_from_real_object() { | ||
| 1918 | 1929 | let _ = fs::remove_file(apple_out); |
| 1919 | 1930 | } |
| 1920 | 1931 | |
/// Links an object carrying one `.loh AdrpAdd` directive and checks that the
/// output still has a single AdrpAdd hint whose two instruction offsets are
/// 4 bytes apart and lie inside the output `__TEXT,__text` section.
///
/// Skipped (with a message) when the assembler toolchain is unavailable.
#[test]
fn linker_run_preserves_loh_payloads_from_input_objects() {
    if !have_xcrun() {
        eprintln!("skipping: xcrun as unavailable");
        return;
    }

    let obj = scratch("loh-main.o");
    let out = scratch("loh-main.out");
    let src = r#"
        .section __TEXT,__text,regular,pure_instructions
        .globl _main
        _main:
        Lloh0:
        adrp x0, _msg@PAGE
        Lloh1:
        add x0, x0, _msg@PAGEOFF
        mov x0, #0
        ret
        .section __DATA,__data
        _msg:
        .quad 0
        .loh AdrpAdd Lloh0, Lloh1
        .subsections_via_symbols
    "#;
    if let Err(e) = assemble(src, &obj) {
        eprintln!("skipping: assemble failed: {e}");
        let _ = fs::remove_file(&obj);
        return;
    }

    let opts = LinkOptions {
        inputs: vec![obj.clone()],
        output: Some(out.clone()),
        kind: OutputKind::Executable,
        ..LinkOptions::default()
    };
    Linker::run(&opts).unwrap();

    let bytes = fs::read(&out).unwrap();
    // Everything needed is in memory now; remove the scratch artifacts before
    // asserting so a failing assertion does not leave them behind.
    let _ = fs::remove_file(&obj);
    let _ = fs::remove_file(&out);

    let loh = decode_loh(&bytes);
    assert_eq!(loh.len(), 1);
    assert_eq!(loh[0].kind, LOH_ARM64_ADRP_ADD);
    assert_eq!(loh[0].args.len(), 2);
    assert_eq!(loh[0].args[1] - loh[0].args[0], 4);

    let text = output_section_header(&bytes, "__TEXT", "__text").unwrap();
    let text_start = text.offset;
    let text_end = text.offset + text.size as u32;
    for &arg in &loh[0].args {
        assert!(
            text_start <= arg && arg + 4 <= text_end,
            "LOH instruction offset {arg:#x} escaped __TEXT,__text [{text_start:#x}, {text_end:#x})",
        );
    }
}
| 1987 | + | |
| 1921 | 1988 | #[test] |
| 1922 | 1989 | fn linker_run_emits_minimal_dylib_from_real_object() { |
| 1923 | 1990 | if !have_xcrun() { |
@@ -7556,7 +7623,9 @@ fn linker_run_icf_safe_folds_identical_private_literal16() { | ||
| 7556 | 7623 | let baseline_literals = output_section(&baseline_bytes, "__TEXT", "__literal16") |
| 7557 | 7624 | .unwrap() |
| 7558 | 7625 | .1; |
| 7559 | - let our_literals = output_section(&our_bytes, "__TEXT", "__literal16").unwrap().1; | |
| 7626 | + let our_literals = output_section(&our_bytes, "__TEXT", "__literal16") | |
| 7627 | + .unwrap() | |
| 7628 | + .1; | |
| 7560 | 7629 | |
| 7561 | 7630 | assert_ne!( |
| 7562 | 7631 | baseline_symbols.get("_lit1"), |