fortrangoingonforty/afs-ld / 1e80916

Browse files

Preserve LOH payloads

Authored by espadonne
SHA
1e809169ea3a69ab9fa66d3256729086813a6b50
Parents
1eae32b
Tree
fead290

7 changed files

StatusFile+-
M src/input.rs 138 0
M src/layout.rs 8 0
M src/lib.rs 1 0
A src/loh.rs 104 0
M src/macho/writer.rs 168 1
M src/synth/mod.rs 3 0
M tests/linker_run.rs 75 6
src/input.rsmodified
@@ -7,6 +7,7 @@
77
 
88
 use std::path::PathBuf;
99
 
10
+use crate::loh::{parse_loh_blob, LohEntry};
1011
 use crate::macho::constants::LC_DATA_IN_CODE;
1112
 use crate::macho::reader::{
1213
     parse_commands, parse_header, DysymtabCmd, LinkEditDataCmd, LoadCommand, MachHeader64,
@@ -28,6 +29,7 @@ pub struct ObjectFile {
2829
     pub strings: StringTable,
2930
     pub symtab: Option<SymtabCmd>,
3031
     pub dysymtab: Option<DysymtabCmd>,
32
+    pub loh: Vec<LohEntry>,
3133
     pub data_in_code: Vec<DataInCodeEntry>,
3234
 }
3335
 
@@ -90,6 +92,7 @@ impl ObjectFile {
9092
             ),
9193
             None => (Vec::new(), StringTable::from_bytes(Vec::new())),
9294
         };
95
+        let loh = parse_loh(&commands, file_bytes)?;
9396
         let data_in_code = parse_data_in_code(&commands, file_bytes)?;
9497
 
9598
         Ok(ObjectFile {
@@ -101,6 +104,7 @@ impl ObjectFile {
101104
             strings,
102105
             symtab,
103106
             dysymtab,
107
+            loh,
104108
             data_in_code,
105109
         })
106110
     }
@@ -132,6 +136,32 @@ impl ObjectFile {
132136
     }
133137
 }
134138
 
139
+fn parse_loh(commands: &[LoadCommand], file_bytes: &[u8]) -> Result<Vec<LohEntry>, ReadError> {
140
+    let mut out = Vec::new();
141
+    for command in commands {
142
+        let LoadCommand::LinkerOptimizationHint(linkedit) = command else {
143
+            continue;
144
+        };
145
+        let start = linkedit.dataoff as usize;
146
+        let end = start
147
+            .checked_add(linkedit.datasize as usize)
148
+            .ok_or(ReadError::Truncated {
149
+                need: usize::MAX,
150
+                have: file_bytes.len(),
151
+                context: "LC_LINKER_OPTIMIZATION_HINT payload (offset + size overflows)",
152
+            })?;
153
+        if end > file_bytes.len() {
154
+            return Err(ReadError::Truncated {
155
+                need: end,
156
+                have: file_bytes.len(),
157
+                context: "LC_LINKER_OPTIMIZATION_HINT payload",
158
+            });
159
+        }
160
+        out.extend(parse_loh_blob(&file_bytes[start..end])?);
161
+    }
162
+    Ok(out)
163
+}
164
+
135165
 fn parse_data_in_code(
136166
     commands: &[LoadCommand],
137167
     file_bytes: &[u8],
@@ -182,6 +212,7 @@ pub fn header_and_cmds_end(header: &MachHeader64) -> usize {
182212
 #[cfg(test)]
183213
 mod tests {
184214
     use super::*;
215
+    use crate::loh::{write_loh_blob, LOH_ARM64_ADRP_ADD};
185216
     use crate::macho::constants::*;
186217
     use crate::macho::reader::{
187218
         write_commands, write_header, LinkEditDataCmd, LoadCommand, Section64Header, Segment64,
@@ -389,6 +420,99 @@ mod tests {
389420
         image
390421
     }
391422
 
423
+    fn synth_image_with_loh() -> Vec<u8> {
424
+        let text_sect = Section64Header {
425
+            sectname: name16("__text"),
426
+            segname: name16("__TEXT"),
427
+            addr: 0,
428
+            size: 8,
429
+            offset: 0,
430
+            align: 2,
431
+            reloff: 0,
432
+            nreloc: 0,
433
+            flags: S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS,
434
+            reserved1: 0,
435
+            reserved2: 0,
436
+            reserved3: 0,
437
+        };
438
+        let seg = Segment64 {
439
+            segname: name16(""),
440
+            vmaddr: 0,
441
+            vmsize: 8,
442
+            fileoff: 0,
443
+            filesize: 8,
444
+            maxprot: 7,
445
+            initprot: 7,
446
+            flags: 0,
447
+            sections: vec![text_sect],
448
+        };
449
+        let strtab = b"\0_main\0";
450
+        let nsyms = 1u32;
451
+        let sym = RawNlist {
452
+            strx: 1,
453
+            n_type: N_SECT | N_EXT,
454
+            n_sect: 1,
455
+            n_desc: 0,
456
+            n_value: 0,
457
+        };
458
+        let loh_blob = write_loh_blob(&[LohEntry {
459
+            kind: LOH_ARM64_ADRP_ADD,
460
+            args: vec![0, 4],
461
+        }]);
462
+        let hdr_size = HEADER_SIZE;
463
+        let seg_size = seg.wire_size() as usize;
464
+        let loh_size = LinkEditDataCmd::WIRE_SIZE as usize;
465
+        let symtab_size = SymtabCmd::WIRE_SIZE as usize;
466
+        let sizeofcmds = (seg_size + loh_size + symtab_size) as u32;
467
+
468
+        let section_offset = (hdr_size + sizeofcmds as usize) as u32;
469
+        let loh_off = section_offset + 8;
470
+        let symoff = loh_off + loh_blob.len() as u32;
471
+        let stroff = symoff + NLIST_SIZE as u32 * nsyms;
472
+        let seg = Segment64 {
473
+            sections: vec![Section64Header {
474
+                offset: section_offset,
475
+                ..seg.sections[0]
476
+            }],
477
+            fileoff: section_offset as u64,
478
+            ..seg
479
+        };
480
+        let header = MachHeader64 {
481
+            magic: MH_MAGIC_64,
482
+            cputype: CPU_TYPE_ARM64,
483
+            cpusubtype: 0,
484
+            filetype: MH_OBJECT,
485
+            ncmds: 3,
486
+            sizeofcmds,
487
+            flags: MH_SUBSECTIONS_VIA_SYMBOLS,
488
+            reserved: 0,
489
+        };
490
+        let symtab_cmd = SymtabCmd {
491
+            symoff,
492
+            nsyms,
493
+            stroff,
494
+            strsize: strtab.len() as u32,
495
+        };
496
+        let loh_cmd = LoadCommand::LinkerOptimizationHint(LinkEditDataCmd {
497
+            dataoff: loh_off,
498
+            datasize: loh_blob.len() as u32,
499
+        });
500
+
501
+        let mut image = Vec::new();
502
+        write_header(&header, &mut image);
503
+        let cmds = vec![
504
+            LoadCommand::Segment64(seg),
505
+            loh_cmd,
506
+            LoadCommand::Symtab(symtab_cmd),
507
+        ];
508
+        write_commands(&cmds, &mut image);
509
+        image.extend_from_slice(&[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22]);
510
+        image.extend_from_slice(&loh_blob);
511
+        sym.write(&mut image);
512
+        image.extend_from_slice(strtab);
513
+        image
514
+    }
515
+
392516
     #[test]
393517
     fn parse_synth_object_end_to_end() {
394518
         let image = synth_image();
@@ -425,6 +549,19 @@ mod tests {
425549
         );
426550
     }
427551
 
552
+    #[test]
553
+    fn parse_preserves_loh_entries() {
554
+        let image = synth_image_with_loh();
555
+        let obj = ObjectFile::parse("/tmp/synth-loh.o", &image).unwrap();
556
+        assert_eq!(
557
+            obj.loh,
558
+            vec![LohEntry {
559
+                kind: LOH_ARM64_ADRP_ADD,
560
+                args: vec![0, 4],
561
+            }]
562
+        );
563
+    }
564
+
428565
     #[test]
429566
     fn indirect_target_name_resolves() {
430567
         // Build a minimal strtab with "\0_alias\0_target\0" and a RawNlist
@@ -448,6 +585,7 @@ mod tests {
448585
             strings: strtab,
449586
             symtab: None,
450587
             dysymtab: None,
588
+            loh: Vec::new(),
451589
             data_in_code: Vec::new(),
452590
         };
453591
         let alias = InputSymbol::from_raw(RawNlist {
src/layout.rsmodified
@@ -668,6 +668,7 @@ mod tests {
668668
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
669669
             symtab: None,
670670
             dysymtab: None,
671
+            loh: Vec::new(),
671672
             data_in_code: Vec::new(),
672673
         };
673674
 
@@ -755,6 +756,7 @@ mod tests {
755756
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
756757
             symtab: None,
757758
             dysymtab: None,
759
+            loh: Vec::new(),
758760
             data_in_code: Vec::new(),
759761
         };
760762
 
@@ -817,6 +819,7 @@ mod tests {
817819
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
818820
             symtab: None,
819821
             dysymtab: None,
822
+            loh: Vec::new(),
820823
             data_in_code: Vec::new(),
821824
         };
822825
 
@@ -875,6 +878,7 @@ mod tests {
875878
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
876879
             symtab: None,
877880
             dysymtab: None,
881
+            loh: Vec::new(),
878882
             data_in_code: Vec::new(),
879883
         };
880884
 
@@ -947,6 +951,7 @@ mod tests {
947951
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
948952
             symtab: None,
949953
             dysymtab: None,
954
+            loh: Vec::new(),
950955
             data_in_code: Vec::new(),
951956
         };
952957
 
@@ -1008,6 +1013,7 @@ mod tests {
10081013
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
10091014
             symtab: None,
10101015
             dysymtab: None,
1016
+            loh: Vec::new(),
10111017
             data_in_code: Vec::new(),
10121018
         };
10131019
 
@@ -1143,6 +1149,7 @@ mod tests {
11431149
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
11441150
             symtab: None,
11451151
             dysymtab: None,
1152
+            loh: Vec::new(),
11461153
             data_in_code: Vec::new(),
11471154
         };
11481155
 
@@ -1230,6 +1237,7 @@ mod tests {
12301237
             strings: crate::string_table::StringTable::from_bytes(vec![0]),
12311238
             symtab: None,
12321239
             dysymtab: None,
1240
+            loh: Vec::new(),
12331241
             data_in_code: Vec::new(),
12341242
         };
12351243
 
src/lib.rsmodified
@@ -14,6 +14,7 @@ pub mod input;
1414
 pub mod layout;
1515
 pub mod leb;
1616
 pub mod link_map;
17
+pub mod loh;
1718
 pub mod macho;
1819
 pub mod reloc;
1920
 pub mod resolve;
src/loh.rsadded
@@ -0,0 +1,104 @@
1
+//! ARM64 Linker Optimization Hints (LOH).
2
+//!
3
+//! `LC_LINKER_OPTIMIZATION_HINT` stores a ULEB128 stream of `(kind, argc,
4
+//! args...)` records. Each arg locates one participating instruction: an
5
+//! address inside an executable section on input, a file offset on output.
6
+
7
+use crate::leb::{read_uleb, write_uleb};
8
+use crate::macho::reader::ReadError;
9
+
10
// Hint kind codes, as stored in `LohEntry::kind`. Only these four kinds are
// given names here.
// NOTE(review): the values look like LLVM's `MCLOHType` numbering
// (AdrpLdr = 2, AdrpLdrGotLdr = 4, AdrpAdd = 7, AdrpLdrGot = 8) — confirm.
+pub const LOH_ARM64_ADRP_LDR: u32 = 2;
11
+pub const LOH_ARM64_ADRP_LDR_GOT_LDR: u32 = 4;
12
+pub const LOH_ARM64_ADRP_ADD: u32 = 7;
13
+pub const LOH_ARM64_ADRP_LDR_GOT: u32 = 8;
14
+
15
// One record of an LOH stream: `kind` is the hint kind code and `args` holds
// the record's arguments in stream order. The encoded argument count is not
// stored separately; it is `args.len()`.
+#[derive(Debug, Clone, PartialEq, Eq)]
16
+pub struct LohEntry {
17
+    pub kind: u32,
18
+    pub args: Vec<u32>,
19
+}
20
+
21
+pub fn parse_loh_blob(bytes: &[u8]) -> Result<Vec<LohEntry>, ReadError> {
22
+    let mut out = Vec::new();
23
+    let mut cursor = 0usize;
24
+    while cursor < bytes.len() {
25
+        if bytes[cursor..].iter().all(|&byte| byte == 0) {
26
+            break;
27
+        }
28
+        let at_offset = cursor as u32;
29
+        let (kind, used) = read_uleb(&bytes[cursor..])?;
30
+        cursor += used;
31
+        let (argc, used) = read_uleb(&bytes[cursor..])?;
32
+        cursor += used;
33
+        let kind = u32::try_from(kind).map_err(|_| ReadError::BadRelocation {
34
+            at_offset,
35
+            reason: "LOH kind overflows u32",
36
+        })?;
37
+        let argc = usize::try_from(argc).map_err(|_| ReadError::BadRelocation {
38
+            at_offset,
39
+            reason: "LOH argcount overflows usize",
40
+        })?;
41
+        let mut args = Vec::with_capacity(argc);
42
+        for _ in 0..argc {
43
+            let (arg, used) = read_uleb(&bytes[cursor..])?;
44
+            cursor += used;
45
+            args.push(u32::try_from(arg).map_err(|_| ReadError::BadRelocation {
46
+                at_offset,
47
+                reason: "LOH arg overflows u32",
48
+            })?);
49
+        }
50
+        out.push(LohEntry { kind, args });
51
+    }
52
+    Ok(out)
53
+}
54
+
55
+pub fn write_loh_blob(entries: &[LohEntry]) -> Vec<u8> {
56
+    let mut out = Vec::new();
57
+    for entry in entries {
58
+        write_uleb(entry.kind as u64, &mut out);
59
+        write_uleb(entry.args.len() as u64, &mut out);
60
+        for &arg in &entry.args {
61
+            write_uleb(arg as u64, &mut out);
62
+        }
63
+    }
64
+    out
65
+}
66
+
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an entry in fixtures and expectations.
    fn hint(kind: u32, args: &[u32]) -> LohEntry {
        LohEntry {
            kind,
            args: args.to_vec(),
        }
    }

    /// Encoding then decoding must reproduce the entries exactly.
    #[test]
    fn loh_blob_round_trips() {
        let entries = vec![
            hint(LOH_ARM64_ADRP_ADD, &[0, 4]),
            hint(LOH_ARM64_ADRP_LDR_GOT_LDR, &[8, 12, 16]),
        ];
        let decoded = parse_loh_blob(&write_loh_blob(&entries)).unwrap();
        assert_eq!(decoded, entries);
    }

    /// Zero bytes appended to reach 8-byte alignment must not decode as
    /// extra entries.
    #[test]
    fn loh_blob_ignores_trailing_zero_padding() {
        let mut blob = write_loh_blob(&[hint(LOH_ARM64_ADRP_ADD, &[0, 4])]);
        let padded_len = blob.len().next_multiple_of(8);
        blob.resize(padded_len, 0);

        let decoded = parse_loh_blob(&blob).unwrap();
        assert_eq!(decoded, vec![hint(LOH_ARM64_ADRP_ADD, &[0, 4])]);
    }
}
src/macho/writer.rsmodified
@@ -11,6 +11,7 @@ use crate::atom::AtomTable;
1111
 use crate::input::{DataInCodeEntry, ObjectFile};
1212
 use crate::layout::{Layout, LayoutInput, PAGE_SIZE};
1313
 use crate::leb::write_uleb;
14
+use crate::loh::{write_loh_blob, LohEntry};
1415
 use crate::macho::constants::*;
1516
 use crate::macho::dylib::DylibDependency;
1617
 use crate::macho::exports::{ExportEntry, ExportKind};
@@ -71,6 +72,7 @@ pub enum WriteError {
7172
     ImportSymbolMissing(SymbolId),
7273
     ImportSymbolWrongKind(SymbolId),
7374
     MalformedRelocations(PathBuf, u8, String),
75
+    MalformedLoh(PathBuf, String),
7476
     MalformedDataInCode(PathBuf, String),
7577
     SymbolListRead(PathBuf, String),
7678
 }
@@ -125,6 +127,13 @@ impl fmt::Display for WriteError {
125127
                 path.display(),
126128
                 section
127129
             ),
130
+            WriteError::MalformedLoh(path, detail) => {
131
+                write!(
132
+                    f,
133
+                    "failed to remap LC_LINKER_OPTIMIZATION_HINT in {}: {detail}",
134
+                    path.display()
135
+                )
136
+            }
128137
             WriteError::MalformedDataInCode(path, detail) => {
129138
                 write!(
130139
                     f,
@@ -289,6 +298,7 @@ pub fn write_finalized_with_linkedit(
289298
     let weak_bind_off = linkedit_plan.dyld_info.weak_bind_off as usize;
290299
     let lazy_bind_off = linkedit_plan.dyld_info.lazy_bind_off as usize;
291300
     let export_off = linkedit_plan.dyld_info.export_off as usize;
301
+    let loh_off = linkedit_plan.loh.map(|loh| loh.dataoff as usize);
292302
     let function_starts_off = linkedit_plan.function_starts.dataoff as usize;
293303
     let data_in_code_off = linkedit_plan.data_in_code.dataoff as usize;
294304
     let stroff = linkedit_plan.symtab.stroff as usize;
@@ -320,6 +330,12 @@ pub fn write_finalized_with_linkedit(
320330
         let end = export_off + linkedit_plan.export_bytes.len();
321331
         out[export_off..end].copy_from_slice(&linkedit_plan.export_bytes);
322332
     }
333
+    if let Some(loh_off) = loh_off {
334
+        if !linkedit_plan.loh_bytes.is_empty() {
335
+            let end = loh_off + linkedit_plan.loh_bytes.len();
336
+            out[loh_off..end].copy_from_slice(&linkedit_plan.loh_bytes);
337
+        }
338
+    }
323339
     if !linkedit_plan.function_starts_bytes.is_empty() {
324340
         let end = function_starts_off + linkedit_plan.function_starts_bytes.len();
325341
         out[function_starts_off..end].copy_from_slice(&linkedit_plan.function_starts_bytes);
@@ -400,6 +416,13 @@ fn build_commands(
400416
         }));
401417
     }
402418
 
419
+    if let Some(loh) = linkedit.loh {
420
+        commands.push(raw_linkedit_command(
421
+            LC_LINKER_OPTIMIZATION_HINT,
422
+            loh.dataoff,
423
+            loh.datasize,
424
+        ));
425
+    }
403426
     commands.push(raw_linkedit_command(
404427
         LC_FUNCTION_STARTS,
405428
         linkedit.function_starts.dataoff,
@@ -433,7 +456,7 @@ fn estimate_header_size(
433456
     kind: OutputKind,
434457
     opts: &LinkOptions,
435458
     dylibs: &[DylibDependency],
436
-    _linkedit: &LinkEditPlan,
459
+    linkedit: &LinkEditPlan,
437460
 ) -> u64 {
438461
     let mut size = HEADER_SIZE as u64;
439462
     for segment in &layout.segments {
@@ -477,6 +500,9 @@ fn estimate_header_size(
477500
     size += SymtabCmd::WIRE_SIZE as u64;
478501
     size += DysymtabCmd::WIRE_SIZE as u64;
479502
     size += 16 * 3;
503
+    if linkedit.loh.is_some() {
504
+        size += 16;
505
+    }
480506
     size += DyldInfoCmd::WIRE_SIZE as u64;
481507
     size
482508
 }
@@ -683,6 +709,7 @@ pub struct LinkEditPlan {
683709
     pub symtab: SymtabCmd,
684710
     pub dysymtab: DysymtabCmd,
685711
     pub dyld_info: DyldInfoCmd,
712
+    pub loh: Option<LinkEditDataCmd>,
686713
     pub function_starts: LinkEditDataCmd,
687714
     pub data_in_code: LinkEditDataCmd,
688715
     pub symtab_bytes: Vec<u8>,
@@ -692,6 +719,7 @@ pub struct LinkEditPlan {
692719
     weak_bind_bytes: Vec<u8>,
693720
     lazy_bind_bytes: Vec<u8>,
694721
     export_bytes: Vec<u8>,
722
+    loh_bytes: Vec<u8>,
695723
     function_starts_bytes: Vec<u8>,
696724
     data_in_code_bytes: Vec<u8>,
697725
     pub strtab_bytes: Vec<u8>,
@@ -745,6 +773,7 @@ fn build_linkedit_plan(
745773
             },
746774
             dysymtab: DysymtabCmd::default(),
747775
             dyld_info: DyldInfoCmd::default(),
776
+            loh: None,
748777
             function_starts: LinkEditDataCmd {
749778
                 dataoff: base_off,
750779
                 datasize: 0,
@@ -760,6 +789,7 @@ fn build_linkedit_plan(
760789
             weak_bind_bytes: Vec::new(),
761790
             lazy_bind_bytes: Vec::new(),
762791
             export_bytes: Vec::new(),
792
+            loh_bytes: Vec::new(),
763793
             function_starts_bytes: Vec::new(),
764794
             data_in_code_bytes: Vec::new(),
765795
             strtab_bytes: vec![0; 8],
@@ -833,6 +863,12 @@ fn build_linkedit_plan(
833863
     let weak_bind_bytes = pad_dyld_info_stream(bind_streams.weak_bind);
834864
     let lazy_bind_bytes = pad_dyld_info_stream(bind_streams.lazy_bind);
835865
     let export_bytes = pad_dyld_info_stream(build_export_trie(&symbol_plan.exports));
866
+    let loh_bytes = build_loh(
867
+        layout,
868
+        inputs.0.layout_inputs,
869
+        inputs.0.atom_table,
870
+        inputs.0.icf_redirects,
871
+    )?;
836872
     let function_starts_bytes =
837873
         build_function_starts(layout, inputs.0.layout_inputs, inputs.0.atom_table)?;
838874
     let data_in_code_bytes = build_data_in_code(
@@ -856,6 +892,7 @@ fn build_linkedit_plan(
856892
         "lazy bind stream offset",
857893
     )?;
858894
     let export_off = place_optional_block(&mut cursor, export_bytes.len(), "export trie offset")?;
895
+    let loh = place_optional_linkedit_data_block(&mut cursor, loh_bytes.len(), "LOH offset")?;
859896
     let function_starts = place_linkedit_data_block(
860897
         &mut cursor,
861898
         function_starts_bytes.len(),
@@ -900,6 +937,7 @@ fn build_linkedit_plan(
900937
             export_off,
901938
             export_size: export_bytes.len() as u32,
902939
         },
940
+        loh,
903941
         function_starts,
904942
         data_in_code,
905943
         symtab_bytes,
@@ -909,6 +947,7 @@ fn build_linkedit_plan(
909947
         weak_bind_bytes,
910948
         lazy_bind_bytes,
911949
         export_bytes,
950
+        loh_bytes,
912951
         function_starts_bytes,
913952
         data_in_code_bytes,
914953
         strtab_bytes: symbol_plan.strtab_bytes,
@@ -1420,6 +1459,83 @@ fn build_data_in_code(
14201459
     Ok(out)
14211460
 }
14221461
 
1462
+fn build_loh(
1463
+    layout: &Layout,
1464
+    inputs: &[LayoutInput<'_>],
1465
+    atom_table: &AtomTable,
1466
+    icf_redirects: Option<&HashMap<crate::resolve::AtomId, crate::resolve::AtomId>>,
1467
+) -> Result<Vec<u8>, WriteError> {
1468
+    #[derive(Clone)]
1469
+    struct RemappedEntry {
1470
+        input_order: usize,
1471
+        input_entry_index: usize,
1472
+        first_arg: u32,
1473
+        entry: LohEntry,
1474
+    }
1475
+
1476
+    let atoms_by_input_section = atom_table.by_input_section();
1477
+    let mut remapped = Vec::new();
1478
+    for (input_order, input) in inputs.iter().enumerate() {
1479
+        for (input_entry_index, entry) in input.object.loh.iter().cloned().enumerate() {
1480
+            let mut args = Vec::with_capacity(entry.args.len());
1481
+            for input_offset in entry.args {
1482
+                let (section_index, section_relative) =
1483
+                    remap_loh_to_section(input.object, input_offset)?;
1484
+                let (atom_id, atom_delta) = find_containing_atom_range(
1485
+                    atom_table,
1486
+                    &atoms_by_input_section,
1487
+                    input.id,
1488
+                    section_index,
1489
+                    section_relative,
1490
+                    4,
1491
+                    icf_redirects,
1492
+                )
1493
+                .ok_or_else(|| {
1494
+                    WriteError::MalformedLoh(
1495
+                        input.object.path.clone(),
1496
+                        format!(
1497
+                            "instruction at file offset {} did not land inside any atom",
1498
+                            input_offset
1499
+                        ),
1500
+                    )
1501
+                })?;
1502
+                let output_offset = layout.atom_file_offset(atom_id).ok_or_else(|| {
1503
+                    WriteError::MalformedLoh(
1504
+                        input.object.path.clone(),
1505
+                        format!(
1506
+                            "atom {:?} for instruction at file offset {} is missing from final layout",
1507
+                            atom_id, input_offset
1508
+                        ),
1509
+                    )
1510
+                })? + atom_delta as u64;
1511
+                args.push(u32_fit(output_offset, "LOH output offset")?);
1512
+            }
1513
+            remapped.push(RemappedEntry {
1514
+                input_order,
1515
+                input_entry_index,
1516
+                first_arg: args.first().copied().unwrap_or(0),
1517
+                entry: LohEntry {
1518
+                    kind: entry.kind,
1519
+                    args,
1520
+                },
1521
+            });
1522
+        }
1523
+    }
1524
+
1525
+    remapped.sort_by(|a, b| {
1526
+        a.first_arg
1527
+            .cmp(&b.first_arg)
1528
+            .then_with(|| a.input_order.cmp(&b.input_order))
1529
+            .then_with(|| a.input_entry_index.cmp(&b.input_entry_index))
1530
+    });
1531
+    Ok(write_loh_blob(
1532
+        &remapped
1533
+            .into_iter()
1534
+            .map(|entry| entry.entry)
1535
+            .collect::<Vec<_>>(),
1536
+    ))
1537
+}
1538
+
14231539
 fn remap_data_in_code_to_section(
14241540
     object: &ObjectFile,
14251541
     entry: DataInCodeEntry,
@@ -1468,6 +1584,46 @@ fn remap_data_in_code_to_section(
14681584
     ))
14691585
 }
14701586
 
1587
+fn remap_loh_to_section(object: &ObjectFile, input_offset: u32) -> Result<(u8, u32), WriteError> {
1588
+    let instruction_start = input_offset as u64;
1589
+    let instruction_end = instruction_start.checked_add(4).ok_or_else(|| {
1590
+        WriteError::MalformedLoh(
1591
+            object.path.clone(),
1592
+            format!("instruction at input offset {} overflows u64", input_offset),
1593
+        )
1594
+    })?;
1595
+    let mut matches = object
1596
+        .sections
1597
+        .iter()
1598
+        .enumerate()
1599
+        .filter(|(_, section)| !section.data.is_empty() && is_executable(section.kind))
1600
+        .filter_map(|(idx, section)| {
1601
+            let section_start = section.addr;
1602
+            let section_end = section.addr.checked_add(section.size)?;
1603
+            (section_start <= instruction_start && instruction_end <= section_end)
1604
+                .then_some(((idx + 1) as u8, (instruction_start - section_start) as u32))
1605
+        });
1606
+    if let Some(mapped) = matches.next() {
1607
+        if matches.next().is_none() {
1608
+            return Ok(mapped);
1609
+        }
1610
+        return Err(WriteError::MalformedLoh(
1611
+            object.path.clone(),
1612
+            format!(
1613
+                "instruction at input offset {} ambiguously matches multiple executable input sections",
1614
+                input_offset
1615
+            ),
1616
+        ));
1617
+    }
1618
+    Err(WriteError::MalformedLoh(
1619
+        object.path.clone(),
1620
+        format!(
1621
+            "instruction at input offset {} does not map to any executable input section range",
1622
+            input_offset
1623
+        ),
1624
+    ))
1625
+}
1626
+
14711627
 fn collect_imports(
14721628
     sym_table: &SymbolTable,
14731629
     synthetic_plan: &SyntheticPlan,
@@ -2133,6 +2289,17 @@ fn place_linkedit_data_block(
21332289
     })
21342290
 }
21352291
 
2292
+fn place_optional_linkedit_data_block(
2293
+    cursor: &mut u64,
2294
+    size: usize,
2295
+    context: &'static str,
2296
+) -> Result<Option<LinkEditDataCmd>, WriteError> {
2297
+    if size == 0 {
2298
+        return Ok(None);
2299
+    }
2300
+    Ok(Some(place_linkedit_data_block(cursor, size, context)?))
2301
+}
2302
+
21362303
 fn push_indirect_section(
21372304
     indirect_symbols: &mut Vec<u32>,
21382305
     indirect_starts: &mut HashMap<(String, String), u32>,
src/synth/mod.rsmodified
@@ -1203,6 +1203,7 @@ mod tests {
12031203
             strings: StringTable::from_bytes(strings),
12041204
             symtab: None,
12051205
             dysymtab: None,
1206
+            loh: Vec::new(),
12061207
             data_in_code: Vec::new(),
12071208
         }
12081209
     }
@@ -1288,6 +1289,7 @@ mod tests {
12881289
             strings: StringTable::from_bytes(strings),
12891290
             symtab: None,
12901291
             dysymtab: None,
1292
+            loh: Vec::new(),
12911293
             data_in_code: Vec::new(),
12921294
         }
12931295
     }
@@ -1337,6 +1339,7 @@ mod tests {
13371339
             strings: StringTable::from_bytes(strings),
13381340
             symtab: None,
13391341
             dysymtab: None,
1342
+            loh: Vec::new(),
13401343
             data_in_code: Vec::new(),
13411344
         }
13421345
     }
tests/linker_run.rsmodified
@@ -9,6 +9,7 @@ use std::process::Command;
99
 mod common;
1010
 
1111
 use afs_ld::leb::read_uleb;
12
+use afs_ld::loh::{parse_loh_blob, LOH_ARM64_ADRP_ADD};
1213
 use afs_ld::macho::constants::{
1314
     BIND_IMMEDIATE_MASK, BIND_OPCODE_ADD_ADDR_ULEB, BIND_OPCODE_DONE, BIND_OPCODE_DO_BIND,
1415
     BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB,
@@ -18,9 +19,9 @@ use afs_ld::macho::constants::{
1819
     BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, BIND_OPCODE_SET_TYPE_IMM,
1920
     BIND_SYMBOL_FLAGS_WEAK_IMPORT, DICE_KIND_JUMP_TABLE32, INDIRECT_SYMBOL_ABS,
2021
     INDIRECT_SYMBOL_LOCAL, LC_BUILD_VERSION, LC_DATA_IN_CODE, LC_DYLD_INFO_ONLY, LC_DYSYMTAB,
21
-    LC_FUNCTION_STARTS, LC_SEGMENT_64, LC_SYMTAB, N_PEXT, REBASE_IMMEDIATE_MASK,
22
-    REBASE_OPCODE_ADD_ADDR_IMM_SCALED, REBASE_OPCODE_ADD_ADDR_ULEB, REBASE_OPCODE_DONE,
23
-    REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB, REBASE_OPCODE_DO_REBASE_IMM_TIMES,
22
+    LC_FUNCTION_STARTS, LC_LINKER_OPTIMIZATION_HINT, LC_SEGMENT_64, LC_SYMTAB, N_PEXT,
23
+    REBASE_IMMEDIATE_MASK, REBASE_OPCODE_ADD_ADDR_IMM_SCALED, REBASE_OPCODE_ADD_ADDR_ULEB,
24
+    REBASE_OPCODE_DONE, REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB, REBASE_OPCODE_DO_REBASE_IMM_TIMES,
2425
     REBASE_OPCODE_DO_REBASE_ULEB_TIMES, REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB,
2526
     REBASE_OPCODE_MASK, REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB, REBASE_OPCODE_SET_TYPE_IMM,
2627
     REBASE_TYPE_POINTER, SG_READ_ONLY,
@@ -516,10 +517,16 @@ fn raw_linkedit_data_cmd(bytes: &[u8], expected_cmd: u32) -> (u32, u32) {
516517
     let header = parse_header(bytes).unwrap();
517518
     let commands = parse_commands(&header, bytes).unwrap();
518519
     for cmd in commands {
519
-        if let LoadCommand::Raw { cmd, data, .. } = cmd {
520
-            if cmd == expected_cmd {
520
+        match cmd {
521
+            LoadCommand::Raw { cmd, data, .. } if cmd == expected_cmd => {
521522
                 return (u32_le(&data[0..4]), u32_le(&data[4..8]));
522523
             }
524
+            LoadCommand::LinkerOptimizationHint(linkedit)
525
+                if expected_cmd == LC_LINKER_OPTIMIZATION_HINT =>
526
+            {
527
+                return (linkedit.dataoff, linkedit.datasize);
528
+            }
529
+            _ => {}
523530
         }
524531
     }
525532
     panic!("missing raw linkedit command 0x{expected_cmd:x}");
@@ -772,6 +779,10 @@ fn canonical_data_in_code(bytes: &[u8]) -> Vec<DataInCodeRecord> {
772779
         .collect()
773780
 }
774781
 
782
+fn decode_loh(bytes: &[u8]) -> Vec<afs_ld::loh::LohEntry> {
783
+    parse_loh_blob(&linkedit_payload(bytes, LC_LINKER_OPTIMIZATION_HINT)).unwrap()
784
+}
785
+
775786
 fn assert_strtab_within_five_percent(ours: &[u8], apple: &[u8]) {
776787
     let delta = ours.len().abs_diff(apple.len());
777788
     assert!(
@@ -1918,6 +1929,62 @@ fn linker_run_emits_non_empty_executable_from_real_object() {
19181929
     let _ = fs::remove_file(apple_out);
19191930
 }
19201931
 
1932
/// Links an assembled object that carries one `AdrpAdd` hint and checks that
/// the output still has exactly that hint, with both arguments 4 bytes apart
/// and inside the output `__TEXT,__text` file range.
///
/// Skipped (with a message on stderr) when `xcrun` is unavailable or the
/// source cannot be assembled.
#[test]
fn linker_run_preserves_loh_payloads_from_input_objects() {
    if !have_xcrun() {
        eprintln!("skipping: xcrun as unavailable");
        return;
    }

    let obj = scratch("loh-main.o");
    let out = scratch("loh-main.out");
    let src = r#"
        .section __TEXT,__text,regular,pure_instructions
        .globl _main
        _main:
        Lloh0:
            adrp x0, _msg@PAGE
        Lloh1:
            add x0, x0, _msg@PAGEOFF
            mov x0, #0
            ret
        .section __DATA,__data
        _msg:
            .quad 0
        .loh AdrpAdd Lloh0, Lloh1
        .subsections_via_symbols
    "#;
    if let Err(e) = assemble(src, &obj) {
        eprintln!("skipping: assemble failed: {e}");
        return;
    }

    let opts = LinkOptions {
        // Cloned so the path is still available for cleanup below.
        inputs: vec![obj.clone()],
        output: Some(out.clone()),
        kind: OutputKind::Executable,
        ..LinkOptions::default()
    };
    Linker::run(&opts).unwrap();

    let bytes = fs::read(&out).unwrap();
    let loh = decode_loh(&bytes);
    assert_eq!(loh.len(), 1);
    assert_eq!(loh[0].kind, LOH_ARM64_ADRP_ADD);
    assert_eq!(loh[0].args.len(), 2);
    assert_eq!(loh[0].args[1] - loh[0].args[0], 4);

    let text = output_section_header(&bytes, "__TEXT", "__text").unwrap();
    let text_start = text.offset;
    let text_end = text.offset + text.size as u32;
    for &arg in &loh[0].args {
        assert!(
            text_start <= arg && arg + 4 <= text_end,
            "LOH instruction offset {arg:#x} escaped __TEXT,__text [{text_start:#x}, {text_end:#x})",
        );
    }

    // Best-effort removal of the scratch artifacts once the checks passed.
    let _ = fs::remove_file(obj);
    let _ = fs::remove_file(out);
}
1987
+
19211988
 #[test]
19221989
 fn linker_run_emits_minimal_dylib_from_real_object() {
19231990
     if !have_xcrun() {
@@ -7556,7 +7623,9 @@ fn linker_run_icf_safe_folds_identical_private_literal16() {
75567623
     let baseline_literals = output_section(&baseline_bytes, "__TEXT", "__literal16")
75577624
         .unwrap()
75587625
         .1;
7559
-    let our_literals = output_section(&our_bytes, "__TEXT", "__literal16").unwrap().1;
7626
+    let our_literals = output_section(&our_bytes, "__TEXT", "__literal16")
7627
+        .unwrap()
7628
+        .1;
75607629
 
75617630
     assert_ne!(
75627631
         baseline_symbols.get("_lit1"),