Rust · 19916 bytes Raw Blame History
1 //! Input-file aggregate: one `ObjectFile` per parsed `.o` on disk.
2 //!
3 //! Sprint 2 ties `MachHeader64`, the load-command list, `InputSection`s,
4 //! `InputSymbol`s, the `StringTable`, and the decoded `DysymtabCmd` together.
5 //! Sprint 4 will wrap this in an `InputFile` enum alongside `ArchiveFile`;
6 //! for now `ObjectFile` stands alone.
7
8 use std::path::PathBuf;
9
10 use crate::loh::{parse_loh_blob, LohEntry};
11 use crate::macho::constants::LC_DATA_IN_CODE;
12 use crate::macho::reader::{
13 parse_commands, parse_header, DysymtabCmd, LinkEditDataCmd, LoadCommand, MachHeader64,
14 ReadError, SymtabCmd, HEADER_SIZE,
15 };
16 use crate::section::InputSection;
17 use crate::string_table::StringTable;
18 use crate::symbol::{parse_nlist_table, InputSymbol, SymKind};
19
20 /// Whole parsed `.o` on disk. Section bodies and relocation bytes are owned
21 /// copies so the buffer this was parsed from can drop.
22 #[derive(Debug, Clone)]
23 pub struct ObjectFile {
24 pub path: PathBuf,
25 pub header: MachHeader64,
26 pub commands: Vec<LoadCommand>,
27 pub sections: Vec<InputSection>,
28 pub symbols: Vec<InputSymbol>,
29 pub strings: StringTable,
30 pub symtab: Option<SymtabCmd>,
31 pub dysymtab: Option<DysymtabCmd>,
32 pub loh: Vec<LohEntry>,
33 pub data_in_code: Vec<DataInCodeEntry>,
34 }
35
/// A single `data_in_code_entry` carried over from an input object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DataInCodeEntry {
    /// File offset from the input Mach-O header.
    pub offset: u32,
    /// Length field of the entry, as stored on disk.
    pub length: u16,
    /// Kind field of the entry (e.g. `DICE_KIND_DATA`), as stored on disk.
    pub kind: u16,
}

impl DataInCodeEntry {
    /// On-disk size of one entry: `u32` offset + `u16` length + `u16` kind.
    const SIZE: usize = 8;

    /// Decode a payload of back-to-back little-endian entries.
    ///
    /// Only whole `SIZE`-byte records are decoded; any trailing partial
    /// record is ignored, so callers that care must validate the payload
    /// length themselves.
    fn parse_payload(payload: &[u8]) -> Vec<Self> {
        let records = payload.chunks_exact(Self::SIZE);
        let mut entries = Vec::with_capacity(records.len());
        for raw in records {
            // `chunks_exact` guarantees `raw.len() == SIZE`, so these
            // fixed indices are always in bounds.
            entries.push(Self {
                offset: u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]),
                length: u16::from_le_bytes([raw[4], raw[5]]),
                kind: u16::from_le_bytes([raw[6], raw[7]]),
            });
        }
        entries
    }
}
59
60 impl ObjectFile {
61 pub fn parse(path: impl Into<PathBuf>, file_bytes: &[u8]) -> Result<Self, ReadError> {
62 let path = path.into();
63 let header = parse_header(file_bytes)?;
64 let commands = parse_commands(&header, file_bytes)?;
65
66 // Collect sections from every LC_SEGMENT_64 (MH_OBJECT usually has
67 // exactly one segment, but the layout is not required to).
68 let mut sections = Vec::new();
69 for cmd in &commands {
70 if let LoadCommand::Segment64(seg) = cmd {
71 for hdr in &seg.sections {
72 sections.push(InputSection::from_header(hdr, file_bytes)?);
73 }
74 }
75 }
76
77 // Lift the symbol table + string table if present. A bare segment-only
78 // .o without LC_SYMTAB is technically legal and stays symbol-empty.
79 let symtab = commands.iter().find_map(|c| match c {
80 LoadCommand::Symtab(s) => Some(*s),
81 _ => None,
82 });
83 let dysymtab = commands.iter().find_map(|c| match c {
84 LoadCommand::Dysymtab(d) => Some(*d),
85 _ => None,
86 });
87
88 let (symbols, strings) = match symtab {
89 Some(s) => (
90 parse_nlist_table(file_bytes, s.symoff, s.nsyms)?,
91 StringTable::from_file(file_bytes, s.stroff, s.strsize)?,
92 ),
93 None => (Vec::new(), StringTable::from_bytes(Vec::new())),
94 };
95 let loh = parse_loh(&commands, file_bytes)?;
96 let data_in_code = parse_data_in_code(&commands, file_bytes)?;
97
98 Ok(ObjectFile {
99 path,
100 header,
101 commands,
102 sections,
103 symbols,
104 strings,
105 symtab,
106 dysymtab,
107 loh,
108 data_in_code,
109 })
110 }
111
112 /// Resolve the name of a symbol via this object's string table.
113 pub fn symbol_name(&self, sym: &InputSymbol) -> Result<&str, ReadError> {
114 self.strings.get(sym.strx())
115 }
116
117 /// For `N_INDR` aliases, resolve the aliased name via this object's
118 /// string table. Returns `None` when the symbol is not an indirect entry.
119 pub fn indirect_target_name(&self, sym: &InputSymbol) -> Option<Result<&str, ReadError>> {
120 if sym.kind() == SymKind::Indirect {
121 Some(self.strings.get(sym.value() as u32))
122 } else {
123 None
124 }
125 }
126
127 /// Iterate over sections in 1-based-nlist order — i.e., the order
128 /// `nlist.n_sect` refers to. afs-ld preserves the parsed order, which
129 /// matches the order they appear in the segments.
130 pub fn section_for_symbol(&self, sym: &InputSymbol) -> Option<&InputSection> {
131 if sym.sect_idx() == 0 {
132 return None;
133 }
134 self.sections
135 .get((sym.sect_idx() as usize).saturating_sub(1))
136 }
137 }
138
139 fn parse_loh(commands: &[LoadCommand], file_bytes: &[u8]) -> Result<Vec<LohEntry>, ReadError> {
140 let mut out = Vec::new();
141 for command in commands {
142 let LoadCommand::LinkerOptimizationHint(linkedit) = command else {
143 continue;
144 };
145 let start = linkedit.dataoff as usize;
146 let end = start
147 .checked_add(linkedit.datasize as usize)
148 .ok_or(ReadError::Truncated {
149 need: usize::MAX,
150 have: file_bytes.len(),
151 context: "LC_LINKER_OPTIMIZATION_HINT payload (offset + size overflows)",
152 })?;
153 if end > file_bytes.len() {
154 return Err(ReadError::Truncated {
155 need: end,
156 have: file_bytes.len(),
157 context: "LC_LINKER_OPTIMIZATION_HINT payload",
158 });
159 }
160 out.extend(parse_loh_blob(&file_bytes[start..end])?);
161 }
162 Ok(out)
163 }
164
165 fn parse_data_in_code(
166 commands: &[LoadCommand],
167 file_bytes: &[u8],
168 ) -> Result<Vec<DataInCodeEntry>, ReadError> {
169 let mut out = Vec::new();
170 for command in commands {
171 let LoadCommand::Raw { cmd, cmdsize, data } = command else {
172 continue;
173 };
174 if *cmd != LC_DATA_IN_CODE {
175 continue;
176 }
177 let linkedit = LinkEditDataCmd::parse(*cmd, *cmdsize, data)?;
178 if !(linkedit.datasize as usize).is_multiple_of(DataInCodeEntry::SIZE) {
179 return Err(ReadError::BadCmdsize {
180 cmd: *cmd,
181 cmdsize: linkedit.datasize,
182 at_offset: 0,
183 reason: "LC_DATA_IN_CODE payload size is not a multiple of 8",
184 });
185 }
186 let start = linkedit.dataoff as usize;
187 let end = start
188 .checked_add(linkedit.datasize as usize)
189 .ok_or(ReadError::Truncated {
190 need: usize::MAX,
191 have: file_bytes.len(),
192 context: "LC_DATA_IN_CODE payload (offset + size overflows)",
193 })?;
194 if end > file_bytes.len() {
195 return Err(ReadError::Truncated {
196 need: end,
197 have: file_bytes.len(),
198 context: "LC_DATA_IN_CODE payload",
199 });
200 }
201 out.extend(DataInCodeEntry::parse_payload(&file_bytes[start..end]));
202 }
203 Ok(out)
204 }
205
206 /// Total `sizeofcmds` region, exposed for callers doing byte-level round-trip
207 /// checks against the original file image.
208 pub fn header_and_cmds_end(header: &MachHeader64) -> usize {
209 HEADER_SIZE + header.sizeofcmds as usize
210 }
211
#[cfg(test)]
mod tests {
    use super::*;
    use crate::loh::{write_loh_blob, LOH_ARM64_ADRP_ADD};
    use crate::macho::constants::*;
    use crate::macho::reader::{
        write_commands, write_header, LinkEditDataCmd, LoadCommand, Section64Header, Segment64,
    };
    use crate::symbol::{RawNlist, NLIST_SIZE};

    /// Fake instruction bytes used as the `__TEXT,__text` body in every
    /// synthetic image.
    const TEXT_BYTES: [u8; 8] = [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22];

    /// Which optional linkedit load command a synthetic image carries.
    #[derive(Clone, Copy)]
    enum LinkEditKind {
        /// `LC_DATA_IN_CODE`, emitted as a `LoadCommand::Raw`.
        DataInCode,
        /// `LC_LINKER_OPTIMIZATION_HINT`, emitted as a typed command.
        Loh,
    }

    /// Copy `s` into a zero-padded 16-byte Mach-O name field, truncating
    /// anything past 16 bytes.
    fn name16(s: &str) -> [u8; 16] {
        let mut out = [0u8; 16];
        let bytes = s.as_bytes();
        let n = bytes.len().min(16);
        out[..n].copy_from_slice(&bytes[..n]);
        out
    }

    /// Build a tiny in-memory MH_OBJECT image with one `__TEXT,__text`
    /// section holding 8 bytes, one external symbol `_main`, and the minimum
    /// symtab. When `extra` is given, a linkedit load command of that kind is
    /// inserted between the segment and symtab commands, and its payload is
    /// placed between the section content and the nlist table.
    ///
    /// Layout: header → load commands → section content → [extra payload]
    /// → nlist → strtab.
    fn synth_image_with(extra: Option<(LinkEditKind, Vec<u8>)>) -> Vec<u8> {
        // 1) Section header for __text; the file offset is patched in below
        //    once the size of the load commands is known.
        let text_sect = Section64Header {
            sectname: name16("__text"),
            segname: name16("__TEXT"),
            addr: 0,
            size: 8,
            offset: 0,
            align: 2,
            reloff: 0,
            nreloc: 0,
            flags: S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS,
            reserved1: 0,
            reserved2: 0,
            reserved3: 0,
        };
        // 2) Segment with one section.
        let mut seg = Segment64 {
            segname: name16(""),
            vmaddr: 0,
            vmsize: 8,
            fileoff: 0,
            filesize: 8,
            maxprot: 7,
            initprot: 7,
            flags: 0,
            sections: vec![text_sect],
        };
        // 3) Symtab + string table.
        let strtab = b"\0_main\0";
        let nsyms = 1u32;
        let sym = RawNlist {
            strx: 1, // "_main"
            n_type: N_SECT | N_EXT,
            n_sect: 1,
            n_desc: 0,
            n_value: 0,
        };

        // 4) Sizes and offsets. The optional command adds one fixed-size
        //    load command and shifts everything after the section content.
        let (ncmds, extra_cmd_size, extra_len) = match &extra {
            Some((_, blob)) => (3, LinkEditDataCmd::WIRE_SIZE as usize, blob.len() as u32),
            None => (2, 0, 0),
        };
        let sizeofcmds =
            (seg.wire_size() as usize + extra_cmd_size + SymtabCmd::WIRE_SIZE as usize) as u32;
        let section_offset = (HEADER_SIZE + sizeofcmds as usize) as u32;
        let extra_off = section_offset + TEXT_BYTES.len() as u32; // after section content
        let symoff = extra_off + extra_len;
        let stroff = symoff + NLIST_SIZE as u32 * nsyms;

        seg.fileoff = section_offset as u64;
        seg.sections[0].offset = section_offset;

        let header = MachHeader64 {
            magic: MH_MAGIC_64,
            cputype: CPU_TYPE_ARM64,
            cpusubtype: 0,
            filetype: MH_OBJECT,
            ncmds,
            sizeofcmds,
            flags: MH_SUBSECTIONS_VIA_SYMBOLS,
            reserved: 0,
        };
        let symtab_cmd = SymtabCmd {
            symoff,
            nsyms,
            stroff,
            strsize: strtab.len() as u32,
        };

        // 5) Command list: segment, optional linkedit command, symtab.
        let mut cmds = vec![LoadCommand::Segment64(seg)];
        let mut payload = Vec::new();
        if let Some((kind, blob)) = extra {
            cmds.push(match kind {
                LinkEditKind::DataInCode => LoadCommand::Raw {
                    cmd: LC_DATA_IN_CODE,
                    cmdsize: LinkEditDataCmd::WIRE_SIZE,
                    data: [
                        extra_off.to_le_bytes().as_slice(),
                        extra_len.to_le_bytes().as_slice(),
                    ]
                    .concat(),
                },
                LinkEditKind::Loh => LoadCommand::LinkerOptimizationHint(LinkEditDataCmd {
                    dataoff: extra_off,
                    datasize: extra_len,
                }),
            });
            payload = blob;
        }
        cmds.push(LoadCommand::Symtab(symtab_cmd));

        // 6) Serialize in layout order.
        let mut image = Vec::new();
        write_header(&header, &mut image);
        write_commands(&cmds, &mut image);
        image.extend_from_slice(&TEXT_BYTES);
        image.extend_from_slice(&payload);
        sym.write(&mut image);
        image.extend_from_slice(strtab);
        image
    }

    /// Minimal image: segment + symtab only.
    fn synth_image() -> Vec<u8> {
        synth_image_with(None)
    }

    /// Minimal image plus one `LC_DATA_IN_CODE` entry
    /// `{ offset: 0, length: 4, kind: DICE_KIND_DATA }`.
    fn synth_image_with_data_in_code() -> Vec<u8> {
        let dic_blob = [
            0u32.to_le_bytes().as_slice(),
            4u16.to_le_bytes().as_slice(),
            DICE_KIND_DATA.to_le_bytes().as_slice(),
        ]
        .concat();
        synth_image_with(Some((LinkEditKind::DataInCode, dic_blob)))
    }

    /// Minimal image plus one `LOH_ARM64_ADRP_ADD` hint with args `[0, 4]`.
    fn synth_image_with_loh() -> Vec<u8> {
        let loh_blob = write_loh_blob(&[LohEntry {
            kind: LOH_ARM64_ADRP_ADD,
            args: vec![0, 4],
        }]);
        synth_image_with(Some((LinkEditKind::Loh, loh_blob)))
    }

    #[test]
    fn parse_synth_object_end_to_end() {
        let image = synth_image();
        let obj = ObjectFile::parse("/tmp/synth.o", &image).unwrap();
        assert_eq!(obj.sections.len(), 1);
        let sec = &obj.sections[0];
        assert_eq!(sec.segname, "__TEXT");
        assert_eq!(sec.sectname, "__text");
        assert_eq!(sec.data.len(), 8);
        assert_eq!(
            sec.data,
            vec![0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22]
        );

        assert_eq!(obj.symbols.len(), 1);
        let sym = &obj.symbols[0];
        assert_eq!(obj.symbol_name(sym).unwrap(), "_main");
        assert!(sym.is_ext());
        let sect = obj.section_for_symbol(sym).expect("n_sect=1 resolves");
        assert_eq!(sect.sectname, "__text");
    }

    #[test]
    fn parse_preserves_data_in_code_entries() {
        let image = synth_image_with_data_in_code();
        let obj = ObjectFile::parse("/tmp/synth-dic.o", &image).unwrap();
        assert_eq!(
            obj.data_in_code,
            vec![DataInCodeEntry {
                offset: 0,
                length: 4,
                kind: DICE_KIND_DATA,
            }]
        );
    }

    #[test]
    fn parse_preserves_loh_entries() {
        let image = synth_image_with_loh();
        let obj = ObjectFile::parse("/tmp/synth-loh.o", &image).unwrap();
        assert_eq!(
            obj.loh,
            vec![LohEntry {
                kind: LOH_ARM64_ADRP_ADD,
                args: vec![0, 4],
            }]
        );
    }

    #[test]
    fn indirect_target_name_resolves() {
        // Build a minimal strtab with "\0_alias\0_target\0" and a RawNlist
        // whose n_value points at "_target".
        let strtab = StringTable::from_bytes(b"\0_alias\0_target\0".to_vec());
        let obj = ObjectFile {
            path: PathBuf::from("/tmp/t"),
            header: MachHeader64 {
                magic: MH_MAGIC_64,
                cputype: CPU_TYPE_ARM64,
                cpusubtype: 0,
                filetype: MH_OBJECT,
                ncmds: 0,
                sizeofcmds: 0,
                flags: 0,
                reserved: 0,
            },
            commands: Vec::new(),
            sections: Vec::new(),
            symbols: Vec::new(),
            strings: strtab,
            symtab: None,
            dysymtab: None,
            loh: Vec::new(),
            data_in_code: Vec::new(),
        };
        let alias = InputSymbol::from_raw(RawNlist {
            strx: 1,
            n_type: N_INDR | N_EXT,
            n_sect: 0,
            n_desc: 0,
            n_value: 8, // strx of "_target"
        });
        let resolved = obj.indirect_target_name(&alias).unwrap().unwrap();
        assert_eq!(resolved, "_target");
    }
}
602