Rust · 15484 bytes Raw Blame History
1 //! Input-file aggregate: one `ObjectFile` per parsed `.o` on disk.
2 //!
3 //! Sprint 2 ties `MachHeader64`, the load-command list, `InputSection`s,
4 //! `InputSymbol`s, the `StringTable`, and the decoded `DysymtabCmd` together.
5 //! Sprint 4 will wrap this in an `InputFile` enum alongside `ArchiveFile`;
6 //! for now `ObjectFile` stands alone.
7
8 use std::path::PathBuf;
9
10 use crate::macho::constants::LC_DATA_IN_CODE;
11 use crate::macho::reader::{
12 parse_commands, parse_header, DysymtabCmd, LinkEditDataCmd, LoadCommand, MachHeader64,
13 ReadError, SymtabCmd, HEADER_SIZE,
14 };
15 use crate::section::InputSection;
16 use crate::string_table::StringTable;
17 use crate::symbol::{parse_nlist_table, InputSymbol, SymKind};
18
19 /// Whole parsed `.o` on disk. Section bodies and relocation bytes are owned
20 /// copies so the buffer this was parsed from can drop.
21 #[derive(Debug, Clone)]
22 pub struct ObjectFile {
23 pub path: PathBuf,
24 pub header: MachHeader64,
25 pub commands: Vec<LoadCommand>,
26 pub sections: Vec<InputSection>,
27 pub symbols: Vec<InputSymbol>,
28 pub strings: StringTable,
29 pub symtab: Option<SymtabCmd>,
30 pub dysymtab: Option<DysymtabCmd>,
31 pub data_in_code: Vec<DataInCodeEntry>,
32 }
33
/// A single `data_in_code_entry` record carried over from an input object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DataInCodeEntry {
    /// File offset from the input Mach-O header.
    pub offset: u32,
    /// Little-endian `u16` stored at bytes 4..6 of the record.
    pub length: u16,
    /// Little-endian `u16` stored at bytes 6..8 of the record.
    pub kind: u16,
}

impl DataInCodeEntry {
    /// Encoded size of one record in bytes: `u32` + `u16` + `u16`.
    const SIZE: usize = 8;

    /// Decode a packed run of little-endian records.
    ///
    /// Only whole `SIZE`-byte records are decoded; any trailing bytes that do
    /// not fill a complete record are ignored.
    fn parse_payload(payload: &[u8]) -> Vec<Self> {
        let mut entries = Vec::with_capacity(payload.len() / Self::SIZE);
        // `chunks_exact` guarantees every `raw` is exactly `SIZE` bytes long,
        // so the fixed indices below are always in bounds.
        for raw in payload.chunks_exact(Self::SIZE) {
            entries.push(Self {
                offset: u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]),
                length: u16::from_le_bytes([raw[4], raw[5]]),
                kind: u16::from_le_bytes([raw[6], raw[7]]),
            });
        }
        entries
    }
}
57
58 impl ObjectFile {
59 pub fn parse(path: impl Into<PathBuf>, file_bytes: &[u8]) -> Result<Self, ReadError> {
60 let path = path.into();
61 let header = parse_header(file_bytes)?;
62 let commands = parse_commands(&header, file_bytes)?;
63
64 // Collect sections from every LC_SEGMENT_64 (MH_OBJECT usually has
65 // exactly one segment, but the layout is not required to).
66 let mut sections = Vec::new();
67 for cmd in &commands {
68 if let LoadCommand::Segment64(seg) = cmd {
69 for hdr in &seg.sections {
70 sections.push(InputSection::from_header(hdr, file_bytes)?);
71 }
72 }
73 }
74
75 // Lift the symbol table + string table if present. A bare segment-only
76 // .o without LC_SYMTAB is technically legal and stays symbol-empty.
77 let symtab = commands.iter().find_map(|c| match c {
78 LoadCommand::Symtab(s) => Some(*s),
79 _ => None,
80 });
81 let dysymtab = commands.iter().find_map(|c| match c {
82 LoadCommand::Dysymtab(d) => Some(*d),
83 _ => None,
84 });
85
86 let (symbols, strings) = match symtab {
87 Some(s) => (
88 parse_nlist_table(file_bytes, s.symoff, s.nsyms)?,
89 StringTable::from_file(file_bytes, s.stroff, s.strsize)?,
90 ),
91 None => (Vec::new(), StringTable::from_bytes(Vec::new())),
92 };
93 let data_in_code = parse_data_in_code(&commands, file_bytes)?;
94
95 Ok(ObjectFile {
96 path,
97 header,
98 commands,
99 sections,
100 symbols,
101 strings,
102 symtab,
103 dysymtab,
104 data_in_code,
105 })
106 }
107
108 /// Resolve the name of a symbol via this object's string table.
109 pub fn symbol_name(&self, sym: &InputSymbol) -> Result<&str, ReadError> {
110 self.strings.get(sym.strx())
111 }
112
113 /// For `N_INDR` aliases, resolve the aliased name via this object's
114 /// string table. Returns `None` when the symbol is not an indirect entry.
115 pub fn indirect_target_name(&self, sym: &InputSymbol) -> Option<Result<&str, ReadError>> {
116 if sym.kind() == SymKind::Indirect {
117 Some(self.strings.get(sym.value() as u32))
118 } else {
119 None
120 }
121 }
122
123 /// Iterate over sections in 1-based-nlist order — i.e., the order
124 /// `nlist.n_sect` refers to. afs-ld preserves the parsed order, which
125 /// matches the order they appear in the segments.
126 pub fn section_for_symbol(&self, sym: &InputSymbol) -> Option<&InputSection> {
127 if sym.sect_idx() == 0 {
128 return None;
129 }
130 self.sections
131 .get((sym.sect_idx() as usize).saturating_sub(1))
132 }
133 }
134
135 fn parse_data_in_code(
136 commands: &[LoadCommand],
137 file_bytes: &[u8],
138 ) -> Result<Vec<DataInCodeEntry>, ReadError> {
139 let mut out = Vec::new();
140 for command in commands {
141 let LoadCommand::Raw { cmd, cmdsize, data } = command else {
142 continue;
143 };
144 if *cmd != LC_DATA_IN_CODE {
145 continue;
146 }
147 let linkedit = LinkEditDataCmd::parse(*cmd, *cmdsize, data)?;
148 if !(linkedit.datasize as usize).is_multiple_of(DataInCodeEntry::SIZE) {
149 return Err(ReadError::BadCmdsize {
150 cmd: *cmd,
151 cmdsize: linkedit.datasize,
152 at_offset: 0,
153 reason: "LC_DATA_IN_CODE payload size is not a multiple of 8",
154 });
155 }
156 let start = linkedit.dataoff as usize;
157 let end = start
158 .checked_add(linkedit.datasize as usize)
159 .ok_or(ReadError::Truncated {
160 need: usize::MAX,
161 have: file_bytes.len(),
162 context: "LC_DATA_IN_CODE payload (offset + size overflows)",
163 })?;
164 if end > file_bytes.len() {
165 return Err(ReadError::Truncated {
166 need: end,
167 have: file_bytes.len(),
168 context: "LC_DATA_IN_CODE payload",
169 });
170 }
171 out.extend(DataInCodeEntry::parse_payload(&file_bytes[start..end]));
172 }
173 Ok(out)
174 }
175
176 /// Total `sizeofcmds` region, exposed for callers doing byte-level round-trip
177 /// checks against the original file image.
178 pub fn header_and_cmds_end(header: &MachHeader64) -> usize {
179 HEADER_SIZE + header.sizeofcmds as usize
180 }
181
#[cfg(test)]
mod tests {
    use super::*;
    use crate::macho::constants::*;
    use crate::macho::reader::{
        write_commands, write_header, LinkEditDataCmd, LoadCommand, Section64Header, Segment64,
    };
    use crate::symbol::{RawNlist, NLIST_SIZE};

    /// Fake instruction bytes used as the body of the synthetic `__text`
    /// section.
    const TEXT_BYTES: [u8; 8] = [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22];

    /// String table shared by the synthetic images; `_main` sits at strx 1.
    const STRTAB: &[u8] = b"\0_main\0";

    /// Zero-pad (or truncate) `s` into a fixed 16-byte Mach-O name field.
    fn name16(s: &str) -> [u8; 16] {
        let mut out = [0u8; 16];
        for (dst, src) in out.iter_mut().zip(s.bytes()) {
            *dst = src;
        }
        out
    }

    /// Build a tiny in-memory MH_OBJECT image with one `__TEXT,__text`
    /// section holding 8 bytes, one external symbol `_main`, and the minimum
    /// symtab. When `data_in_code` is `Some`, an `LC_DATA_IN_CODE` command is
    /// added that points at a copy of that blob.
    ///
    /// Layout: header → segment cmd → [data-in-code cmd] → symtab cmd →
    /// section content → [data-in-code blob] → nlist → strtab.
    fn build_image(data_in_code: Option<&[u8]>) -> Vec<u8> {
        // File offsets start as placeholders; they are patched once the
        // size of the load-command region is known.
        let mut seg = Segment64 {
            segname: name16(""),
            vmaddr: 0,
            vmsize: 8,
            fileoff: 0,
            filesize: 8,
            maxprot: 7,
            initprot: 7,
            flags: 0,
            sections: vec![Section64Header {
                sectname: name16("__text"),
                segname: name16("__TEXT"),
                addr: 0,
                size: 8,
                offset: 0,
                align: 2,
                reloff: 0,
                nreloc: 0,
                flags: S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS,
                reserved1: 0,
                reserved2: 0,
                reserved3: 0,
            }],
        };
        let nsyms = 1u32;
        let sym = RawNlist {
            strx: 1, // "_main"
            n_type: N_SECT | N_EXT,
            n_sect: 1,
            n_desc: 0,
            n_value: 0,
        };

        let dic_cmd_size = match data_in_code {
            Some(_) => LinkEditDataCmd::WIRE_SIZE as usize,
            None => 0,
        };
        let dic_blob_len = data_in_code.map_or(0, |blob| blob.len());
        let sizeofcmds =
            (seg.wire_size() as usize + dic_cmd_size + SymtabCmd::WIRE_SIZE as usize) as u32;

        let section_offset = (HEADER_SIZE + sizeofcmds as usize) as u32;
        let data_in_code_off = section_offset + TEXT_BYTES.len() as u32;
        let symoff = data_in_code_off + dic_blob_len as u32;
        let stroff = symoff + NLIST_SIZE as u32 * nsyms;

        // Patching the offsets does not change the segment's wire size, so
        // `sizeofcmds` computed above stays valid.
        seg.sections[0].offset = section_offset;
        seg.fileoff = section_offset as u64;

        let mut cmds = vec![LoadCommand::Segment64(seg)];
        if data_in_code.is_some() {
            let mut data = Vec::with_capacity(8);
            data.extend_from_slice(&data_in_code_off.to_le_bytes());
            data.extend_from_slice(&(dic_blob_len as u32).to_le_bytes());
            cmds.push(LoadCommand::Raw {
                cmd: LC_DATA_IN_CODE,
                cmdsize: LinkEditDataCmd::WIRE_SIZE,
                data,
            });
        }
        cmds.push(LoadCommand::Symtab(SymtabCmd {
            symoff,
            nsyms,
            stroff,
            strsize: STRTAB.len() as u32,
        }));

        let header = MachHeader64 {
            magic: MH_MAGIC_64,
            cputype: CPU_TYPE_ARM64,
            cpusubtype: 0,
            filetype: MH_OBJECT,
            ncmds: cmds
                .len()
                .try_into()
                .expect("command count fits the header field"),
            sizeofcmds,
            flags: MH_SUBSECTIONS_VIA_SYMBOLS,
            reserved: 0,
        };

        let mut image = Vec::new();
        write_header(&header, &mut image);
        write_commands(&cmds, &mut image);
        image.extend_from_slice(&TEXT_BYTES);
        if let Some(blob) = data_in_code {
            image.extend_from_slice(blob);
        }
        sym.write(&mut image);
        image.extend_from_slice(STRTAB);
        image
    }

    /// Minimal object image without any `LC_DATA_IN_CODE` command.
    fn synth_image() -> Vec<u8> {
        build_image(None)
    }

    /// Same image as `synth_image`, plus one data-in-code entry
    /// `{ offset: 0, length: 4, kind: DICE_KIND_DATA }`.
    fn synth_image_with_data_in_code() -> Vec<u8> {
        let mut dic_blob = Vec::new();
        dic_blob.extend_from_slice(&0u32.to_le_bytes());
        dic_blob.extend_from_slice(&4u16.to_le_bytes());
        dic_blob.extend_from_slice(&DICE_KIND_DATA.to_le_bytes());
        build_image(Some(&dic_blob))
    }

    #[test]
    fn parse_synth_object_end_to_end() {
        let image = synth_image();
        let obj = ObjectFile::parse("/tmp/synth.o", &image).unwrap();
        assert_eq!(obj.sections.len(), 1);
        let sec = &obj.sections[0];
        assert_eq!(sec.segname, "__TEXT");
        assert_eq!(sec.sectname, "__text");
        assert_eq!(sec.data.len(), 8);
        assert_eq!(sec.data, TEXT_BYTES.to_vec());

        assert_eq!(obj.symbols.len(), 1);
        let sym = &obj.symbols[0];
        assert_eq!(obj.symbol_name(sym).unwrap(), "_main");
        assert!(sym.is_ext());
        let sect = obj.section_for_symbol(sym).expect("n_sect=1 resolves");
        assert_eq!(sect.sectname, "__text");
    }

    #[test]
    fn parse_preserves_data_in_code_entries() {
        let image = synth_image_with_data_in_code();
        let obj = ObjectFile::parse("/tmp/synth-dic.o", &image).unwrap();
        assert_eq!(
            obj.data_in_code,
            vec![DataInCodeEntry {
                offset: 0,
                length: 4,
                kind: DICE_KIND_DATA,
            }]
        );
    }

    #[test]
    fn parse_rejects_misaligned_data_in_code_payload() {
        // 7 bytes is not a whole number of 8-byte entries.
        let image = build_image(Some(&[0u8; 7]));
        let result = ObjectFile::parse("/tmp/synth-bad-dic.o", &image);
        assert!(
            matches!(result, Err(ReadError::BadCmdsize { .. })),
            "a payload that is not a multiple of 8 bytes must be rejected"
        );
    }

    #[test]
    fn indirect_target_name_resolves() {
        // Build a minimal strtab with "\0_alias\0_target\0" and a RawNlist
        // whose n_value points at "_target".
        let strtab = StringTable::from_bytes(b"\0_alias\0_target\0".to_vec());
        let obj = ObjectFile {
            path: PathBuf::from("/tmp/t"),
            header: MachHeader64 {
                magic: MH_MAGIC_64,
                cputype: CPU_TYPE_ARM64,
                cpusubtype: 0,
                filetype: MH_OBJECT,
                ncmds: 0,
                sizeofcmds: 0,
                flags: 0,
                reserved: 0,
            },
            commands: Vec::new(),
            sections: Vec::new(),
            symbols: Vec::new(),
            strings: strtab,
            symtab: None,
            dysymtab: None,
            data_in_code: Vec::new(),
        };
        let alias = InputSymbol::from_raw(RawNlist {
            strx: 1,
            n_type: N_INDR | N_EXT,
            n_sect: 0,
            n_desc: 0,
            n_value: 8, // strx of "_target"
        });
        let resolved = obj.indirect_target_name(&alias).unwrap().unwrap();
        assert_eq!(resolved, "_target");
    }
}
464