Rust · 55868 bytes Raw Blame History
1 //! Mach-O 64 reader.
2 //!
3 //! Sprint 1: parse `mach_header_64` and the load-command list, round-tripping
4 //! every command afs-as emits. Section contents, symbol bodies, and relocation
5 //! entries arrive in Sprint 2 and Sprint 3.
6 //!
7 //! All input is assumed little-endian (arm64 mach-o is always little-endian in
8 //! practice; we error out on any other cpu type).
9
10 use std::fmt;
11
12 use super::constants::*;
13
14 /// Every error surface this module can produce. Diagnostics include byte
15 /// offsets and a static context string so downstream layers can produce the
16 /// caret-under-source style that `afs-as/src/diag*.rs` uses.
17 #[derive(Debug)]
18 pub enum ReadError {
19 /// Not enough bytes to decode the next field.
20 Truncated {
21 need: usize,
22 have: usize,
23 context: &'static str,
24 },
25 /// Magic number is not `MH_MAGIC_64`.
26 BadMagic { got: u32 },
27 /// CPU type is not `CPU_TYPE_ARM64`.
28 UnsupportedCpu { got: u32 },
29 /// A load command's `cmdsize` field is malformed.
30 BadCmdsize {
31 cmd: u32,
32 cmdsize: u32,
33 at_offset: usize,
34 reason: &'static str,
35 },
36 /// A relocation entry or pairing is structurally invalid.
37 BadRelocation {
38 at_offset: u32,
39 reason: &'static str,
40 },
41 }
42
43 impl fmt::Display for ReadError {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 match self {
46 ReadError::Truncated { need, have, context } => write!(
47 f,
48 "truncated input while reading {context}: need {need} bytes, have {have}"
49 ),
50 ReadError::BadMagic { got } => write!(
51 f,
52 "not a Mach-O 64 file: magic 0x{got:08x} (expected 0x{MH_MAGIC_64:08x})"
53 ),
54 ReadError::UnsupportedCpu { got } => write!(
55 f,
56 "unsupported cpu type 0x{got:08x} (afs-ld requires arm64 / 0x{CPU_TYPE_ARM64:08x})"
57 ),
58 ReadError::BadCmdsize { cmd, cmdsize, at_offset, reason } => write!(
59 f,
60 "load command 0x{cmd:x} at offset 0x{at_offset:x}: cmdsize {cmdsize} invalid ({reason})"
61 ),
62 ReadError::BadRelocation { at_offset, reason } => write!(
63 f,
64 "malformed relocation at offset 0x{at_offset:x}: {reason}"
65 ),
66 }
67 }
68 }
69
70 impl std::error::Error for ReadError {}
71
/// `mach_header_64` — 32 bytes on the wire.
///
/// Fields are declared in wire order: `parse_header` reads them as eight
/// consecutive little-endian `u32` words and `write_header` emits them the
/// same way.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MachHeader64 {
    /// Must equal `MH_MAGIC_64` for `parse_header` to accept the input.
    pub magic: u32,
    /// Must equal `CPU_TYPE_ARM64` for `parse_header` to accept the input.
    pub cputype: u32,
    pub cpusubtype: u32,
    pub filetype: u32,
    /// Number of load commands `parse_commands` will try to decode.
    pub ncmds: u32,
    /// Byte length of the load-command region that follows the header.
    pub sizeofcmds: u32,
    pub flags: u32,
    pub reserved: u32,
}

/// Size of a `mach_header_64` on the wire.
pub const HEADER_SIZE: usize = 32;
87
88 pub fn parse_header(bytes: &[u8]) -> Result<MachHeader64, ReadError> {
89 if bytes.len() < HEADER_SIZE {
90 return Err(ReadError::Truncated {
91 need: HEADER_SIZE,
92 have: bytes.len(),
93 context: "mach_header_64",
94 });
95 }
96 let magic = u32_le(&bytes[0..4]);
97 if magic != MH_MAGIC_64 {
98 return Err(ReadError::BadMagic { got: magic });
99 }
100 let cputype = u32_le(&bytes[4..8]);
101 if cputype != CPU_TYPE_ARM64 {
102 return Err(ReadError::UnsupportedCpu { got: cputype });
103 }
104 Ok(MachHeader64 {
105 magic,
106 cputype,
107 cpusubtype: u32_le(&bytes[8..12]),
108 filetype: u32_le(&bytes[12..16]),
109 ncmds: u32_le(&bytes[16..20]),
110 sizeofcmds: u32_le(&bytes[20..24]),
111 flags: u32_le(&bytes[24..28]),
112 reserved: u32_le(&bytes[28..32]),
113 })
114 }
115
116 pub fn write_header(hdr: &MachHeader64, out: &mut Vec<u8>) {
117 out.extend_from_slice(&hdr.magic.to_le_bytes());
118 out.extend_from_slice(&hdr.cputype.to_le_bytes());
119 out.extend_from_slice(&hdr.cpusubtype.to_le_bytes());
120 out.extend_from_slice(&hdr.filetype.to_le_bytes());
121 out.extend_from_slice(&hdr.ncmds.to_le_bytes());
122 out.extend_from_slice(&hdr.sizeofcmds.to_le_bytes());
123 out.extend_from_slice(&hdr.flags.to_le_bytes());
124 out.extend_from_slice(&hdr.reserved.to_le_bytes());
125 }
126
127 // ---------------------------------------------------------------------------
128 // Load commands.
129 //
130 // Every command starts with `cmd: u32` + `cmdsize: u32` (the "load_command"
131 // header). `cmdsize` is 8-byte aligned and counts both those 8 header bytes
132 // plus the payload. Specific command kinds get their own variants as each
133 // commit in this sprint decodes them; unknown-to-us kinds live in
134 // `LoadCommand::Raw` forever so round-trips survive.
135 // ---------------------------------------------------------------------------
136
137 #[derive(Debug, Clone, PartialEq, Eq)]
138 pub enum LoadCommand {
139 Segment64(Segment64),
140 Symtab(SymtabCmd),
141 Dysymtab(DysymtabCmd),
142 BuildVersion(BuildVersionCmd),
143 LinkerOptimizationHint(LinkEditDataCmd),
144 /// `LC_ID_DYLIB` + every `LC_*_DYLIB` variant share the same wire
145 /// format. The `cmd` field on the inner struct discriminates.
146 Dylib(DylibCmd),
147 /// `LC_RPATH` — one runtime-search path per entry.
148 Rpath(RpathCmd),
149 /// `LC_DYLD_INFO_ONLY` — classic locator for rebase/bind/lazy/weak/export
150 /// streams in `__LINKEDIT`.
151 DyldInfoOnly(DyldInfoCmd),
152 /// `LC_DYLD_EXPORTS_TRIE` — the modern chained-fixups alternative that
153 /// holds only the export trie (paired with `LC_DYLD_CHAINED_FIXUPS`).
154 DyldExportsTrie(LinkEditDataCmd),
155 /// `LC_DYLD_CHAINED_FIXUPS` — pointer to the chained-fixups blob.
156 DyldChainedFixups(LinkEditDataCmd),
157 /// A load command whose payload we haven't decoded yet. Preserves bytes
158 /// verbatim for byte-level round-trip.
159 Raw {
160 cmd: u32,
161 cmdsize: u32,
162 data: Vec<u8>,
163 },
164 }
165
166 impl LoadCommand {
167 pub fn cmd(&self) -> u32 {
168 match self {
169 LoadCommand::Segment64(_) => LC_SEGMENT_64,
170 LoadCommand::Symtab(_) => LC_SYMTAB,
171 LoadCommand::Dysymtab(_) => LC_DYSYMTAB,
172 LoadCommand::BuildVersion(_) => LC_BUILD_VERSION,
173 LoadCommand::LinkerOptimizationHint(_) => LC_LINKER_OPTIMIZATION_HINT,
174 LoadCommand::Dylib(d) => d.cmd,
175 LoadCommand::Rpath(_) => LC_RPATH,
176 LoadCommand::DyldInfoOnly(_) => LC_DYLD_INFO_ONLY,
177 LoadCommand::DyldExportsTrie(_) => LC_DYLD_EXPORTS_TRIE,
178 LoadCommand::DyldChainedFixups(_) => LC_DYLD_CHAINED_FIXUPS,
179 LoadCommand::Raw { cmd, .. } => *cmd,
180 }
181 }
182
183 pub fn cmdsize(&self) -> u32 {
184 match self {
185 LoadCommand::Segment64(s) => s.wire_size(),
186 LoadCommand::Symtab(_) => SymtabCmd::WIRE_SIZE,
187 LoadCommand::Dysymtab(_) => DysymtabCmd::WIRE_SIZE,
188 LoadCommand::BuildVersion(b) => b.wire_size(),
189 LoadCommand::LinkerOptimizationHint(_) => LinkEditDataCmd::WIRE_SIZE,
190 LoadCommand::Dylib(d) => d.wire_size(),
191 LoadCommand::Rpath(r) => r.wire_size(),
192 LoadCommand::DyldInfoOnly(_) => DyldInfoCmd::WIRE_SIZE,
193 LoadCommand::DyldExportsTrie(_) => LinkEditDataCmd::WIRE_SIZE,
194 LoadCommand::DyldChainedFixups(_) => LinkEditDataCmd::WIRE_SIZE,
195 LoadCommand::Raw { cmdsize, .. } => *cmdsize,
196 }
197 }
198 }
199
200 // ---------------------------------------------------------------------------
201 // LC_SEGMENT_64 + section_64
202 // ---------------------------------------------------------------------------
203
/// Fixed 16-byte, null-padded name field. The bytes are not guaranteed to be
/// UTF-8 (the spec promises nothing beyond "null-padded bytes"), so the raw
/// array is stored to keep byte-level round-trips exact; use [`name16_str`]
/// when a printable form is needed.
pub type Name16 = [u8; 16];

/// Lossy display form of a [`Name16`]: the bytes before the first NUL (all 16
/// when there is none), with invalid UTF-8 replaced by U+FFFD.
pub fn name16_str(name: &Name16) -> String {
    let visible_len = name.iter().take_while(|&&byte| byte != 0).count();
    String::from_utf8_lossy(&name[..visible_len]).into_owned()
}
214
215 #[derive(Debug, Clone, PartialEq, Eq)]
216 pub struct Segment64 {
217 pub segname: Name16,
218 pub vmaddr: u64,
219 pub vmsize: u64,
220 pub fileoff: u64,
221 pub filesize: u64,
222 pub maxprot: u32,
223 pub initprot: u32,
224 pub flags: u32,
225 pub sections: Vec<Section64Header>,
226 }
227
228 impl Segment64 {
229 /// Fixed portion (before sections array): 16 + 4×u64 + 4×u32 = 64 bytes.
230 const BASE: usize = 64;
231 /// Per-section size: 2×name16 + 2×u64 + 8×u32 = 80 bytes.
232 const SECT: usize = 80;
233
234 pub fn wire_size(&self) -> u32 {
235 (8 + Self::BASE + Self::SECT * self.sections.len()) as u32
236 }
237
238 pub fn segname_str(&self) -> String {
239 name16_str(&self.segname)
240 }
241
242 pub fn parse(cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
243 if payload.len() < Self::BASE {
244 return Err(ReadError::Truncated {
245 need: Self::BASE,
246 have: payload.len(),
247 context: "segment_command_64 base",
248 });
249 }
250 let segname: Name16 = payload[0..16].try_into().unwrap();
251 let vmaddr = u64_le(&payload[16..24]);
252 let vmsize = u64_le(&payload[24..32]);
253 let fileoff = u64_le(&payload[32..40]);
254 let filesize = u64_le(&payload[40..48]);
255 let maxprot = u32_le(&payload[48..52]);
256 let initprot = u32_le(&payload[52..56]);
257 let nsects = u32_le(&payload[56..60]);
258 let flags = u32_le(&payload[60..64]);
259
260 let body_needed = Self::BASE + Self::SECT * nsects as usize;
261 if payload.len() < body_needed {
262 return Err(ReadError::BadCmdsize {
263 cmd: LC_SEGMENT_64,
264 cmdsize,
265 at_offset: 0,
266 reason: "nsects implies more bytes than cmdsize accommodates",
267 });
268 }
269 let mut sections = Vec::with_capacity(nsects as usize);
270 for i in 0..nsects as usize {
271 let off = Self::BASE + i * Self::SECT;
272 sections.push(Section64Header::parse(&payload[off..off + Self::SECT])?);
273 }
274
275 Ok(Segment64 {
276 segname,
277 vmaddr,
278 vmsize,
279 fileoff,
280 filesize,
281 maxprot,
282 initprot,
283 flags,
284 sections,
285 })
286 }
287
288 pub fn write(&self, out: &mut Vec<u8>) {
289 out.extend_from_slice(&LC_SEGMENT_64.to_le_bytes());
290 out.extend_from_slice(&self.wire_size().to_le_bytes());
291 out.extend_from_slice(&self.segname);
292 out.extend_from_slice(&self.vmaddr.to_le_bytes());
293 out.extend_from_slice(&self.vmsize.to_le_bytes());
294 out.extend_from_slice(&self.fileoff.to_le_bytes());
295 out.extend_from_slice(&self.filesize.to_le_bytes());
296 out.extend_from_slice(&self.maxprot.to_le_bytes());
297 out.extend_from_slice(&self.initprot.to_le_bytes());
298 out.extend_from_slice(&(self.sections.len() as u32).to_le_bytes());
299 out.extend_from_slice(&self.flags.to_le_bytes());
300 for s in &self.sections {
301 s.write(out);
302 }
303 }
304 }
305
306 #[derive(Debug, Clone, PartialEq, Eq)]
307 pub struct Section64Header {
308 pub sectname: Name16,
309 pub segname: Name16,
310 pub addr: u64,
311 pub size: u64,
312 pub offset: u32,
313 pub align: u32, // log2
314 pub reloff: u32,
315 pub nreloc: u32,
316 pub flags: u32,
317 pub reserved1: u32,
318 pub reserved2: u32,
319 pub reserved3: u32,
320 }
321
322 impl Section64Header {
323 fn parse(bytes: &[u8]) -> Result<Self, ReadError> {
324 if bytes.len() < Segment64::SECT {
325 return Err(ReadError::Truncated {
326 need: Segment64::SECT,
327 have: bytes.len(),
328 context: "section_64",
329 });
330 }
331 let sectname: Name16 = bytes[0..16].try_into().unwrap();
332 let segname: Name16 = bytes[16..32].try_into().unwrap();
333 Ok(Section64Header {
334 sectname,
335 segname,
336 addr: u64_le(&bytes[32..40]),
337 size: u64_le(&bytes[40..48]),
338 offset: u32_le(&bytes[48..52]),
339 align: u32_le(&bytes[52..56]),
340 reloff: u32_le(&bytes[56..60]),
341 nreloc: u32_le(&bytes[60..64]),
342 flags: u32_le(&bytes[64..68]),
343 reserved1: u32_le(&bytes[68..72]),
344 reserved2: u32_le(&bytes[72..76]),
345 reserved3: u32_le(&bytes[76..80]),
346 })
347 }
348
349 fn write(&self, out: &mut Vec<u8>) {
350 out.extend_from_slice(&self.sectname);
351 out.extend_from_slice(&self.segname);
352 out.extend_from_slice(&self.addr.to_le_bytes());
353 out.extend_from_slice(&self.size.to_le_bytes());
354 out.extend_from_slice(&self.offset.to_le_bytes());
355 out.extend_from_slice(&self.align.to_le_bytes());
356 out.extend_from_slice(&self.reloff.to_le_bytes());
357 out.extend_from_slice(&self.nreloc.to_le_bytes());
358 out.extend_from_slice(&self.flags.to_le_bytes());
359 out.extend_from_slice(&self.reserved1.to_le_bytes());
360 out.extend_from_slice(&self.reserved2.to_le_bytes());
361 out.extend_from_slice(&self.reserved3.to_le_bytes());
362 }
363
364 pub fn sectname_str(&self) -> String {
365 name16_str(&self.sectname)
366 }
367
368 pub fn segname_str(&self) -> String {
369 name16_str(&self.segname)
370 }
371 }
372
373 /// Parse the `header.ncmds` load commands that follow a `mach_header_64`.
374 /// The slice must cover the full file (or at least through `sizeofcmds`);
375 /// offsets are always relative to the start of the mach-o image.
376 pub fn parse_commands(header: &MachHeader64, bytes: &[u8]) -> Result<Vec<LoadCommand>, ReadError> {
377 let cmds_end =
378 HEADER_SIZE
379 .checked_add(header.sizeofcmds as usize)
380 .ok_or(ReadError::Truncated {
381 need: usize::MAX,
382 have: bytes.len(),
383 context: "load-command region (sizeofcmds overflows)",
384 })?;
385 if bytes.len() < cmds_end {
386 return Err(ReadError::Truncated {
387 need: cmds_end,
388 have: bytes.len(),
389 context: "load-command region",
390 });
391 }
392
393 let mut out = Vec::with_capacity(header.ncmds as usize);
394 let mut cursor = HEADER_SIZE;
395 for _ in 0..header.ncmds {
396 if cursor + 8 > cmds_end {
397 return Err(ReadError::Truncated {
398 need: 8,
399 have: cmds_end.saturating_sub(cursor),
400 context: "load_command header (cmd + cmdsize)",
401 });
402 }
403 let cmd = u32_le(&bytes[cursor..cursor + 4]);
404 let cmdsize = u32_le(&bytes[cursor + 4..cursor + 8]);
405 if cmdsize < 8 {
406 return Err(ReadError::BadCmdsize {
407 cmd,
408 cmdsize,
409 at_offset: cursor,
410 reason: "smaller than 8-byte header",
411 });
412 }
413 if !cmdsize.is_multiple_of(8) {
414 return Err(ReadError::BadCmdsize {
415 cmd,
416 cmdsize,
417 at_offset: cursor,
418 reason: "not 8-byte aligned",
419 });
420 }
421 let end = cursor
422 .checked_add(cmdsize as usize)
423 .ok_or(ReadError::BadCmdsize {
424 cmd,
425 cmdsize,
426 at_offset: cursor,
427 reason: "cmdsize overflow",
428 })?;
429 if end > cmds_end {
430 return Err(ReadError::BadCmdsize {
431 cmd,
432 cmdsize,
433 at_offset: cursor,
434 reason: "overruns sizeofcmds",
435 });
436 }
437 let payload = &bytes[cursor + 8..end];
438 out.push(decode_command(cmd, cmdsize, payload)?);
439 cursor = end;
440 }
441
442 Ok(out)
443 }
444
445 fn decode_command(cmd: u32, cmdsize: u32, payload: &[u8]) -> Result<LoadCommand, ReadError> {
446 match cmd {
447 LC_SEGMENT_64 => Ok(LoadCommand::Segment64(Segment64::parse(cmdsize, payload)?)),
448 LC_SYMTAB => Ok(LoadCommand::Symtab(SymtabCmd::parse(cmdsize, payload)?)),
449 LC_DYSYMTAB => Ok(LoadCommand::Dysymtab(DysymtabCmd::parse(cmdsize, payload)?)),
450 LC_BUILD_VERSION => Ok(LoadCommand::BuildVersion(BuildVersionCmd::parse(
451 cmdsize, payload,
452 )?)),
453 LC_LINKER_OPTIMIZATION_HINT => Ok(LoadCommand::LinkerOptimizationHint(
454 LinkEditDataCmd::parse(LC_LINKER_OPTIMIZATION_HINT, cmdsize, payload)?,
455 )),
456 LC_ID_DYLIB | LC_LOAD_DYLIB | LC_LOAD_WEAK_DYLIB | LC_REEXPORT_DYLIB
457 | LC_LOAD_UPWARD_DYLIB => Ok(LoadCommand::Dylib(DylibCmd::parse(cmd, cmdsize, payload)?)),
458 LC_RPATH => Ok(LoadCommand::Rpath(RpathCmd::parse(cmdsize, payload)?)),
459 LC_DYLD_INFO_ONLY => Ok(LoadCommand::DyldInfoOnly(DyldInfoCmd::parse(
460 cmdsize, payload,
461 )?)),
462 LC_DYLD_EXPORTS_TRIE => Ok(LoadCommand::DyldExportsTrie(LinkEditDataCmd::parse(
463 LC_DYLD_EXPORTS_TRIE,
464 cmdsize,
465 payload,
466 )?)),
467 LC_DYLD_CHAINED_FIXUPS => Ok(LoadCommand::DyldChainedFixups(LinkEditDataCmd::parse(
468 LC_DYLD_CHAINED_FIXUPS,
469 cmdsize,
470 payload,
471 )?)),
472 _ => Ok(LoadCommand::Raw {
473 cmd,
474 cmdsize,
475 data: payload.to_vec(),
476 }),
477 }
478 }
479
480 /// Write a sequence of load commands back to wire form. Paired with
481 /// `parse_commands` so `write_commands(parse_commands(hdr, bytes)?, &mut out)`
482 /// produces byte-identical output to the original region.
483 pub fn write_commands(cmds: &[LoadCommand], out: &mut Vec<u8>) {
484 for c in cmds {
485 match c {
486 LoadCommand::Segment64(s) => s.write(out),
487 LoadCommand::Symtab(s) => s.write(out),
488 LoadCommand::Dysymtab(d) => d.write(out),
489 LoadCommand::BuildVersion(b) => b.write(out),
490 LoadCommand::LinkerOptimizationHint(l) => l.write(LC_LINKER_OPTIMIZATION_HINT, out),
491 LoadCommand::Dylib(d) => d.write(out),
492 LoadCommand::Rpath(r) => r.write(out),
493 LoadCommand::DyldInfoOnly(d) => d.write(out),
494 LoadCommand::DyldExportsTrie(l) => l.write(LC_DYLD_EXPORTS_TRIE, out),
495 LoadCommand::DyldChainedFixups(l) => l.write(LC_DYLD_CHAINED_FIXUPS, out),
496 LoadCommand::Raw { cmd, cmdsize, data } => {
497 out.extend_from_slice(&cmd.to_le_bytes());
498 out.extend_from_slice(&cmdsize.to_le_bytes());
499 out.extend_from_slice(data);
500 }
501 }
502 }
503 }
504
505 // ---------------------------------------------------------------------------
506 // LC_SYMTAB
507 // ---------------------------------------------------------------------------
508
509 /// `symtab_command` — 16-byte payload locating the symbol table and string
510 /// table in the file. Sprint 2 decodes the nlist_64 + string table contents
511 /// themselves; this sprint only lifts the locator.
512 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
513 pub struct SymtabCmd {
514 pub symoff: u32,
515 pub nsyms: u32,
516 pub stroff: u32,
517 pub strsize: u32,
518 }
519
520 impl SymtabCmd {
521 pub const WIRE_SIZE: u32 = 8 + 16;
522
523 pub fn parse(cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
524 if cmdsize != Self::WIRE_SIZE {
525 return Err(ReadError::BadCmdsize {
526 cmd: LC_SYMTAB,
527 cmdsize,
528 at_offset: 0,
529 reason: "LC_SYMTAB cmdsize must be 24",
530 });
531 }
532 if payload.len() < 16 {
533 return Err(ReadError::Truncated {
534 need: 16,
535 have: payload.len(),
536 context: "symtab_command",
537 });
538 }
539 Ok(SymtabCmd {
540 symoff: u32_le(&payload[0..4]),
541 nsyms: u32_le(&payload[4..8]),
542 stroff: u32_le(&payload[8..12]),
543 strsize: u32_le(&payload[12..16]),
544 })
545 }
546
547 pub fn write(&self, out: &mut Vec<u8>) {
548 out.extend_from_slice(&LC_SYMTAB.to_le_bytes());
549 out.extend_from_slice(&Self::WIRE_SIZE.to_le_bytes());
550 out.extend_from_slice(&self.symoff.to_le_bytes());
551 out.extend_from_slice(&self.nsyms.to_le_bytes());
552 out.extend_from_slice(&self.stroff.to_le_bytes());
553 out.extend_from_slice(&self.strsize.to_le_bytes());
554 }
555 }
556
557 // ---------------------------------------------------------------------------
558 // LC_DYSYMTAB
559 // ---------------------------------------------------------------------------
560
561 /// `dysymtab_command` — 72-byte payload with 18 u32 fields describing
562 /// partitioning of the symbol table and auxiliary tables. Sprint 2 consumes
563 /// the partition boundaries; the other fields are for dylibs / indirect
564 /// symbol tables (Sprint 12).
565 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
566 pub struct DysymtabCmd {
567 pub ilocalsym: u32,
568 pub nlocalsym: u32,
569 pub iextdefsym: u32,
570 pub nextdefsym: u32,
571 pub iundefsym: u32,
572 pub nundefsym: u32,
573 pub tocoff: u32,
574 pub ntoc: u32,
575 pub modtaboff: u32,
576 pub nmodtab: u32,
577 pub extrefsymoff: u32,
578 pub nextrefsyms: u32,
579 pub indirectsymoff: u32,
580 pub nindirectsyms: u32,
581 pub extreloff: u32,
582 pub nextrel: u32,
583 pub locreloff: u32,
584 pub nlocrel: u32,
585 }
586
587 impl DysymtabCmd {
588 pub const WIRE_SIZE: u32 = 8 + 72;
589
590 pub fn parse(cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
591 if cmdsize != Self::WIRE_SIZE {
592 return Err(ReadError::BadCmdsize {
593 cmd: LC_DYSYMTAB,
594 cmdsize,
595 at_offset: 0,
596 reason: "LC_DYSYMTAB cmdsize must be 80",
597 });
598 }
599 if payload.len() < 72 {
600 return Err(ReadError::Truncated {
601 need: 72,
602 have: payload.len(),
603 context: "dysymtab_command",
604 });
605 }
606 let get = |i: usize| u32_le(&payload[i * 4..(i + 1) * 4]);
607 Ok(DysymtabCmd {
608 ilocalsym: get(0),
609 nlocalsym: get(1),
610 iextdefsym: get(2),
611 nextdefsym: get(3),
612 iundefsym: get(4),
613 nundefsym: get(5),
614 tocoff: get(6),
615 ntoc: get(7),
616 modtaboff: get(8),
617 nmodtab: get(9),
618 extrefsymoff: get(10),
619 nextrefsyms: get(11),
620 indirectsymoff: get(12),
621 nindirectsyms: get(13),
622 extreloff: get(14),
623 nextrel: get(15),
624 locreloff: get(16),
625 nlocrel: get(17),
626 })
627 }
628
629 pub fn write(&self, out: &mut Vec<u8>) {
630 out.extend_from_slice(&LC_DYSYMTAB.to_le_bytes());
631 out.extend_from_slice(&Self::WIRE_SIZE.to_le_bytes());
632 for v in [
633 self.ilocalsym,
634 self.nlocalsym,
635 self.iextdefsym,
636 self.nextdefsym,
637 self.iundefsym,
638 self.nundefsym,
639 self.tocoff,
640 self.ntoc,
641 self.modtaboff,
642 self.nmodtab,
643 self.extrefsymoff,
644 self.nextrefsyms,
645 self.indirectsymoff,
646 self.nindirectsyms,
647 self.extreloff,
648 self.nextrel,
649 self.locreloff,
650 self.nlocrel,
651 ] {
652 out.extend_from_slice(&v.to_le_bytes());
653 }
654 }
655 }
656
657 // ---------------------------------------------------------------------------
658 // LC_BUILD_VERSION
659 // ---------------------------------------------------------------------------
660
661 /// `build_tool_version` — 8 bytes: tool kind + version.
662 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
663 pub struct BuildTool {
664 pub tool: u32,
665 pub version: u32,
666 }
667
668 /// `build_version_command` — 16-byte fixed header + N × 8-byte tool records.
669 /// `minos` / `sdk` are packed X.Y.Z: `(X << 16) | (Y << 8) | Z`.
670 #[derive(Debug, Clone, PartialEq, Eq)]
671 pub struct BuildVersionCmd {
672 pub platform: u32,
673 pub minos: u32,
674 pub sdk: u32,
675 pub tools: Vec<BuildTool>,
676 }
677
678 impl BuildVersionCmd {
679 const BASE: usize = 16; // platform + minos + sdk + ntools
680 const TOOL: usize = 8;
681
682 pub fn wire_size(&self) -> u32 {
683 (8 + Self::BASE + Self::TOOL * self.tools.len()) as u32
684 }
685
686 pub fn parse(cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
687 if payload.len() < Self::BASE {
688 return Err(ReadError::Truncated {
689 need: Self::BASE,
690 have: payload.len(),
691 context: "build_version_command",
692 });
693 }
694 let platform = u32_le(&payload[0..4]);
695 let minos = u32_le(&payload[4..8]);
696 let sdk = u32_le(&payload[8..12]);
697 let ntools = u32_le(&payload[12..16]);
698
699 let body_needed = Self::BASE + Self::TOOL * ntools as usize;
700 if payload.len() < body_needed {
701 return Err(ReadError::BadCmdsize {
702 cmd: LC_BUILD_VERSION,
703 cmdsize,
704 at_offset: 0,
705 reason: "ntools requires more bytes than cmdsize accommodates",
706 });
707 }
708
709 let mut tools = Vec::with_capacity(ntools as usize);
710 for i in 0..ntools as usize {
711 let off = Self::BASE + i * Self::TOOL;
712 tools.push(BuildTool {
713 tool: u32_le(&payload[off..off + 4]),
714 version: u32_le(&payload[off + 4..off + 8]),
715 });
716 }
717
718 Ok(BuildVersionCmd {
719 platform,
720 minos,
721 sdk,
722 tools,
723 })
724 }
725
726 pub fn write(&self, out: &mut Vec<u8>) {
727 out.extend_from_slice(&LC_BUILD_VERSION.to_le_bytes());
728 out.extend_from_slice(&self.wire_size().to_le_bytes());
729 out.extend_from_slice(&self.platform.to_le_bytes());
730 out.extend_from_slice(&self.minos.to_le_bytes());
731 out.extend_from_slice(&self.sdk.to_le_bytes());
732 out.extend_from_slice(&(self.tools.len() as u32).to_le_bytes());
733 for t in &self.tools {
734 out.extend_from_slice(&t.tool.to_le_bytes());
735 out.extend_from_slice(&t.version.to_le_bytes());
736 }
737 }
738 }
739
740 // ---------------------------------------------------------------------------
741 // dylib_command — LC_ID_DYLIB / LC_LOAD_DYLIB / LC_LOAD_WEAK_DYLIB /
742 // LC_REEXPORT_DYLIB / LC_LOAD_UPWARD_DYLIB all share the same 16-byte
743 // dylib struct + variable null-terminated name, padded to 8-byte alignment.
744 // ---------------------------------------------------------------------------
745
746 #[derive(Debug, Clone, PartialEq, Eq)]
747 pub struct DylibCmd {
748 /// Which LC_*_DYLIB variant this entry is.
749 pub cmd: u32,
750 /// Install name / load path — relative (e.g. `@rpath/libfoo.dylib`) or
751 /// absolute (`/usr/lib/libSystem.B.dylib`).
752 pub name: String,
753 /// Timestamp is historically `time_t`; writers typically store `2` for
754 /// reproducibility, so we preserve raw for round-trip.
755 pub timestamp: u32,
756 /// X.Y.Z packed as `0xXXXXYYZZ`.
757 pub current_version: u32,
758 pub compatibility_version: u32,
759 }
760
761 impl DylibCmd {
762 /// 8 (lc header) + 16 (dylib struct) + name + null + pad-to-8.
763 pub fn wire_size(&self) -> u32 {
764 let tail = pad8(16 + self.name.len() + 1);
765 (8 + tail) as u32
766 }
767
768 pub fn parse(cmd: u32, cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
769 if payload.len() < 16 {
770 return Err(ReadError::Truncated {
771 need: 16,
772 have: payload.len(),
773 context: "dylib_command body",
774 });
775 }
776 let name_off_in_cmd = u32_le(&payload[0..4]) as usize;
777 let timestamp = u32_le(&payload[4..8]);
778 let current_version = u32_le(&payload[8..12]);
779 let compatibility_version = u32_le(&payload[12..16]);
780 // Offset is relative to cmd start; payload is post-header by 8 bytes.
781 if name_off_in_cmd < 8 || name_off_in_cmd - 8 > payload.len() {
782 return Err(ReadError::BadCmdsize {
783 cmd,
784 cmdsize,
785 at_offset: 0,
786 reason: "dylib_command name offset out of range",
787 });
788 }
789 let name_start = name_off_in_cmd - 8;
790 let name_bytes = &payload[name_start..];
791 let nul = name_bytes
792 .iter()
793 .position(|&b| b == 0)
794 .ok_or(ReadError::BadCmdsize {
795 cmd,
796 cmdsize,
797 at_offset: 0,
798 reason: "dylib_command name is not null-terminated",
799 })?;
800 let name = std::str::from_utf8(&name_bytes[..nul])
801 .map_err(|_| ReadError::BadCmdsize {
802 cmd,
803 cmdsize,
804 at_offset: 0,
805 reason: "dylib_command name is not UTF-8",
806 })?
807 .to_string();
808 Ok(DylibCmd {
809 cmd,
810 name,
811 timestamp,
812 current_version,
813 compatibility_version,
814 })
815 }
816
817 pub fn write(&self, out: &mut Vec<u8>) {
818 // `name` always sits at offset 24 from the start of the LC.
819 let name_offset: u32 = 24;
820 out.extend_from_slice(&self.cmd.to_le_bytes());
821 out.extend_from_slice(&self.wire_size().to_le_bytes());
822 out.extend_from_slice(&name_offset.to_le_bytes());
823 out.extend_from_slice(&self.timestamp.to_le_bytes());
824 out.extend_from_slice(&self.current_version.to_le_bytes());
825 out.extend_from_slice(&self.compatibility_version.to_le_bytes());
826 out.extend_from_slice(self.name.as_bytes());
827 out.push(0);
828 let padded = pad8(16 + self.name.len() + 1);
829 let pad = padded - (16 + self.name.len() + 1);
830 for _ in 0..pad {
831 out.push(0);
832 }
833 }
834 }
835
836 // ---------------------------------------------------------------------------
837 // rpath_command — one `-rpath` search path each.
838 // ---------------------------------------------------------------------------
839
840 #[derive(Debug, Clone, PartialEq, Eq)]
841 pub struct RpathCmd {
842 pub path: String,
843 }
844
845 impl RpathCmd {
846 /// 8 (lc header) + 4 (path offset) + path + null + pad-to-8.
847 pub fn wire_size(&self) -> u32 {
848 let tail = pad8(4 + self.path.len() + 1);
849 (8 + tail) as u32
850 }
851
852 pub fn parse(cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
853 if payload.len() < 4 {
854 return Err(ReadError::Truncated {
855 need: 4,
856 have: payload.len(),
857 context: "rpath_command body",
858 });
859 }
860 let off_in_cmd = u32_le(&payload[0..4]) as usize;
861 if off_in_cmd < 8 || off_in_cmd - 8 > payload.len() {
862 return Err(ReadError::BadCmdsize {
863 cmd: LC_RPATH,
864 cmdsize,
865 at_offset: 0,
866 reason: "rpath_command path offset out of range",
867 });
868 }
869 let start = off_in_cmd - 8;
870 let bytes = &payload[start..];
871 let nul = bytes
872 .iter()
873 .position(|&b| b == 0)
874 .ok_or(ReadError::BadCmdsize {
875 cmd: LC_RPATH,
876 cmdsize,
877 at_offset: 0,
878 reason: "rpath_command path is not null-terminated",
879 })?;
880 let path = std::str::from_utf8(&bytes[..nul])
881 .map_err(|_| ReadError::BadCmdsize {
882 cmd: LC_RPATH,
883 cmdsize,
884 at_offset: 0,
885 reason: "rpath_command path is not UTF-8",
886 })?
887 .to_string();
888 Ok(RpathCmd { path })
889 }
890
891 pub fn write(&self, out: &mut Vec<u8>) {
892 let off: u32 = 12; // 8 (header) + 4 (path offset field)
893 out.extend_from_slice(&LC_RPATH.to_le_bytes());
894 out.extend_from_slice(&self.wire_size().to_le_bytes());
895 out.extend_from_slice(&off.to_le_bytes());
896 out.extend_from_slice(self.path.as_bytes());
897 out.push(0);
898 let padded = pad8(4 + self.path.len() + 1);
899 let pad = padded - (4 + self.path.len() + 1);
900 for _ in 0..pad {
901 out.push(0);
902 }
903 }
904 }
905
/// Smallest multiple of 8 that is greater than or equal to `n`.
#[inline]
fn pad8(n: usize) -> usize {
    n.next_multiple_of(8)
}
911
912 // ---------------------------------------------------------------------------
913 // LC_DYLD_INFO_ONLY — classic locator for rebase/bind/lazy/weak/export.
914 // ---------------------------------------------------------------------------
915
916 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
917 pub struct DyldInfoCmd {
918 pub rebase_off: u32,
919 pub rebase_size: u32,
920 pub bind_off: u32,
921 pub bind_size: u32,
922 pub weak_bind_off: u32,
923 pub weak_bind_size: u32,
924 pub lazy_bind_off: u32,
925 pub lazy_bind_size: u32,
926 pub export_off: u32,
927 pub export_size: u32,
928 }
929
930 impl DyldInfoCmd {
931 pub const WIRE_SIZE: u32 = 8 + 40;
932
933 pub fn parse(cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
934 if cmdsize != Self::WIRE_SIZE {
935 return Err(ReadError::BadCmdsize {
936 cmd: LC_DYLD_INFO_ONLY,
937 cmdsize,
938 at_offset: 0,
939 reason: "LC_DYLD_INFO_ONLY cmdsize must be 48",
940 });
941 }
942 if payload.len() < 40 {
943 return Err(ReadError::Truncated {
944 need: 40,
945 have: payload.len(),
946 context: "dyld_info_command",
947 });
948 }
949 let g = |i: usize| u32_le(&payload[i * 4..(i + 1) * 4]);
950 Ok(DyldInfoCmd {
951 rebase_off: g(0),
952 rebase_size: g(1),
953 bind_off: g(2),
954 bind_size: g(3),
955 weak_bind_off: g(4),
956 weak_bind_size: g(5),
957 lazy_bind_off: g(6),
958 lazy_bind_size: g(7),
959 export_off: g(8),
960 export_size: g(9),
961 })
962 }
963
964 pub fn write(&self, out: &mut Vec<u8>) {
965 out.extend_from_slice(&LC_DYLD_INFO_ONLY.to_le_bytes());
966 out.extend_from_slice(&Self::WIRE_SIZE.to_le_bytes());
967 for v in [
968 self.rebase_off,
969 self.rebase_size,
970 self.bind_off,
971 self.bind_size,
972 self.weak_bind_off,
973 self.weak_bind_size,
974 self.lazy_bind_off,
975 self.lazy_bind_size,
976 self.export_off,
977 self.export_size,
978 ] {
979 out.extend_from_slice(&v.to_le_bytes());
980 }
981 }
982 }
983
// ---------------------------------------------------------------------------
// linkedit_data_command — shared wire format for LC_LINKER_OPTIMIZATION_HINT,
// LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_CODE_SIGNATURE, LC_DYLD_EXPORTS_TRIE,
// LC_DYLD_CHAINED_FIXUPS. The LOH, exports-trie, and chained-fixups variants
// are decoded today; the others adopt this same struct as they come online.
// ---------------------------------------------------------------------------
990
991 /// `linkedit_data_command` — 8 bytes: file offset + size pointing into
992 /// `__LINKEDIT`. The actual payload at `(dataoff, datasize)` is decoded by
993 /// whichever sprint owns the target section.
994 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
995 pub struct LinkEditDataCmd {
996 pub dataoff: u32,
997 pub datasize: u32,
998 }
999
1000 impl LinkEditDataCmd {
1001 pub const WIRE_SIZE: u32 = 8 + 8;
1002
1003 pub fn parse(cmd: u32, cmdsize: u32, payload: &[u8]) -> Result<Self, ReadError> {
1004 if cmdsize != Self::WIRE_SIZE {
1005 return Err(ReadError::BadCmdsize {
1006 cmd,
1007 cmdsize,
1008 at_offset: 0,
1009 reason: "linkedit_data_command cmdsize must be 16",
1010 });
1011 }
1012 if payload.len() < 8 {
1013 return Err(ReadError::Truncated {
1014 need: 8,
1015 have: payload.len(),
1016 context: "linkedit_data_command",
1017 });
1018 }
1019 Ok(LinkEditDataCmd {
1020 dataoff: u32_le(&payload[0..4]),
1021 datasize: u32_le(&payload[4..8]),
1022 })
1023 }
1024
1025 pub fn write(&self, cmd: u32, out: &mut Vec<u8>) {
1026 out.extend_from_slice(&cmd.to_le_bytes());
1027 out.extend_from_slice(&Self::WIRE_SIZE.to_le_bytes());
1028 out.extend_from_slice(&self.dataoff.to_le_bytes());
1029 out.extend_from_slice(&self.datasize.to_le_bytes());
1030 }
1031 }
1032
1033 // ---------------------------------------------------------------------------
1034 // Little-endian primitive readers. `u*_le(slice)` panics on short slices; every
1035 // caller in this module pre-checks length via `Truncated` diagnostics.
1036 // ---------------------------------------------------------------------------
1037
/// Read a little-endian `u32` from the first four bytes of `b`.
///
/// # Panics
///
/// Panics if `b` holds fewer than four bytes; callers must check the length first.
#[inline]
pub fn u32_le(b: &[u8]) -> u32 {
    let mut word = [0u8; 4];
    word.copy_from_slice(&b[..4]);
    u32::from_le_bytes(word)
}
1042
/// Read a little-endian `u64` from the first eight bytes of `b`.
///
/// # Panics
///
/// Panics if `b` holds fewer than eight bytes; callers must check the length first.
#[inline]
pub fn u64_le(b: &[u8]) -> u64 {
    let mut word = [0u8; 8];
    word.copy_from_slice(&b[..8]);
    u64::from_le_bytes(word)
}
1047
1048 // ---------------------------------------------------------------------------
1049 // Tests
1050 // ---------------------------------------------------------------------------
1051
#[cfg(test)]
mod tests {
    use super::*;

    // -- shared fixtures ----------------------------------------------------

    /// Header for an arm64 `MH_OBJECT` image with `flags` and `reserved`
    /// zeroed. Tests that need another file type or flag set override the
    /// field with struct-update syntax.
    fn object_header(ncmds: u32, sizeofcmds: u32) -> MachHeader64 {
        MachHeader64 {
            magic: MH_MAGIC_64,
            cputype: CPU_TYPE_ARM64,
            cpusubtype: 0,
            filetype: MH_OBJECT,
            ncmds,
            sizeofcmds,
            flags: 0,
            reserved: 0,
        }
    }

    /// Serialized `hdr` immediately followed by `commands`, which must
    /// already be wire-encoded load commands (or empty).
    fn image_with(hdr: &MachHeader64, commands: &[u8]) -> Vec<u8> {
        let mut image = Vec::new();
        write_header(hdr, &mut image);
        image.extend_from_slice(commands);
        image
    }

    /// Hand-crafted minimal MH_OBJECT header: arm64, 0 load commands,
    /// `MH_SUBSECTIONS_VIA_SYMBOLS` as the only flag. Built byte-by-byte so
    /// it does not depend on `write_header`.
    fn minimal_object_header_bytes() -> Vec<u8> {
        let mut b = Vec::new();
        b.extend_from_slice(&MH_MAGIC_64.to_le_bytes());
        b.extend_from_slice(&CPU_TYPE_ARM64.to_le_bytes());
        b.extend_from_slice(&0u32.to_le_bytes()); // cpusubtype
        b.extend_from_slice(&MH_OBJECT.to_le_bytes());
        b.extend_from_slice(&0u32.to_le_bytes()); // ncmds
        b.extend_from_slice(&0u32.to_le_bytes()); // sizeofcmds
        b.extend_from_slice(&MH_SUBSECTIONS_VIA_SYMBOLS.to_le_bytes()); // flags
        b.extend_from_slice(&0u32.to_le_bytes()); // reserved
        b
    }

    /// Synthesize a mach-o image whose header declares `ncmds` commands and
    /// whose body is the given `(cmd, cmdsize, payload)` triples written
    /// back to back. `sizeofcmds` is the sum of the declared `cmdsize`
    /// values, each of which is expected to include the 8-byte command
    /// header. Nothing is validated, so malformed images can be built on
    /// purpose.
    fn synth_image(ncmds: u32, cmds: &[(u32, u32, &[u8])]) -> Vec<u8> {
        let sizeofcmds: u32 = cmds.iter().map(|(_, sz, _)| *sz).sum();
        let mut image = image_with(&object_header(ncmds, sizeofcmds), &[]);
        for (cmd, sz, payload) in cmds {
            image.extend_from_slice(&cmd.to_le_bytes());
            image.extend_from_slice(&sz.to_le_bytes());
            image.extend_from_slice(payload);
        }
        image
    }

    /// Zero-padded (and, past 16 bytes, truncated) fixed-width name.
    fn name16(s: &str) -> Name16 {
        let mut out = [0u8; 16];
        let bytes = s.as_bytes();
        let n = bytes.len().min(16);
        out[..n].copy_from_slice(&bytes[..n]);
        out
    }

    /// `__TEXT` segment carrying a single `__text` section.
    fn sample_segment64() -> Segment64 {
        Segment64 {
            segname: name16("__TEXT"),
            vmaddr: 0,
            vmsize: 0x1000,
            fileoff: 0x200,
            filesize: 0x40,
            maxprot: 7,
            initprot: 5,
            flags: 0,
            sections: vec![Section64Header {
                sectname: name16("__text"),
                segname: name16("__TEXT"),
                addr: 0,
                size: 0x10,
                offset: 0x200,
                align: 2,
                reloff: 0,
                nreloc: 0,
                flags: S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS,
                reserved1: 0,
                reserved2: 0,
                reserved3: 0,
            }],
        }
    }

    // -- header ---------------------------------------------------------------

    #[test]
    fn parse_minimal_object_header() {
        let bytes = minimal_object_header_bytes();
        let hdr = parse_header(&bytes).expect("valid header");
        assert_eq!(hdr.magic, MH_MAGIC_64);
        assert_eq!(hdr.cputype, CPU_TYPE_ARM64);
        assert_eq!(hdr.filetype, MH_OBJECT);
        assert_eq!(hdr.flags, MH_SUBSECTIONS_VIA_SYMBOLS);
    }

    #[test]
    fn round_trip_header_byte_equal() {
        let bytes = minimal_object_header_bytes();
        let hdr = parse_header(&bytes).unwrap();
        let mut out = Vec::new();
        write_header(&hdr, &mut out);
        assert_eq!(out, bytes);
    }

    #[test]
    fn truncated_header_errors_cleanly() {
        let err = parse_header(&[0u8; 10]).unwrap_err();
        assert!(
            matches!(
                err,
                ReadError::Truncated {
                    need: HEADER_SIZE,
                    have: 10,
                    ..
                }
            ),
            "unexpected: {err:?}"
        );
    }

    #[test]
    fn bad_magic_errors() {
        let mut bytes = minimal_object_header_bytes();
        bytes[0] ^= 0xff;
        let err = parse_header(&bytes).unwrap_err();
        assert!(matches!(err, ReadError::BadMagic { .. }));
    }

    #[test]
    fn wrong_cpu_errors() {
        let mut bytes = minimal_object_header_bytes();
        // Overwrite cputype with x86_64 (0x01000007).
        bytes[4..8].copy_from_slice(&0x0100_0007u32.to_le_bytes());
        let err = parse_header(&bytes).unwrap_err();
        assert!(matches!(
            err,
            ReadError::UnsupportedCpu { got: 0x0100_0007 }
        ));
    }

    // -- raw command list -----------------------------------------------------

    #[test]
    fn round_trip_two_raw_commands() {
        // Two fake commands of size 16 each (8 header + 8 payload).
        let payload_a = [0xAAu8; 8];
        let payload_b = [0xBBu8; 8];
        let image = synth_image(
            2,
            &[(0xDEAD_BEEF, 16, &payload_a), (0xCAFE_F00D, 16, &payload_b)],
        );
        let hdr = parse_header(&image).unwrap();
        let cmds = parse_commands(&hdr, &image).unwrap();
        assert_eq!(cmds.len(), 2);
        assert_eq!(cmds[0].cmd(), 0xDEAD_BEEF);
        assert_eq!(cmds[0].cmdsize(), 16);
        assert_eq!(cmds[1].cmd(), 0xCAFE_F00D);

        let mut out = Vec::new();
        write_header(&hdr, &mut out);
        write_commands(&cmds, &mut out);
        assert_eq!(out, image);
    }

    #[test]
    fn cmdsize_below_header_errors() {
        let image = synth_image(1, &[(0x1234, 4, &[])]);
        // sizeofcmds = 4: too small for even the 8-byte command header.
        let hdr = object_header(1, 4);
        let err = parse_commands(&hdr, &image).unwrap_err();
        assert!(matches!(err, ReadError::Truncated { .. }));
    }

    #[test]
    fn cmdsize_unaligned_errors() {
        // cmdsize = 10 — not 8-aligned.
        let image = synth_image(1, &[(0x1234, 10, &[0u8; 2])]);
        let hdr = parse_header(&image).unwrap();
        let err = parse_commands(&hdr, &image).unwrap_err();
        assert!(matches!(
            err,
            ReadError::BadCmdsize { cmd: 0x1234, cmdsize: 10, reason, .. } if reason.contains("aligned")
        ));
    }

    #[test]
    fn cmdsize_overrun_errors() {
        // sizeofcmds says 8, but the command claims 16 bytes.
        let hdr = object_header(1, 8);
        let mut image = image_with(&hdr, &[]);
        image.extend_from_slice(&0x1234u32.to_le_bytes());
        image.extend_from_slice(&16u32.to_le_bytes());
        let err = parse_commands(&hdr, &image).unwrap_err();
        assert!(matches!(err, ReadError::BadCmdsize { reason, .. } if reason.contains("overruns")));
    }

    // -- LC_SEGMENT_64 --------------------------------------------------------

    #[test]
    fn segment64_round_trip_byte_equal() {
        let seg = sample_segment64();
        let mut wire = Vec::new();
        seg.write(&mut wire);
        // Strip LC header so Segment64::parse gets just the payload.
        let payload = &wire[8..];
        let decoded = Segment64::parse(seg.wire_size(), payload).unwrap();
        assert_eq!(decoded, seg);

        // And byte-equal on re-emit.
        let mut reemit = Vec::new();
        decoded.write(&mut reemit);
        assert_eq!(reemit, wire);
    }

    #[test]
    fn segment64_helpers_decode_names() {
        let seg = sample_segment64();
        assert_eq!(seg.segname_str(), "__TEXT");
        assert_eq!(seg.sections[0].sectname_str(), "__text");
        assert_eq!(seg.sections[0].segname_str(), "__TEXT");
    }

    #[test]
    fn segment64_with_zero_sections_round_trips() {
        let seg = Segment64 {
            segname: name16("__DATA"),
            vmaddr: 0,
            vmsize: 0,
            fileoff: 0,
            filesize: 0,
            maxprot: 3,
            initprot: 3,
            flags: 0,
            sections: vec![],
        };
        let mut wire = Vec::new();
        seg.write(&mut wire);
        let decoded = Segment64::parse(seg.wire_size(), &wire[8..]).unwrap();
        assert_eq!(decoded, seg);
        assert_eq!(seg.wire_size(), 8 + 64);
    }

    #[test]
    fn segment64_through_dispatcher_preserves_bytes() {
        // Build a synthetic image with a single LC_SEGMENT_64 followed by an
        // opaque command whose id the dispatcher does not know (so it lands
        // in `LoadCommand::Raw`). Both must survive the parse/write
        // round-trip.
        let seg = sample_segment64();
        let mut seg_wire = Vec::new();
        seg.write(&mut seg_wire);

        let raw_cmd = 0xCAFE_F00Du32; // any unknown cmd
        let raw_cmdsize: u32 = 16;
        let mut raw_wire = Vec::new();
        raw_wire.extend_from_slice(&raw_cmd.to_le_bytes());
        raw_wire.extend_from_slice(&raw_cmdsize.to_le_bytes());
        raw_wire.extend_from_slice(&[0x55u8; 8]);

        let sizeofcmds = (seg_wire.len() + raw_wire.len()) as u32;
        let hdr = MachHeader64 {
            flags: MH_SUBSECTIONS_VIA_SYMBOLS,
            ..object_header(2, sizeofcmds)
        };
        let mut image = image_with(&hdr, &seg_wire);
        image.extend_from_slice(&raw_wire);

        let parsed_hdr = parse_header(&image).unwrap();
        let cmds = parse_commands(&parsed_hdr, &image).unwrap();
        assert!(matches!(cmds[0], LoadCommand::Segment64(_)));
        assert!(matches!(
            cmds[1],
            LoadCommand::Raw {
                cmd: 0xCAFE_F00D,
                ..
            }
        ));

        let mut out = Vec::new();
        write_header(&parsed_hdr, &mut out);
        write_commands(&cmds, &mut out);
        assert_eq!(out, image);
    }

    // -- LC_SYMTAB / LC_DYSYMTAB ----------------------------------------------

    #[test]
    fn symtab_round_trip_and_dispatcher() {
        let cmd = SymtabCmd {
            symoff: 0x1234,
            nsyms: 7,
            stroff: 0x2000,
            strsize: 0x40,
        };
        let mut wire = Vec::new();
        cmd.write(&mut wire);
        assert_eq!(wire.len(), SymtabCmd::WIRE_SIZE as usize);

        let decoded = SymtabCmd::parse(SymtabCmd::WIRE_SIZE, &wire[8..]).unwrap();
        assert_eq!(decoded, cmd);

        let hdr = object_header(1, SymtabCmd::WIRE_SIZE);
        let image = image_with(&hdr, &wire);

        let parsed = parse_commands(&hdr, &image).unwrap();
        assert!(matches!(parsed[0], LoadCommand::Symtab(c) if c == cmd));
    }

    #[test]
    fn symtab_wrong_cmdsize_errors() {
        let payload = [0u8; 16];
        let err = SymtabCmd::parse(20, &payload).unwrap_err();
        assert!(matches!(err, ReadError::BadCmdsize { .. }));
    }

    #[test]
    fn dysymtab_round_trip_and_dispatcher() {
        let cmd = DysymtabCmd {
            ilocalsym: 0,
            nlocalsym: 3,
            iextdefsym: 3,
            nextdefsym: 2,
            iundefsym: 5,
            nundefsym: 4,
            indirectsymoff: 0x3000,
            nindirectsyms: 7,
            ..Default::default()
        };
        let mut wire = Vec::new();
        cmd.write(&mut wire);
        assert_eq!(wire.len(), DysymtabCmd::WIRE_SIZE as usize);

        let decoded = DysymtabCmd::parse(DysymtabCmd::WIRE_SIZE, &wire[8..]).unwrap();
        assert_eq!(decoded, cmd);

        let hdr = object_header(1, DysymtabCmd::WIRE_SIZE);
        let image = image_with(&hdr, &wire);

        let parsed = parse_commands(&hdr, &image).unwrap();
        assert!(matches!(parsed[0], LoadCommand::Dysymtab(c) if c == cmd));
    }

    // -- LC_BUILD_VERSION -----------------------------------------------------

    #[test]
    fn build_version_round_trip() {
        // Minimal: no tools. `minos` / `sdk` are X.Y.Z packed as 0xXXXXYYZZ;
        // 11.0.0 / 14.0.0 collapse to just `<major> << 16`.
        let cmd = BuildVersionCmd {
            platform: PLATFORM_MACOS,
            minos: 11 << 16,
            sdk: 14 << 16,
            tools: vec![],
        };
        let mut wire = Vec::new();
        cmd.write(&mut wire);
        // 8-byte LC header + platform/minos/sdk/ntools.
        assert_eq!(wire.len(), 8 + 16);
        let decoded = BuildVersionCmd::parse(cmd.wire_size(), &wire[8..]).unwrap();
        assert_eq!(decoded, cmd);

        // With two tool records.
        let cmd2 = BuildVersionCmd {
            platform: PLATFORM_MACOS,
            minos: (11 << 16) | (3 << 8),
            sdk: (14 << 16) | (2 << 8),
            tools: vec![
                BuildTool {
                    tool: 3,
                    version: 0x0001_0002,
                },
                BuildTool {
                    tool: 4,
                    version: 0x0002_0003,
                },
            ],
        };
        let mut wire2 = Vec::new();
        cmd2.write(&mut wire2);
        // Each tool record adds 8 bytes.
        assert_eq!(wire2.len(), 8 + 16 + 16);
        let decoded2 = BuildVersionCmd::parse(cmd2.wire_size(), &wire2[8..]).unwrap();
        assert_eq!(decoded2, cmd2);
    }

    #[test]
    fn build_version_through_dispatcher() {
        let cmd = BuildVersionCmd {
            platform: PLATFORM_MACOS,
            minos: 11 << 16,
            sdk: 14 << 16,
            tools: vec![BuildTool {
                tool: 3,
                version: 1,
            }],
        };
        let mut wire = Vec::new();
        cmd.write(&mut wire);

        let hdr = object_header(1, cmd.wire_size());
        let image = image_with(&hdr, &wire);

        let parsed = parse_commands(&hdr, &image).unwrap();
        assert!(matches!(&parsed[0], LoadCommand::BuildVersion(b) if b == &cmd));

        let mut reemit = Vec::new();
        write_header(&hdr, &mut reemit);
        write_commands(&parsed, &mut reemit);
        assert_eq!(reemit, image);
    }

    // -- dylib / rpath --------------------------------------------------------

    #[test]
    fn dylib_cmd_round_trip_byte_equal() {
        let cmd = DylibCmd {
            cmd: LC_LOAD_DYLIB,
            name: "/usr/lib/libSystem.B.dylib".into(),
            timestamp: 2,
            current_version: (1 << 16) | (2 << 8) | 3,
            compatibility_version: 1 << 16,
        };
        let mut wire = Vec::new();
        cmd.write(&mut wire);
        // cmdsize: 8 + 16 + name.len()+1 padded to 8 → 8 + 16 + 32 = 56
        assert_eq!(wire.len() % 8, 0);
        assert_eq!(wire.len(), cmd.wire_size() as usize);

        // Strip the LC header before feeding to parse.
        let decoded = DylibCmd::parse(LC_LOAD_DYLIB, cmd.wire_size(), &wire[8..]).unwrap();
        assert_eq!(decoded, cmd);
    }

    #[test]
    fn dylib_cmd_through_dispatcher_all_variants() {
        for kind in [
            LC_ID_DYLIB,
            LC_LOAD_DYLIB,
            LC_LOAD_WEAK_DYLIB,
            LC_REEXPORT_DYLIB,
            LC_LOAD_UPWARD_DYLIB,
        ] {
            let cmd = DylibCmd {
                cmd: kind,
                name: format!("@rpath/lib{:x}.dylib", kind & 0xff),
                timestamp: 7,
                current_version: (1 << 16) | 5,
                compatibility_version: 1 << 16,
            };
            let mut wire = Vec::new();
            cmd.write(&mut wire);

            let hdr = MachHeader64 {
                filetype: MH_DYLIB,
                ..object_header(1, cmd.wire_size())
            };
            let image = image_with(&hdr, &wire);
            let parsed = parse_commands(&hdr, &image).unwrap();
            assert!(matches!(&parsed[0], LoadCommand::Dylib(d) if d == &cmd));

            let mut reemit = Vec::new();
            write_header(&hdr, &mut reemit);
            write_commands(&parsed, &mut reemit);
            assert_eq!(reemit, image);
        }
    }

    #[test]
    fn rpath_round_trip() {
        let cmd = RpathCmd {
            path: "@executable_path/../Frameworks".into(),
        };
        let mut wire = Vec::new();
        cmd.write(&mut wire);
        assert_eq!(wire.len() % 8, 0);

        let hdr = MachHeader64 {
            filetype: MH_EXECUTE,
            ..object_header(1, cmd.wire_size())
        };
        let image = image_with(&hdr, &wire);
        let parsed = parse_commands(&hdr, &image).unwrap();
        assert!(matches!(&parsed[0], LoadCommand::Rpath(r) if r == &cmd));
    }

    #[test]
    fn dylib_cmd_bad_name_offset_errors() {
        // Name offset points past the buffer.
        let mut payload = Vec::new();
        payload.extend_from_slice(&9999u32.to_le_bytes()); // bad name offset
        payload.extend_from_slice(&0u32.to_le_bytes());
        payload.extend_from_slice(&0u32.to_le_bytes());
        payload.extend_from_slice(&0u32.to_le_bytes());
        let err = DylibCmd::parse(LC_LOAD_DYLIB, 32, &payload).unwrap_err();
        assert!(
            matches!(err, ReadError::BadCmdsize { reason, .. } if reason.contains("name offset"))
        );
    }

    // -- linkedit_data_command ------------------------------------------------

    #[test]
    fn loh_round_trip() {
        let cmd = LinkEditDataCmd {
            dataoff: 0x4000,
            datasize: 0x80,
        };
        let mut wire = Vec::new();
        cmd.write(LC_LINKER_OPTIMIZATION_HINT, &mut wire);
        assert_eq!(wire.len(), LinkEditDataCmd::WIRE_SIZE as usize);

        let decoded = LinkEditDataCmd::parse(
            LC_LINKER_OPTIMIZATION_HINT,
            LinkEditDataCmd::WIRE_SIZE,
            &wire[8..],
        )
        .unwrap();
        assert_eq!(decoded, cmd);

        let hdr = object_header(1, LinkEditDataCmd::WIRE_SIZE);
        let image = image_with(&hdr, &wire);
        let parsed = parse_commands(&hdr, &image).unwrap();
        assert!(matches!(
            &parsed[0],
            LoadCommand::LinkerOptimizationHint(l) if l == &cmd
        ));
    }
}
1642