Rust · 43612 bytes Raw Blame History
1 //! Static archive (`ar`) reader.
2 //!
3 //! `.a` files come in three flavors afs-ld cares about:
4 //!
5 //! | Flavor | Magic | Names |
6 //! |-----------|--------------|------------------------------------------------|
7 //! | `Bsd` | `!<arch>\n` | `#1/<N>` — next N bytes of the body are the name |
8 //! | `Sysv` | `!<arch>\n` | `foo.o/` (short) or `/NNN` into `//` (long) |
9 //! | `GnuThin` | `!<thin>\n` | name field holds an external path (body empty) |
10 //!
11 //! Both BSD and SysV archives use the same magic; distinguishing them
12 //! requires peeking at the first member. Apple's `ar` emits BSD; GNU's `ar`
13 //! on Linux emits SysV; GNU-thin archives use the `!<thin>\n` magic.
14 //!
//! Sprint 4 parses headers and member names, builds a symbol index, and
//! exposes `fetch_object_defining` for lazy member retrieval driven by the
//! Sprint 8 resolution pass. Member body bytes are returned as borrowed
//! slices into the archive buffer; GNU-thin members are read from their
//! external file on demand.
19
20 use std::fmt;
21 use std::path::{Path, PathBuf};
22 use std::str;
23
24 use crate::input::ObjectFile;
25 use crate::macho::reader::ReadError;
26
/// 8-byte global magic shared by both regular `ar` variants (BSD and SysV).
pub const AR_MAGIC: &[u8; 8] = b"!<arch>\n";
/// 8-byte global magic that marks a GNU "thin" archive.
pub const AR_MAGIC_THIN: &[u8; 8] = b"!<thin>\n";
/// Terminator occupying the final two bytes (offsets 58..60) of every
/// 60-byte member header.
pub const AR_FMAG: &[u8; 2] = b"`\n";
/// Size in bytes of one `ar_hdr` on the wire, terminator included.
pub const AR_HDR_SIZE: usize = 60;
35
/// Failure modes of the archive reader.
///
/// Variants describing a single member carry `at_offset`, the byte offset
/// within the archive buffer that the diagnostic should point at.
#[derive(Debug)]
pub enum ArchiveError {
    /// Fewer bytes are available than the structure being read requires.
    Truncated {
        /// Bytes the structure needs.
        need: usize,
        /// Bytes actually available.
        have: usize,
        /// Short label for the structure being read.
        context: &'static str,
    },
    /// The leading 8 bytes match neither `!<arch>\n` nor `!<thin>\n`.
    BadMagic { got: [u8; 8] },
    /// A member header did not end with the `` `\n `` terminator.
    BadEntryFooter { at_offset: usize },
    /// An ASCII-decimal field (size/date/uid/gid/mode) could not be parsed.
    BadAsciiField {
        at_offset: usize,
        /// Name of the offending field.
        field: &'static str,
    },
    /// A member's declared size extends past the end of the archive.
    MemberOverrun { at_offset: usize, size: u64 },
    /// A `/NNN` long-name reference could not be resolved in the `//` table.
    LongNameOob { at_offset: usize, strx: u32 },
    /// A required special member (e.g. `__.SYMDEF`) is structurally invalid.
    BadSymbolIndex { reason: &'static str },
    /// A member name could not be interpreted as UTF-8.
    BadName { at_offset: usize },
}
63
64 impl fmt::Display for ArchiveError {
65 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66 match self {
67 ArchiveError::Truncated { need, have, context } => write!(
68 f,
69 "archive truncated while reading {context}: need {need} bytes, have {have}"
70 ),
71 ArchiveError::BadMagic { got } => write!(
72 f,
73 "archive magic {:?} is not `!<arch>\\n` or `!<thin>\\n`",
74 String::from_utf8_lossy(got)
75 ),
76 ArchiveError::BadEntryFooter { at_offset } => write!(
77 f,
78 "archive member at offset 0x{at_offset:x}: missing or malformed entry footer"
79 ),
80 ArchiveError::BadAsciiField { at_offset, field } => write!(
81 f,
82 "archive member at offset 0x{at_offset:x}: {field} field is not ASCII decimal"
83 ),
84 ArchiveError::MemberOverrun { at_offset, size } => write!(
85 f,
86 "archive member at offset 0x{at_offset:x}: claimed size {size} overruns archive"
87 ),
88 ArchiveError::LongNameOob { at_offset, strx } => write!(
89 f,
90 "archive member at offset 0x{at_offset:x}: /{strx} out of bounds of // long-name table"
91 ),
92 ArchiveError::BadSymbolIndex { reason } => {
93 write!(f, "archive symbol index malformed: {reason}")
94 }
95 ArchiveError::BadName { at_offset } => write!(
96 f,
97 "archive member at offset 0x{at_offset:x}: name is not valid UTF-8"
98 ),
99 }
100 }
101 }
102
103 impl std::error::Error for ArchiveError {}
104
/// On-disk dialect of an archive.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Flavor {
    /// `!<arch>\n` magic; long member names use the BSD "extended" form
    /// (`#1/<N>`).
    Bsd,
    /// `!<arch>\n` magic; short names look like `foo.o/`, and names that do
    /// not fit in the 16-byte field live in a `//` long-name string table.
    Sysv,
    /// `!<thin>\n` magic; member bodies are zero bytes and the name field
    /// carries an external path, resolved lazily against the archive's
    /// parent directory.
    GnuThin,
}
117
/// One 60-byte `ar_hdr`, split into its fixed-width fields.
///
/// The name bytes are kept exactly as they appear on the wire (padding and
/// any terminating slash included); `Member` performs the flavor-specific
/// decoding that turns them into a real filename.
#[derive(Clone, Copy, Debug)]
pub struct ArHeader {
    /// Raw 16-byte name field.
    pub name: [u8; 16],
    /// Raw 12-byte modification-date field (not decoded).
    pub date: [u8; 12],
    /// Raw 6-byte owner-id field (not decoded).
    pub uid: [u8; 6],
    /// Raw 6-byte group-id field (not decoded).
    pub gid: [u8; 6],
    /// Raw 8-byte file-mode field (not decoded).
    pub mode: [u8; 8],
    /// Value of the ASCII-decimal `size` field, already parsed.
    pub size: u64,
}
131
132 impl ArHeader {
133 pub fn parse(bytes: &[u8], at_offset: usize) -> Result<Self, ArchiveError> {
134 if bytes.len() < AR_HDR_SIZE {
135 return Err(ArchiveError::Truncated {
136 need: AR_HDR_SIZE,
137 have: bytes.len(),
138 context: "ar_hdr",
139 });
140 }
141 let fmag: [u8; 2] = [bytes[58], bytes[59]];
142 if &fmag != AR_FMAG {
143 return Err(ArchiveError::BadEntryFooter { at_offset });
144 }
145 let size = ascii_decimal(&bytes[48..58]).map_err(|_| ArchiveError::BadAsciiField {
146 at_offset,
147 field: "size",
148 })?;
149 Ok(ArHeader {
150 name: bytes[0..16].try_into().unwrap(),
151 date: bytes[16..28].try_into().unwrap(),
152 uid: bytes[28..34].try_into().unwrap(),
153 gid: bytes[34..40].try_into().unwrap(),
154 mode: bytes[40..48].try_into().unwrap(),
155 size,
156 })
157 }
158
159 pub fn raw_name_str(&self) -> &str {
160 trim_ascii(&self.name)
161 }
162 }
163
/// The structural role, if any, that a member plays inside its archive.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpecialMember {
    /// An ordinary member with no structural role.
    None,
    /// The BSD symbol index: `__.SYMDEF` or `__.SYMDEF SORTED`.
    BsdSymIndex,
    /// The SysV symbol index, named `/`.
    SysvSymIndex,
    /// The SysV long-name string table, named `//`.
    SysvLongNames,
}
175
176 /// A single parsed archive member.
177 #[derive(Debug, Clone)]
178 pub struct Member<'a> {
179 /// Real filename, post-flavor-specific decoding.
180 pub name: String,
181 /// Byte offset of the member's `ar_hdr` within the archive.
182 pub header_offset: usize,
183 /// Byte offset of the member's visible body (after any BSD extended-name
184 /// prefix is stripped). For `GnuThin`, `body.len() == 0` and the real
185 /// contents live in an external file named by `name`.
186 pub body_offset: usize,
187 /// Raw accessible body bytes.
188 pub body: &'a [u8],
189 /// Present for members that serve a special structural role.
190 pub special: SpecialMember,
191 }
192
193 #[derive(Debug)]
194 pub struct Archive<'a> {
195 pub path: PathBuf,
196 pub flavor: Flavor,
197 data: &'a [u8],
198 members: Vec<Member<'a>>,
199 symbol_index: Option<SymbolIndex>,
200 }
201
202 impl<'a> Archive<'a> {
203 /// Open an archive given its raw bytes and the source path (for
204 /// diagnostics and GNU-thin external-file resolution).
205 pub fn open(path: impl Into<PathBuf>, data: &'a [u8]) -> Result<Self, ArchiveError> {
206 let flavor = detect_flavor(data)?;
207 let (members, flavor) = parse_members(data, flavor)?;
208 let symbol_index = build_symbol_index(&members)?;
209 Ok(Archive {
210 path: path.into(),
211 flavor,
212 data,
213 members,
214 symbol_index,
215 })
216 }
217
218 pub fn symbol_index(&self) -> Option<&SymbolIndex> {
219 self.symbol_index.as_ref()
220 }
221
222 /// Raw bytes following the 8-byte magic — where member entries begin.
223 pub fn body_bytes(&self) -> &'a [u8] {
224 &self.data[AR_MAGIC.len()..]
225 }
226
227 /// Starting offset (inside the full archive buffer) of the first member.
228 pub const fn body_start(&self) -> usize {
229 AR_MAGIC.len()
230 }
231
232 pub fn members(&self) -> &[Member<'a>] {
233 &self.members
234 }
235
236 /// Return every non-special member (skips symbol indexes and long-name
237 /// tables).
238 pub fn object_members(&self) -> impl Iterator<Item = &Member<'a>> {
239 self.members
240 .iter()
241 .filter(|m| m.special == SpecialMember::None)
242 }
243
244 /// Find the first member whose `ar_hdr` begins at `header_offset`. The
245 /// symbol-index's `member_header_offset` fields feed into this lookup.
246 pub fn member_at_offset(&self, header_offset: u32) -> Option<&Member<'a>> {
247 self.members
248 .iter()
249 .find(|m| m.header_offset == header_offset as usize)
250 }
251
252 /// First member that defines `name` according to the archive's symbol
253 /// index. Returns `None` when either no index is present or `name` is
254 /// absent.
255 pub fn first_member_defining(&self, name: &str) -> Option<&Member<'a>> {
256 let off = self.symbol_index.as_ref()?.first_defining_offset(name)?;
257 self.member_at_offset(off)
258 }
259
260 /// Parse a member's body as a Mach-O `ObjectFile`. Non-thin members use
261 /// the in-buffer slice; GNU-thin members read their external file on
262 /// demand.
263 pub fn parse_member_object(&self, member: &Member<'a>) -> Result<ObjectFile, FetchError> {
264 let logical_path = self.member_logical_path(member);
265 match self.flavor {
266 Flavor::GnuThin => {
267 let file_path = self.member_external_path(member);
268 let bytes = std::fs::read(&file_path).map_err(FetchError::Io)?;
269 ObjectFile::parse(logical_path, &bytes).map_err(FetchError::Read)
270 }
271 _ => ObjectFile::parse(logical_path, member.body).map_err(FetchError::Read),
272 }
273 }
274
275 /// Resolve `name` to its defining member, then parse that member as an
276 /// `ObjectFile`. Returns `None` when the symbol is absent; `Some(Err(_))`
277 /// when the member exists but fails to parse.
278 pub fn fetch_object_defining(&self, name: &str) -> Option<Result<ObjectFile, FetchError>> {
279 let member = self.first_member_defining(name)?;
280 Some(self.parse_member_object(member))
281 }
282
283 /// Produce the display path a member should surface as when parsed:
284 /// `/abs/path/libfoo.a(foo.o)`. For thin archives the path is the
285 /// external source file — that path is useful on its own.
286 fn member_logical_path(&self, member: &Member<'a>) -> PathBuf {
287 if self.flavor == Flavor::GnuThin {
288 return self.member_external_path(member);
289 }
290 let mut s = self.path.as_os_str().to_owned();
291 s.push("(");
292 s.push(member.name.as_str());
293 s.push(")");
294 PathBuf::from(s)
295 }
296
297 fn member_external_path(&self, member: &Member<'a>) -> PathBuf {
298 let base = self.path.parent().unwrap_or_else(|| Path::new("."));
299 base.join(&member.name)
300 }
301 }
302
303 /// Unified error for member fetching — I/O (GNU-thin only) or Mach-O parse.
304 #[derive(Debug)]
305 pub enum FetchError {
306 Io(std::io::Error),
307 Read(ReadError),
308 }
309
310 impl fmt::Display for FetchError {
311 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
312 match self {
313 FetchError::Io(e) => write!(f, "thin-member I/O: {e}"),
314 FetchError::Read(e) => write!(f, "{e}"),
315 }
316 }
317 }
318
319 impl std::error::Error for FetchError {}
320
321 // ---------------------------------------------------------------------------
322 // Member walking & name decoding.
323 // ---------------------------------------------------------------------------
324
325 fn parse_members<'a>(
326 data: &'a [u8],
327 initial_flavor: Flavor,
328 ) -> Result<(Vec<Member<'a>>, Flavor), ArchiveError> {
329 let mut cursor = AR_MAGIC.len();
330 let mut flavor = initial_flavor;
331 let mut long_names: Option<&'a [u8]> = None;
332 let mut out: Vec<Member<'a>> = Vec::new();
333
334 while cursor < data.len() {
335 if cursor + AR_HDR_SIZE > data.len() {
336 return Err(ArchiveError::Truncated {
337 need: AR_HDR_SIZE,
338 have: data.len() - cursor,
339 context: "ar_hdr",
340 });
341 }
342 let hdr = ArHeader::parse(&data[cursor..cursor + AR_HDR_SIZE], cursor)?;
343 let size = hdr.size as usize;
344 let body_start = cursor + AR_HDR_SIZE;
345 let body_end = body_start
346 .checked_add(size)
347 .ok_or(ArchiveError::MemberOverrun {
348 at_offset: cursor,
349 size: hdr.size,
350 })?;
351 if flavor != Flavor::GnuThin && body_end > data.len() {
352 return Err(ArchiveError::MemberOverrun {
353 at_offset: cursor,
354 size: hdr.size,
355 });
356 }
357 let raw_name = hdr.raw_name_str();
358 let (real_name, body_offset, body, special) =
359 decode_member(raw_name, data, body_start, size, flavor, long_names, cursor)?;
360
361 // Opportunistic flavor refinement. The sole unambiguous signal for
362 // Sysv is the presence of `/` or `//` members — BSD never emits
363 // either.
364 match special {
365 SpecialMember::SysvSymIndex | SpecialMember::SysvLongNames => {
366 flavor = Flavor::Sysv;
367 }
368 _ => {}
369 }
370 if matches!(special, SpecialMember::SysvLongNames) {
371 long_names = Some(body);
372 }
373
374 out.push(Member {
375 name: real_name,
376 header_offset: cursor,
377 body_offset,
378 body,
379 special,
380 });
381
382 // Advance past body + 1-byte alignment pad for odd sizes (GNU-thin
383 // members have zero-byte bodies so this collapses to a no-op).
384 let advance = AR_HDR_SIZE + size + (size & 1);
385 cursor = cursor
386 .checked_add(advance)
387 .ok_or(ArchiveError::MemberOverrun {
388 at_offset: cursor,
389 size: hdr.size,
390 })?;
391 }
392
393 Ok((out, flavor))
394 }
395
396 #[allow(clippy::too_many_arguments)]
397 fn decode_member<'a>(
398 raw_name: &str,
399 data: &'a [u8],
400 body_start: usize,
401 size: usize,
402 flavor: Flavor,
403 long_names: Option<&'a [u8]>,
404 header_offset: usize,
405 ) -> Result<(String, usize, &'a [u8], SpecialMember), ArchiveError> {
406 // GNU-thin: body is zero bytes; name field is the external path.
407 if flavor == Flavor::GnuThin {
408 let name = raw_name.trim_end_matches('/').to_string();
409 return Ok((
410 name,
411 body_start,
412 &data[body_start..body_start],
413 SpecialMember::None,
414 ));
415 }
416
417 // BSD extended: "#1/<N>" — first N bytes of the body are the real name.
418 if let Some(rest) = raw_name.strip_prefix("#1/") {
419 let nlen: usize = rest.parse().map_err(|_| ArchiveError::BadAsciiField {
420 at_offset: header_offset,
421 field: "#1/<N>",
422 })?;
423 if body_start + nlen > data.len() || nlen > size {
424 return Err(ArchiveError::MemberOverrun {
425 at_offset: header_offset,
426 size: size as u64,
427 });
428 }
429 let name_bytes = &data[body_start..body_start + nlen];
430 let name = str::from_utf8(name_bytes).map_err(|_| ArchiveError::BadName {
431 at_offset: header_offset,
432 })?;
433 // Trim trailing nulls (some writers pad the name to 4/8 bytes).
434 let name = name.trim_end_matches('\0').to_string();
435 let body = &data[body_start + nlen..body_start + size];
436 let special = if name == "__.SYMDEF" || name == "__.SYMDEF SORTED" {
437 SpecialMember::BsdSymIndex
438 } else {
439 SpecialMember::None
440 };
441 return Ok((name, body_start + nlen, body, special));
442 }
443
444 // SysV structural members.
445 if raw_name == "/" {
446 return Ok((
447 "/".to_string(),
448 body_start,
449 &data[body_start..body_start + size],
450 SpecialMember::SysvSymIndex,
451 ));
452 }
453 if raw_name == "//" {
454 return Ok((
455 "//".to_string(),
456 body_start,
457 &data[body_start..body_start + size],
458 SpecialMember::SysvLongNames,
459 ));
460 }
461
462 // SysV long-name reference: "/NNN".
463 if let Some(rest) = raw_name.strip_prefix('/') {
464 if rest.chars().all(|c| c.is_ascii_digit()) && !rest.is_empty() {
465 let strx: u32 = rest.parse().map_err(|_| ArchiveError::BadAsciiField {
466 at_offset: header_offset,
467 field: "/NNN",
468 })?;
469 let table = long_names.ok_or(ArchiveError::LongNameOob {
470 at_offset: header_offset,
471 strx,
472 })?;
473 let name = decode_long_name(table, strx, header_offset)?;
474 return Ok((
475 name,
476 body_start,
477 &data[body_start..body_start + size],
478 SpecialMember::None,
479 ));
480 }
481 }
482
483 // SysV short name — slash-terminated.
484 if let Some(stripped) = raw_name.strip_suffix('/') {
485 return Ok((
486 stripped.to_string(),
487 body_start,
488 &data[body_start..body_start + size],
489 SpecialMember::None,
490 ));
491 }
492
493 // BSD short name (no trailing slash, no #1/ prefix).
494 Ok((
495 raw_name.to_string(),
496 body_start,
497 &data[body_start..body_start + size],
498 SpecialMember::None,
499 ))
500 }
501
502 // ---------------------------------------------------------------------------
503 // Symbol index parsing (BSD __.SYMDEF / SysV `/`).
504 // ---------------------------------------------------------------------------
505
/// A single `(symbol, defining member)` row of the archive symbol index.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SymbolIndexEntry {
    /// The defined symbol's name.
    pub name: String,
    /// Byte offset, within the archive, of the defining member's `ar_hdr`.
    pub member_header_offset: u32,
}
513
514 /// The table of `(symbol → defining member offset)` the archive writer
515 /// embeds up front. A given name can appear multiple times — ld's policy is
516 /// to take the first defining member. Iteration preserves the source order.
517 #[derive(Debug, Clone, Default)]
518 pub struct SymbolIndex {
519 pub entries: Vec<SymbolIndexEntry>,
520 }
521
522 impl SymbolIndex {
523 pub fn is_empty(&self) -> bool {
524 self.entries.is_empty()
525 }
526
527 pub fn len(&self) -> usize {
528 self.entries.len()
529 }
530
531 /// Return the `ar_hdr` offset of the first member that defines `name`,
532 /// or `None` if the index has no such entry. The "first" rule matches
533 /// ld's classic ordering — a later duplicate doesn't shadow an earlier.
534 pub fn first_defining_offset(&self, name: &str) -> Option<u32> {
535 self.entries
536 .iter()
537 .find(|e| e.name == name)
538 .map(|e| e.member_header_offset)
539 }
540
541 /// All `ar_hdr` offsets where `name` appears. Exists for `-all_load` /
542 /// `-force_load` semantics and for tests that want to verify duplicates.
543 pub fn offsets_for<'n>(&'n self, name: &'n str) -> impl Iterator<Item = u32> + 'n {
544 self.entries
545 .iter()
546 .filter(move |e| e.name == name)
547 .map(|e| e.member_header_offset)
548 }
549 }
550
551 fn build_symbol_index(members: &[Member<'_>]) -> Result<Option<SymbolIndex>, ArchiveError> {
552 for m in members {
553 match m.special {
554 SpecialMember::BsdSymIndex => {
555 return Ok(Some(parse_bsd_symbol_index(m.body)?));
556 }
557 SpecialMember::SysvSymIndex => {
558 return Ok(Some(parse_sysv_symbol_index(m.body)?));
559 }
560 _ => {}
561 }
562 }
563 Ok(None)
564 }
565
566 /// BSD `__.SYMDEF` / `__.SYMDEF SORTED`, little-endian:
567 /// ```text
568 /// u32 ranlib_byte_count
569 /// ranlib[] { u32 strx; u32 member_header_offset; }
570 /// u32 string_size
571 /// char strings[string_size]
572 /// ```
573 fn parse_bsd_symbol_index(body: &[u8]) -> Result<SymbolIndex, ArchiveError> {
574 if body.len() < 8 {
575 return Err(ArchiveError::BadSymbolIndex {
576 reason: "__.SYMDEF shorter than 8-byte header",
577 });
578 }
579 let ranlib_bytes = u32::from_le_bytes(body[0..4].try_into().unwrap()) as usize;
580 if !ranlib_bytes.is_multiple_of(8) {
581 return Err(ArchiveError::BadSymbolIndex {
582 reason: "__.SYMDEF ranlib byte count not a multiple of 8",
583 });
584 }
585 let ranlib_end = 4usize
586 .checked_add(ranlib_bytes)
587 .ok_or(ArchiveError::BadSymbolIndex {
588 reason: "__.SYMDEF ranlib region overflows",
589 })?;
590 if ranlib_end + 4 > body.len() {
591 return Err(ArchiveError::BadSymbolIndex {
592 reason: "__.SYMDEF ranlib + stringsize region overruns member",
593 });
594 }
595 let stringsize =
596 u32::from_le_bytes(body[ranlib_end..ranlib_end + 4].try_into().unwrap()) as usize;
597 let strings_start = ranlib_end + 4;
598 let strings_end =
599 strings_start
600 .checked_add(stringsize)
601 .ok_or(ArchiveError::BadSymbolIndex {
602 reason: "__.SYMDEF strings region overflows",
603 })?;
604 if strings_end > body.len() {
605 return Err(ArchiveError::BadSymbolIndex {
606 reason: "__.SYMDEF strings region overruns member",
607 });
608 }
609 let strings = &body[strings_start..strings_end];
610
611 let mut entries = Vec::with_capacity(ranlib_bytes / 8);
612 let mut off = 4;
613 while off < ranlib_end {
614 let strx = u32::from_le_bytes(body[off..off + 4].try_into().unwrap()) as usize;
615 let mh_off = u32::from_le_bytes(body[off + 4..off + 8].try_into().unwrap());
616 if strx >= strings.len() {
617 return Err(ArchiveError::BadSymbolIndex {
618 reason: "ranlib strx out of bounds",
619 });
620 }
621 let end = strings[strx..]
622 .iter()
623 .position(|&b| b == 0)
624 .map(|i| strx + i)
625 .ok_or(ArchiveError::BadSymbolIndex {
626 reason: "ranlib name not null-terminated",
627 })?;
628 let name = str::from_utf8(&strings[strx..end])
629 .map_err(|_| ArchiveError::BadSymbolIndex {
630 reason: "ranlib name not UTF-8",
631 })?
632 .to_string();
633 entries.push(SymbolIndexEntry {
634 name,
635 member_header_offset: mh_off,
636 });
637 off += 8;
638 }
639 Ok(SymbolIndex { entries })
640 }
641
642 /// SysV `/` symbol index, big-endian:
643 /// ```text
644 /// u32 nsyms
645 /// u32 offsets[nsyms] — each a member ar_hdr offset
646 /// char strings[] — nsyms null-terminated names, concatenated
647 /// ```
648 fn parse_sysv_symbol_index(body: &[u8]) -> Result<SymbolIndex, ArchiveError> {
649 if body.len() < 4 {
650 return Err(ArchiveError::BadSymbolIndex {
651 reason: "SysV symbol index shorter than 4-byte header",
652 });
653 }
654 let nsyms = u32::from_be_bytes(body[0..4].try_into().unwrap()) as usize;
655 let offsets_end = 4usize
656 .checked_add(nsyms * 4)
657 .ok_or(ArchiveError::BadSymbolIndex {
658 reason: "SysV symbol-index offsets region overflows",
659 })?;
660 if offsets_end > body.len() {
661 return Err(ArchiveError::BadSymbolIndex {
662 reason: "SysV symbol-index offsets region overruns member",
663 });
664 }
665 let strings = &body[offsets_end..];
666
667 let mut entries = Vec::with_capacity(nsyms);
668 let mut cursor = 0usize;
669 for i in 0..nsyms {
670 let off = 4 + i * 4;
671 let mh_off = u32::from_be_bytes(body[off..off + 4].try_into().unwrap());
672 if cursor >= strings.len() {
673 return Err(ArchiveError::BadSymbolIndex {
674 reason: "SysV symbol-index names exhausted before nsyms satisfied",
675 });
676 }
677 let end = strings[cursor..]
678 .iter()
679 .position(|&b| b == 0)
680 .map(|i| cursor + i)
681 .ok_or(ArchiveError::BadSymbolIndex {
682 reason: "SysV symbol-index name not null-terminated",
683 })?;
684 let name = str::from_utf8(&strings[cursor..end])
685 .map_err(|_| ArchiveError::BadSymbolIndex {
686 reason: "SysV symbol-index name not UTF-8",
687 })?
688 .to_string();
689 entries.push(SymbolIndexEntry {
690 name,
691 member_header_offset: mh_off,
692 });
693 cursor = end + 1;
694 }
695 Ok(SymbolIndex { entries })
696 }
697
698 fn decode_long_name(table: &[u8], strx: u32, at_offset: usize) -> Result<String, ArchiveError> {
699 let start = strx as usize;
700 if start >= table.len() {
701 return Err(ArchiveError::LongNameOob { at_offset, strx });
702 }
703 // GNU-style long names end at either a null or a "/\n" sequence; we
704 // accept the more permissive union and stop at the first of either.
705 let end = table[start..]
706 .iter()
707 .position(|&b| b == 0 || b == b'\n')
708 .map(|i| start + i)
709 .unwrap_or(table.len());
710 // Strip a trailing slash that GNU appends before the newline.
711 let trimmed_end = if end > 0 && table[end - 1] == b'/' {
712 end - 1
713 } else {
714 end
715 };
716 str::from_utf8(&table[start..trimmed_end])
717 .map(|s| s.to_string())
718 .map_err(|_| ArchiveError::BadName { at_offset })
719 }
720
721 pub fn detect_flavor(data: &[u8]) -> Result<Flavor, ArchiveError> {
722 if data.len() < AR_MAGIC.len() {
723 return Err(ArchiveError::Truncated {
724 need: AR_MAGIC.len(),
725 have: data.len(),
726 context: "archive magic",
727 });
728 }
729 let head: [u8; 8] = data[..8].try_into().unwrap();
730 match &head {
731 m if m == AR_MAGIC => Ok(Flavor::Bsd), // refined vs Sysv by peeking at first member
732 m if m == AR_MAGIC_THIN => Ok(Flavor::GnuThin),
733 _ => Err(ArchiveError::BadMagic { got: head }),
734 }
735 }
736
737 // ---------------------------------------------------------------------------
738 // Small helpers.
739 // ---------------------------------------------------------------------------
740
/// Strip trailing spaces and NUL bytes from a fixed-width field and view the
/// remainder as text.
///
/// Returns `""` when the remaining bytes are not valid UTF-8. `ar_hdr`
/// fields are ASCII in practice, so that case indicates a malformed header.
pub(crate) fn trim_ascii(bytes: &[u8]) -> &str {
    let mut kept = bytes;
    // Drop padding bytes from the back, one at a time.
    while let [head @ .., last] = kept {
        if *last == b' ' || *last == 0 {
            kept = head;
        } else {
            break;
        }
    }
    str::from_utf8(kept).unwrap_or("")
}
752
753 /// Parse a right-trimmed ASCII-decimal field into `u64`. Empty (all-space)
754 /// fields return 0 — this matches what Apple's `ar` writes for `date/uid/gid/mode`
755 /// on anonymized archives.
756 pub(crate) fn ascii_decimal(bytes: &[u8]) -> Result<u64, ()> {
757 let s = trim_ascii(bytes);
758 if s.is_empty() {
759 return Ok(0);
760 }
761 s.parse::<u64>().map_err(|_| ())
762 }
763
764 #[cfg(test)]
765 mod tests {
766 use super::*;
767
768 #[test]
769 fn detect_bsd_flavor() {
770 let buf = b"!<arch>\nextra bytes";
771 let flavor = detect_flavor(buf).unwrap();
772 assert_eq!(flavor, Flavor::Bsd);
773 }
774
775 #[test]
776 fn detect_thin_flavor() {
777 let buf = b"!<thin>\n";
778 assert_eq!(detect_flavor(buf).unwrap(), Flavor::GnuThin);
779 }
780
781 #[test]
782 fn detect_rejects_bad_magic() {
783 let buf = b"NOTARCH!";
784 assert!(matches!(
785 detect_flavor(buf).unwrap_err(),
786 ArchiveError::BadMagic { .. }
787 ));
788 }
789
790 #[test]
791 fn detect_rejects_truncated_magic() {
792 let buf = b"!<ar";
793 assert!(matches!(
794 detect_flavor(buf).unwrap_err(),
795 ArchiveError::Truncated { .. }
796 ));
797 }
798
799 /// Synthesize a 60-byte ar_hdr with the given fixed fields.
800 fn make_ar_hdr(name: &str, size: u64) -> Vec<u8> {
801 let mut buf = Vec::with_capacity(AR_HDR_SIZE);
802 let name_bytes = name.as_bytes();
803 let mut name_field = [b' '; 16];
804 name_field[..name_bytes.len().min(16)]
805 .copy_from_slice(&name_bytes[..name_bytes.len().min(16)]);
806 buf.extend_from_slice(&name_field);
807 buf.extend_from_slice(&[b' '; 12]); // date
808 buf.extend_from_slice(&[b' '; 6]); // uid
809 buf.extend_from_slice(&[b' '; 6]); // gid
810 buf.extend_from_slice(&[b' '; 8]); // mode
811 let mut size_field = [b' '; 10];
812 let size_str = size.to_string();
813 let bytes = size_str.as_bytes();
814 size_field[..bytes.len()].copy_from_slice(bytes);
815 buf.extend_from_slice(&size_field);
816 buf.extend_from_slice(AR_FMAG);
817 buf
818 }
819
820 #[test]
821 fn ar_header_decodes_fields() {
822 let hdr = make_ar_hdr("foo.o/", 128);
823 let parsed = ArHeader::parse(&hdr, 0).unwrap();
824 assert_eq!(parsed.size, 128);
825 assert_eq!(parsed.raw_name_str(), "foo.o/");
826 }
827
828 #[test]
829 fn ar_header_rejects_bad_fmag() {
830 let mut hdr = make_ar_hdr("foo.o/", 0);
831 hdr[58] = b'X';
832 assert!(matches!(
833 ArHeader::parse(&hdr, 0x40).unwrap_err(),
834 ArchiveError::BadEntryFooter { at_offset: 0x40 }
835 ));
836 }
837
838 #[test]
839 fn ar_header_rejects_non_decimal_size() {
840 let mut hdr = make_ar_hdr("foo.o/", 0);
841 hdr[48..58].copy_from_slice(b"abc ");
842 assert!(matches!(
843 ArHeader::parse(&hdr, 0).unwrap_err(),
844 ArchiveError::BadAsciiField { field: "size", .. }
845 ));
846 }
847
848 #[test]
849 fn ar_header_truncated_errors() {
850 let short = vec![0u8; 30];
851 assert!(matches!(
852 ArHeader::parse(&short, 0).unwrap_err(),
853 ArchiveError::Truncated {
854 need: AR_HDR_SIZE,
855 ..
856 }
857 ));
858 }
859
860 #[test]
861 fn ascii_decimal_accepts_empty_as_zero() {
862 assert_eq!(ascii_decimal(b" ").unwrap(), 0);
863 assert_eq!(ascii_decimal(b"42 ").unwrap(), 42);
864 assert!(ascii_decimal(b"abc ").is_err());
865 }
866
867 #[test]
868 fn archive_open_exposes_body_bytes() {
869 let mut buf = Vec::new();
870 buf.extend_from_slice(AR_MAGIC);
871 let ar = Archive::open("/tmp/empty.a", &buf).unwrap();
872 assert_eq!(ar.flavor, Flavor::Bsd);
873 assert_eq!(ar.body_bytes(), b"");
874 assert_eq!(ar.body_start(), AR_MAGIC.len());
875 assert!(ar.members().is_empty());
876 }
877
878 // ----- helpers for member fixtures -----
879
880 /// Build a member: 60-byte header with name field `raw_name`, size
881 /// equal to `body.len()`, followed by the body bytes and a 1-byte pad
882 /// if the body is odd-length.
883 fn encode_member(raw_name: &str, body: &[u8]) -> Vec<u8> {
884 let mut out = make_ar_hdr(raw_name, body.len() as u64);
885 out.extend_from_slice(body);
886 if body.len() & 1 != 0 {
887 out.push(b'\n');
888 }
889 out
890 }
891
892 /// Build a BSD extended-name member: raw name "#1/<N>", body = name + content.
893 fn encode_bsd_extended(name: &str, content: &[u8]) -> Vec<u8> {
894 let raw = format!("#1/{}", name.len());
895 let mut body = Vec::with_capacity(name.len() + content.len());
896 body.extend_from_slice(name.as_bytes());
897 body.extend_from_slice(content);
898 encode_member(&raw, &body)
899 }
900
901 #[test]
902 fn bsd_short_name_roundtrips() {
903 let mut buf = Vec::new();
904 buf.extend_from_slice(AR_MAGIC);
905 buf.extend_from_slice(&encode_member("foo.o", b"XXXX"));
906 let ar = Archive::open("/tmp/bsd.a", &buf).unwrap();
907 assert_eq!(ar.members().len(), 1);
908 assert_eq!(ar.members()[0].name, "foo.o");
909 assert_eq!(ar.members()[0].body, b"XXXX");
910 assert_eq!(ar.members()[0].special, SpecialMember::None);
911 }
912
913 #[test]
914 fn bsd_extended_name_splits_body() {
915 let mut buf = Vec::new();
916 buf.extend_from_slice(AR_MAGIC);
917 buf.extend_from_slice(&encode_bsd_extended(
918 "long_filename_with_many_chars.o",
919 b"CONT",
920 ));
921 let ar = Archive::open("/tmp/bsd_ext.a", &buf).unwrap();
922 assert_eq!(ar.members()[0].name, "long_filename_with_many_chars.o");
923 assert_eq!(ar.members()[0].body, b"CONT");
924 }
925
926 #[test]
927 fn bsd_extended_symdef_marked_special() {
928 let empty_idx = encode_bsd_symbol_index(&[]);
929 let mut buf = Vec::new();
930 buf.extend_from_slice(AR_MAGIC);
931 buf.extend_from_slice(&encode_bsd_extended("__.SYMDEF SORTED", &empty_idx));
932 let ar = Archive::open("/tmp/bsd_symdef.a", &buf).unwrap();
933 assert_eq!(ar.members()[0].special, SpecialMember::BsdSymIndex);
934 }
935
936 #[test]
937 fn sysv_short_name_strips_slash() {
938 let mut buf = Vec::new();
939 buf.extend_from_slice(AR_MAGIC);
940 // Sysv short names are right-padded with spaces and terminated by '/'.
941 buf.extend_from_slice(&encode_member("foo.o/", b"aa"));
942 let ar = Archive::open("/tmp/sysv.a", &buf).unwrap();
943 assert_eq!(ar.members()[0].name, "foo.o");
944 }
945
946 #[test]
947 fn sysv_long_names_resolve_via_slash_slash_table() {
948 // Long-name table content: "really_long_name.o/\nfoo/\n"
949 // Offsets: 0 → "really_long_name.o", 20 → "foo".
950 let lns_body: &[u8] = b"really_long_name.o/\nfoo/\n";
951 let mut buf = Vec::new();
952 buf.extend_from_slice(AR_MAGIC);
953 buf.extend_from_slice(&encode_member("//", lns_body));
954 buf.extend_from_slice(&encode_member("/0", b"BODY1"));
955 buf.extend_from_slice(&encode_member("/20", b"BODY2"));
956 let ar = Archive::open("/tmp/sysv_long.a", &buf).unwrap();
957 assert_eq!(ar.flavor, Flavor::Sysv);
958 assert_eq!(ar.members().len(), 3);
959 assert_eq!(ar.members()[0].special, SpecialMember::SysvLongNames);
960 assert_eq!(ar.members()[1].name, "really_long_name.o");
961 assert_eq!(ar.members()[1].body, b"BODY1");
962 assert_eq!(ar.members()[2].name, "foo");
963 }
964
#[test]
fn sysv_symbol_index_member_is_special() {
    // A member literally named "/" is the SysV symbol index; an empty index
    // body is enough to exercise the classification.
    let index_body = encode_sysv_symbol_index(&[]);
    let mut archive_bytes = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_member("/", &index_body));

    let archive = Archive::open("/tmp/sysv_sym.a", &archive_bytes).unwrap();
    let first = &archive.members()[0];
    assert_eq!(first.special, SpecialMember::SysvSymIndex);
    assert_eq!(archive.flavor, Flavor::Sysv);
}
975
#[test]
fn odd_sized_member_pad_byte_consumed() {
    let mut archive_bytes = AR_MAGIC.to_vec();
    // "ODD" is 3 bytes → followed by 1 pad byte; "AA" is 2 bytes → no pad.
    // If the pad byte were not skipped, the second header would be misread.
    archive_bytes.extend_from_slice(&encode_member("a.o", b"ODD"));
    archive_bytes.extend_from_slice(&encode_member("b.o", b"AA"));

    let archive = Archive::open("/tmp/pad.a", &archive_bytes).unwrap();
    let members = archive.members();
    assert_eq!(members.len(), 2);
    assert_eq!(members[0].name, "a.o");
    assert_eq!(members[1].name, "b.o");
}
987
#[test]
fn object_members_skip_specials() {
    // One special member (the BSD symbol index) followed by one ordinary member.
    let symdef_body = encode_bsd_symbol_index(&[]);
    let mut archive_bytes = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_bsd_extended("__.SYMDEF", &symdef_body));
    archive_bytes.extend_from_slice(&encode_member("real.o", b"CONTENT"));

    let archive = Archive::open("/tmp/mixed.a", &archive_bytes).unwrap();
    // `members()` reports both entries...
    assert_eq!(archive.members().len(), 2);
    // ...while `object_members()` yields only the ordinary one.
    let object_names: Vec<_> = archive
        .object_members()
        .map(|member| member.name.clone())
        .collect();
    assert_eq!(object_names, vec!["real.o"]);
}
1000
#[test]
fn gnu_thin_decodes_paths_without_bodies() {
    // Thin members have zero-byte bodies, so the archive is the thin magic
    // followed by bare headers only.
    let mut archive_bytes = AR_MAGIC_THIN.to_vec();
    archive_bytes.extend_from_slice(&make_ar_hdr("../foo.o/", 0));
    archive_bytes.extend_from_slice(&make_ar_hdr("bar.o/", 0));

    let archive = Archive::open("/tmp/thin.a", &archive_bytes).unwrap();
    assert_eq!(archive.flavor, Flavor::GnuThin);

    let members = archive.members();
    assert_eq!(members.len(), 2);
    assert_eq!(members[0].name, "../foo.o");
    assert_eq!(members[1].name, "bar.o");
}
1014
1015 // ----- symbol-index tests -----
1016
/// Build a BSD `__.SYMDEF` body: ranlib array + stringtab.
///
/// Each entry is `(symbol name, member header offset)`. The produced layout,
/// with every integer a little-endian `u32`, is:
///
/// 1. byte size of the ranlib array (`entries.len() * 8`),
/// 2. one `(strx, mh_off)` pair per entry, where `strx` is the byte offset of
///    the entry's name inside the string table,
/// 3. byte size of the string table,
/// 4. the string table itself: every name followed by a NUL byte, in entry
///    order (duplicate names are stored once per entry).
///
/// # Panics
///
/// Panics if the ranlib array size, a string offset, or the string table size
/// does not fit in a `u32`, rather than silently truncating and emitting a
/// corrupt fixture.
fn encode_bsd_symbol_index(entries: &[(&str, u32)]) -> Vec<u8> {
    /// Checked `usize` → `u32` narrowing. Spelled with a fully-qualified path
    /// so it does not depend on `TryFrom` being in the prelude.
    fn checked_u32(value: usize, what: &str) -> u32 {
        <u32 as ::std::convert::TryFrom<usize>>::try_from(value)
            .unwrap_or_else(|_| panic!("{} ({}) does not fit in a u32", what, value))
    }

    // Build the ranlib array and the string table side by side so that each
    // `strx` is taken from the table length right before its name is appended.
    let mut ranlibs = Vec::<u8>::with_capacity(entries.len() * 8);
    let mut strings = Vec::<u8>::new();
    for &(name, mh_off) in entries {
        let strx = checked_u32(strings.len(), "string table offset");
        ranlibs.extend_from_slice(&strx.to_le_bytes());
        ranlibs.extend_from_slice(&mh_off.to_le_bytes());
        strings.extend_from_slice(name.as_bytes());
        strings.push(0);
    }

    let ranlib_bytes = checked_u32(ranlibs.len(), "ranlib array size");
    let stringsize = checked_u32(strings.len(), "string table size");

    let mut body = Vec::with_capacity(4 + ranlibs.len() + 4 + strings.len());
    body.extend_from_slice(&ranlib_bytes.to_le_bytes());
    body.extend_from_slice(&ranlibs);
    body.extend_from_slice(&stringsize.to_le_bytes());
    body.extend_from_slice(&strings);
    body
}
1040
/// Build a SysV symbol-index body (the content of the `/` member).
///
/// Each entry is `(symbol name, member header offset)`. The produced layout is:
///
/// 1. the entry count as a big-endian `u32`,
/// 2. one big-endian `u32` offset per entry,
/// 3. every name followed by a NUL byte, in the same order as the offsets.
///
/// # Panics
///
/// Panics if the number of entries does not fit in a `u32`, rather than
/// silently truncating the count and emitting a corrupt fixture.
fn encode_sysv_symbol_index(entries: &[(&str, u32)]) -> Vec<u8> {
    // Fully-qualified so this does not depend on `TryFrom` being in the prelude.
    let nsyms = <u32 as ::std::convert::TryFrom<usize>>::try_from(entries.len())
        .expect("SysV symbol index entry count does not fit in a u32");

    let names_len: usize = entries.iter().map(|&(name, _)| name.len() + 1).sum();
    let mut body = Vec::with_capacity(4 + entries.len() * 4 + names_len);

    body.extend_from_slice(&nsyms.to_be_bytes());
    // All offsets come first, then all names: the two runs are parallel arrays.
    for &(_, mh_off) in entries {
        body.extend_from_slice(&mh_off.to_be_bytes());
    }
    for &(name, _) in entries {
        body.extend_from_slice(name.as_bytes());
        body.push(0);
    }
    body
}
1054
#[test]
fn bsd_symbol_index_parses_entries() {
    // Two symbols in a sorted BSD index, wrapped as an extended-name member.
    let symdef_body = encode_bsd_symbol_index(&[("_alpha", 0x100), ("_beta", 0x200)]);
    let mut archive_bytes = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_bsd_extended("__.SYMDEF SORTED", &symdef_body));

    let archive = Archive::open("/tmp/bsd_idx.a", &archive_bytes).unwrap();
    let index = archive.symbol_index().expect("index present");
    assert_eq!(index.len(), 2);

    // Known symbols map to their recorded offsets; unknown ones to `None`.
    let expectations = [
        ("_alpha", Some(0x100)),
        ("_beta", Some(0x200)),
        ("_missing", None),
    ];
    for &(symbol, expected) in expectations.iter() {
        assert_eq!(index.first_defining_offset(symbol), expected);
    }
}
1068
#[test]
fn sysv_symbol_index_parses_entries() {
    // Two symbols in a SysV index, stored in the member named "/".
    let index_body = encode_sysv_symbol_index(&[("_alpha", 0x60), ("_beta", 0xC0)]);
    let mut archive_bytes = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_member("/", &index_body));

    let archive = Archive::open("/tmp/sysv_idx.a", &archive_bytes).unwrap();
    let index = archive.symbol_index().expect("index present");
    assert_eq!(index.len(), 2);

    let expectations = [("_alpha", Some(0x60)), ("_beta", Some(0xC0))];
    for &(symbol, expected) in expectations.iter() {
        assert_eq!(index.first_defining_offset(symbol), expected);
    }
}
1081
#[test]
fn symbol_index_absent_when_no_special_member() {
    // The archive holds a single ordinary member and no index member at all.
    let mut archive_bytes = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_member("foo.o/", b"CONTENT"));

    let archive = Archive::open("/tmp/noidx.a", &archive_bytes).unwrap();
    let index = archive.symbol_index();
    assert!(index.is_none());
}
1090
#[test]
fn symbol_index_duplicate_returns_first() {
    // The same symbol is listed twice with different offsets.
    let symdef_body = encode_bsd_symbol_index(&[("_sym", 0x100), ("_sym", 0x200)]);
    let mut archive_bytes = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_bsd_extended("__.SYMDEF", &symdef_body));

    let archive = Archive::open("/tmp/dup.a", &archive_bytes).unwrap();
    let index = archive.symbol_index().unwrap();

    // The single-result lookup reports the earlier entry...
    assert_eq!(index.first_defining_offset("_sym"), Some(0x100));
    // ...and the multi-result lookup reports both, in index order.
    let every_offset: Vec<u32> = index.offsets_for("_sym").collect();
    assert_eq!(every_offset, vec![0x100, 0x200]);
}
1103
#[test]
fn bsd_symbol_index_rejects_oob_strx() {
    // Hand-rolled index whose only ranlib entry points at string offset 999,
    // far past the end of the 4-byte string table.
    let header_words: [u32; 4] = [
        8,   // ranlib array size in bytes → exactly one entry
        999, // strx (out of bounds)
        0,   // mh_off
        4,   // stringsize
    ];
    let mut index_body = Vec::new();
    for word in header_words.iter() {
        index_body.extend_from_slice(&word.to_le_bytes());
    }
    index_body.extend_from_slice(b"abc\0");

    let mut archive_bytes = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_bsd_extended("__.SYMDEF", &index_body));

    let err = Archive::open("/tmp/bad_idx.a", &archive_bytes).unwrap_err();
    assert!(matches!(err, ArchiveError::BadSymbolIndex { .. }));
}
1121
1122 // ----- fetch API tests -----
1123
1124 fn encode_member_at(raw_name: &str, body: &[u8], out: &mut Vec<u8>) -> usize {
1125 let off = out.len();
1126 out.extend_from_slice(&encode_member(raw_name, body));
1127 off
1128 }
1129
#[test]
fn first_member_defining_uses_symbol_index_offset() {
    // The index has to record real.o's header offset, but that offset is only
    // known once the index member has been laid out in front of it. So: emit
    // the index with a dummy offset, append real.o, then overwrite the index
    // member in place with one carrying the real offset.
    let mut archive_bytes: Vec<u8> = AR_MAGIC.to_vec();

    let index_start = archive_bytes.len();
    let draft_index = encode_bsd_extended("__.SYMDEF", &encode_bsd_symbol_index(&[("_foo", 0)]));
    let index_end = index_start + draft_index.len();
    archive_bytes.extend_from_slice(&draft_index);

    let real_off = encode_member_at("real.o", b"CONTENT", &mut archive_bytes) as u32;
    let final_index =
        encode_bsd_extended("__.SYMDEF", &encode_bsd_symbol_index(&[("_foo", real_off)]));
    archive_bytes.splice(index_start..index_end, final_index);

    let archive = Archive::open("/tmp/sym_fetch.a", &archive_bytes).unwrap();
    let member = archive.first_member_defining("_foo").expect("symbol found");
    assert_eq!(member.name, "real.o");
    assert_eq!(member.body, b"CONTENT");
}
1154
#[test]
fn fetch_object_defining_reports_parse_error_for_non_macho_body() {
    // Same placeholder-then-patch layout trick as the offset test: the index
    // is written with a dummy offset first and replaced once bogus.o's header
    // offset is known.
    let mut archive_bytes: Vec<u8> = AR_MAGIC.to_vec();

    let index_start = archive_bytes.len();
    let draft_index =
        encode_bsd_extended("__.SYMDEF", &encode_bsd_symbol_index(&[("_bogus", 0)]));
    let index_end = index_start + draft_index.len();
    archive_bytes.extend_from_slice(&draft_index);

    // The member body is deliberately not a Mach-O object.
    let real_off = encode_member_at("bogus.o", b"notmacho", &mut archive_bytes) as u32;
    let final_index =
        encode_bsd_extended("__.SYMDEF", &encode_bsd_symbol_index(&[("_bogus", real_off)]));
    archive_bytes.splice(index_start..index_end, final_index);

    let archive = Archive::open("/tmp/bad_body.a", &archive_bytes).unwrap();
    // The symbol is found (outer `Some`), but decoding the member fails.
    let outcome = archive.fetch_object_defining("_bogus").expect("found");
    assert!(matches!(outcome, Err(FetchError::Read(_))));
}
1172
#[test]
fn fetch_object_defining_returns_none_for_unknown_symbol() {
    // A lone ordinary member and no symbol index: nothing can define `_missing`.
    let mut archive_bytes: Vec<u8> = AR_MAGIC.to_vec();
    archive_bytes.extend_from_slice(&encode_member("foo.o", b"BODY"));

    let archive = Archive::open("/tmp/no_idx.a", &archive_bytes).unwrap();
    let lookup = archive.fetch_object_defining("_missing");
    assert!(lookup.is_none());
}
1181
#[test]
fn member_overrun_errors() {
    let mut archive_bytes = AR_MAGIC.to_vec();
    // The header claims a 999-byte body, but the buffer ends right after it.
    archive_bytes.extend_from_slice(&make_ar_hdr("foo.o/", 999));

    let err = Archive::open("/tmp/bad.a", &archive_bytes).unwrap_err();
    assert!(matches!(err, ArchiveError::MemberOverrun { .. }));
}
1192 }
1193