Rust · 8234 bytes Raw Blame History
//! Mach-O string table reader and builder.
//!
//! The string table is a contiguous blob of null-terminated bytes pointed at
//! by `LC_SYMTAB.stroff / strsize`. Every symbol's `strx` is an offset into
//! this blob; the name runs from `strx` up to (not including) the next null.
//!
//! Assemblers regularly reuse suffix bytes — two symbols whose names share a
//! common tail can point at the same sequence. afs-as does this explicitly
//! via its suffix-dedup sort. The walker here handles any strx that lands
//! between nulls, not just those aligned to the start of a name.
11
12 use std::collections::HashMap;
13
14 use crate::macho::reader::ReadError;
15
16 #[derive(Debug, Clone, PartialEq, Eq)]
17 pub struct StringTable {
18 raw: Vec<u8>,
19 }
20
21 impl StringTable {
22 /// Lift the string table out of a file image, owning a copy for lifetime
23 /// independence from the source buffer. Both `stroff` and `strsize` come
24 /// from `LC_SYMTAB`.
25 pub fn from_file(file_bytes: &[u8], stroff: u32, strsize: u32) -> Result<Self, ReadError> {
26 let start = stroff as usize;
27 let end = start
28 .checked_add(strsize as usize)
29 .ok_or(ReadError::Truncated {
30 need: usize::MAX,
31 have: file_bytes.len(),
32 context: "string table (stroff + strsize overflows)",
33 })?;
34 if end > file_bytes.len() {
35 return Err(ReadError::Truncated {
36 need: end,
37 have: file_bytes.len(),
38 context: "string table",
39 });
40 }
41 Ok(StringTable {
42 raw: file_bytes[start..end].to_vec(),
43 })
44 }
45
46 /// Construct directly from owned bytes — handy for tests and for the
47 /// writer path (Sprint 14 builds a fresh table before emission).
48 pub fn from_bytes(raw: Vec<u8>) -> Self {
49 StringTable { raw }
50 }
51
52 /// Raw underlying bytes; useful for byte-level round-trip tests.
53 pub fn as_bytes(&self) -> &[u8] {
54 &self.raw
55 }
56
57 pub fn len(&self) -> usize {
58 self.raw.len()
59 }
60
61 pub fn is_empty(&self) -> bool {
62 self.raw.is_empty()
63 }
64
65 /// Look up the null-terminated string at offset `strx`. Non-UTF-8 names
66 /// return `ReadError::BadCmdsize` (re-used as the structural-error kind)
67 /// with a contextual `reason`; callers never see lossy replacement bytes.
68 ///
69 /// `strx = 0` returns the empty string (strtabs start with a null).
70 pub fn get(&self, strx: u32) -> Result<&str, ReadError> {
71 let start = strx as usize;
72 if start >= self.raw.len() {
73 return Err(ReadError::BadCmdsize {
74 cmd: 0,
75 cmdsize: 0,
76 at_offset: start,
77 reason: "strx out of bounds",
78 });
79 }
80 let end = start
81 + self.raw[start..]
82 .iter()
83 .position(|&b| b == 0)
84 .ok_or(ReadError::BadCmdsize {
85 cmd: 0,
86 cmdsize: 0,
87 at_offset: start,
88 reason: "unterminated string (no null byte before end)",
89 })?;
90 std::str::from_utf8(&self.raw[start..end]).map_err(|_| ReadError::BadCmdsize {
91 cmd: 0,
92 cmdsize: 0,
93 at_offset: start,
94 reason: "non-UTF-8 bytes in symbol name",
95 })
96 }
97 }
98
99 #[derive(Debug, Clone, Default, PartialEq, Eq)]
100 pub struct StringTableBuilder {
101 roots: Vec<(String, u32)>,
102 offsets: HashMap<String, u32>,
103 }
104
105 impl StringTableBuilder {
106 pub fn new() -> Self {
107 Self::default()
108 }
109
110 pub fn insert(&mut self, name: &str) {
111 self.offsets.entry(name.to_string()).or_insert(0);
112 }
113
114 pub fn finish(mut self) -> (Vec<u8>, HashMap<String, u32>) {
115 let mut names: Vec<String> = self.offsets.keys().cloned().collect();
116 names.sort_by(|lhs, rhs| reverse_suffix_order(lhs, rhs));
117
118 let mut raw = vec![0u8];
119 for name in names {
120 if let Some(offset) = self.find_suffix_offset(&name) {
121 self.offsets.insert(name, offset);
122 continue;
123 }
124
125 let offset = raw.len() as u32;
126 raw.extend_from_slice(name.as_bytes());
127 raw.push(0);
128 self.roots.push((name.clone(), offset));
129 self.offsets.insert(name, offset);
130 }
131
132 while !raw.len().is_multiple_of(8) {
133 raw.push(0);
134 }
135 (raw, self.offsets)
136 }
137
138 fn find_suffix_offset(&self, name: &str) -> Option<u32> {
139 self.roots.iter().find_map(|(existing, offset)| {
140 if existing.ends_with(name) {
141 Some(*offset + (existing.len() - name.len()) as u32)
142 } else {
143 None
144 }
145 })
146 }
147 }
148
/// Order two names by their bytes read back-to-front, placing a longer name
/// before any name that is one of its suffixes.
///
/// Each name is viewed as its reversed byte sequence followed by an end
/// marker that compares greater than every real byte; the two sequences are
/// then compared lexicographically. Running out of bytes first therefore
/// sorts later, and only identical names compare `Equal`.
fn reverse_suffix_order(lhs: &str, rhs: &str) -> std::cmp::Ordering {
    // One past the largest byte value, so the marker outranks any byte.
    const END: u16 = 0x100;

    let lhs_key = lhs
        .bytes()
        .rev()
        .map(u16::from)
        .chain(std::iter::once(END));
    let rhs_key = rhs
        .bytes()
        .rev()
        .map(u16::from)
        .chain(std::iter::once(END));
    lhs_key.cmp(rhs_key)
}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a table directly from a byte slice.
    fn table_of(bytes: &[u8]) -> StringTable {
        StringTable::from_bytes(bytes.to_vec())
    }

    /// Assert that a lookup failed with `BadCmdsize` whose reason mentions
    /// `needle`.
    fn expect_bad_cmdsize(outcome: Result<&str, ReadError>, needle: &str) {
        match outcome {
            Err(ReadError::BadCmdsize { reason, .. }) => {
                assert!(
                    reason.contains(needle),
                    "reason {reason:?} does not mention {needle:?}"
                );
            }
            other => panic!("expected BadCmdsize mentioning {needle:?}, got {other:?}"),
        }
    }

    #[test]
    fn empty_strx_yields_empty_string() {
        let table = table_of(b"\0");
        assert_eq!(table.get(0).unwrap(), "");
    }

    #[test]
    fn resolves_simple_name() {
        let table = table_of(b"\0_main\0_helper\0");
        for (strx, expected) in [(1, "_main"), (7, "_helper")] {
            assert_eq!(table.get(strx).unwrap(), expected);
        }
    }

    #[test]
    fn suffix_dedup_overlap() {
        // "_afs_array_sum\0" sits at offset 1; shorter names point into the
        // middle of it ("array_sum" at 6, "y_sum" at 10).
        let table = table_of(b"\0_afs_array_sum\0");
        for (strx, expected) in [(1, "_afs_array_sum"), (6, "array_sum"), (10, "y_sum")] {
            assert_eq!(table.get(strx).unwrap(), expected);
        }
    }

    #[test]
    fn out_of_bounds_strx_errors() {
        let table = table_of(b"\0a\0");
        expect_bad_cmdsize(table.get(100), "out of bounds");
    }

    #[test]
    fn unterminated_string_errors() {
        // No trailing null after "abcdef".
        let table = table_of(b"\0abcdef");
        expect_bad_cmdsize(table.get(1), "unterminated");
    }

    #[test]
    fn non_utf8_bytes_error() {
        let table = table_of(&[0, 0xff, 0xfe, 0]);
        expect_bad_cmdsize(table.get(1), "UTF-8");
    }

    #[test]
    fn from_file_bounds_check() {
        let image = vec![0u8; 32];
        // stroff + strsize spills off the end of the 32-byte image.
        let outcome = StringTable::from_file(&image, 30, 10);
        assert!(matches!(outcome, Err(ReadError::Truncated { .. })));
    }

    #[test]
    fn from_file_copies_range() {
        let payload = b"\0_main\0";
        let mut image = vec![0u8; 8];
        image.extend_from_slice(payload);

        let table = StringTable::from_file(&image, 8, 7).unwrap();
        assert_eq!(table.as_bytes(), payload);
        assert_eq!(table.get(1).unwrap(), "_main");
    }

    #[test]
    fn builder_dedups_suffix_names() {
        let mut builder = StringTableBuilder::new();
        for name in ["_array_sum", "_afs_array_sum"] {
            builder.insert(name);
        }

        let (bytes, offsets) = builder.finish();
        let table = StringTable::from_bytes(bytes);
        let long_at = offsets["_afs_array_sum"];
        let short_at = offsets["_array_sum"];

        assert_eq!(table.get(long_at).unwrap(), "_afs_array_sum");
        assert_eq!(table.get(short_at).unwrap(), "_array_sum");
        // The shorter name reuses the tail of the longer one.
        assert_eq!(short_at, long_at + 4);
        assert_eq!(table.as_bytes().len() % 8, 0);
    }
}
257