| 1 | //! Mach-O string table reader. |
| 2 | //! |
| 3 | //! The string table is a contiguous blob of null-terminated bytes pointed at |
| 4 | //! by `LC_SYMTAB.stroff / strsize`. Every symbol's `strx` is an offset into |
| 5 | //! this blob; the name runs from `strx` up to (not including) the next null. |
| 6 | //! |
| 7 | //! Assemblers regularly reuse suffix bytes — two symbols whose names share a |
| 8 | //! common tail can point at the same sequence. afs-as does this explicitly |
| 9 | //! via its suffix-dedup sort. The walker here handles any strx that lands |
| 10 | //! between nulls, not just those aligned to the start of a name. |
| 11 | |
| 12 | use std::collections::HashMap; |
| 13 | |
| 14 | use crate::macho::reader::ReadError; |
| 15 | |
| 16 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 17 | pub struct StringTable { |
| 18 | raw: Vec<u8>, |
| 19 | } |
| 20 | |
| 21 | impl StringTable { |
| 22 | /// Lift the string table out of a file image, owning a copy for lifetime |
| 23 | /// independence from the source buffer. Both `stroff` and `strsize` come |
| 24 | /// from `LC_SYMTAB`. |
| 25 | pub fn from_file(file_bytes: &[u8], stroff: u32, strsize: u32) -> Result<Self, ReadError> { |
| 26 | let start = stroff as usize; |
| 27 | let end = start |
| 28 | .checked_add(strsize as usize) |
| 29 | .ok_or(ReadError::Truncated { |
| 30 | need: usize::MAX, |
| 31 | have: file_bytes.len(), |
| 32 | context: "string table (stroff + strsize overflows)", |
| 33 | })?; |
| 34 | if end > file_bytes.len() { |
| 35 | return Err(ReadError::Truncated { |
| 36 | need: end, |
| 37 | have: file_bytes.len(), |
| 38 | context: "string table", |
| 39 | }); |
| 40 | } |
| 41 | Ok(StringTable { |
| 42 | raw: file_bytes[start..end].to_vec(), |
| 43 | }) |
| 44 | } |
| 45 | |
| 46 | /// Construct directly from owned bytes — handy for tests and for the |
| 47 | /// writer path (Sprint 14 builds a fresh table before emission). |
| 48 | pub fn from_bytes(raw: Vec<u8>) -> Self { |
| 49 | StringTable { raw } |
| 50 | } |
| 51 | |
| 52 | /// Raw underlying bytes; useful for byte-level round-trip tests. |
| 53 | pub fn as_bytes(&self) -> &[u8] { |
| 54 | &self.raw |
| 55 | } |
| 56 | |
| 57 | pub fn len(&self) -> usize { |
| 58 | self.raw.len() |
| 59 | } |
| 60 | |
| 61 | pub fn is_empty(&self) -> bool { |
| 62 | self.raw.is_empty() |
| 63 | } |
| 64 | |
| 65 | /// Look up the null-terminated string at offset `strx`. Non-UTF-8 names |
| 66 | /// return `ReadError::BadCmdsize` (re-used as the structural-error kind) |
| 67 | /// with a contextual `reason`; callers never see lossy replacement bytes. |
| 68 | /// |
| 69 | /// `strx = 0` returns the empty string (strtabs start with a null). |
| 70 | pub fn get(&self, strx: u32) -> Result<&str, ReadError> { |
| 71 | let start = strx as usize; |
| 72 | if start >= self.raw.len() { |
| 73 | return Err(ReadError::BadCmdsize { |
| 74 | cmd: 0, |
| 75 | cmdsize: 0, |
| 76 | at_offset: start, |
| 77 | reason: "strx out of bounds", |
| 78 | }); |
| 79 | } |
| 80 | let end = start |
| 81 | + self.raw[start..] |
| 82 | .iter() |
| 83 | .position(|&b| b == 0) |
| 84 | .ok_or(ReadError::BadCmdsize { |
| 85 | cmd: 0, |
| 86 | cmdsize: 0, |
| 87 | at_offset: start, |
| 88 | reason: "unterminated string (no null byte before end)", |
| 89 | })?; |
| 90 | std::str::from_utf8(&self.raw[start..end]).map_err(|_| ReadError::BadCmdsize { |
| 91 | cmd: 0, |
| 92 | cmdsize: 0, |
| 93 | at_offset: start, |
| 94 | reason: "non-UTF-8 bytes in symbol name", |
| 95 | }) |
| 96 | } |
| 97 | } |
| 98 | |
/// Accumulates symbol names and lays them out as a string table in which a
/// name that is a suffix of another name shares that name's bytes.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StringTableBuilder {
    /// Names whose bytes were physically written to the table, each paired
    /// with the offset of its first byte. Filled in by `finish`.
    roots: Vec<(String, u32)>,
    /// Every inserted name. Values are a placeholder `0` until `finish`
    /// replaces them with real offsets.
    offsets: HashMap<String, u32>,
}

impl StringTableBuilder {
    /// Creates a builder holding no names.
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers `name` for emission; inserting the same name again has no
    /// further effect.
    pub fn insert(&mut self, name: &str) {
        self.offsets.entry(name.to_string()).or_insert(0);
    }

    /// Lays out every inserted name and returns `(table_bytes, offsets)`.
    ///
    /// `table_bytes` begins with a single null byte, continues with each
    /// emitted name followed by its null terminator, and is zero-padded to a
    /// multiple of 8 bytes. `offsets` maps every inserted name to the `strx`
    /// at which it can be read back.
    ///
    /// Names are visited in `reverse_suffix_order`, so a name is always
    /// visited after every longer name that ends with it and can point into
    /// that name's tail instead of being emitted again.
    ///
    /// The empty name always maps to offset 0, the leading null byte. This
    /// follows the Mach-O convention that `strx == 0` denotes the null name
    /// and avoids emitting a second null when no other name is present.
    pub fn finish(mut self) -> (Vec<u8>, HashMap<String, u32>) {
        let mut names: Vec<String> = self.offsets.keys().cloned().collect();
        // Map keys are unique, so no two elements compare equal and an
        // unstable sort yields the same order as a stable one.
        names.sort_unstable_by(|lhs, rhs| reverse_suffix_order(lhs, rhs));

        // Offset 0 is reserved for the empty name.
        let mut raw = vec![0u8];
        for name in names {
            let offset = if name.is_empty() {
                0
            } else if let Some(shared) = self.find_suffix_offset(&name) {
                shared
            } else {
                // `strx` is a 32-bit field, so the position is narrowed to
                // match the width used throughout this module.
                let fresh = raw.len() as u32;
                raw.extend_from_slice(name.as_bytes());
                raw.push(0);
                self.roots.push((name.clone(), fresh));
                fresh
            };
            self.offsets.insert(name, offset);
        }

        raw.resize(raw.len().next_multiple_of(8), 0);
        (raw, self.offsets)
    }

    /// Returns the offset at which `name` can be read out of an already
    /// emitted root, i.e. the first root (in emission order) that ends with
    /// `name`, or `None` when no root does.
    fn find_suffix_offset(&self, name: &str) -> Option<u32> {
        self.roots.iter().find_map(|(root, start)| {
            root.strip_suffix(name)
                .map(|head| *start + head.len() as u32)
        })
    }
}

/// Orders names by their bytes read back-to-front, with one twist: when one
/// name is a proper suffix of the other, the longer name sorts first.
///
/// This is implemented by treating "end of name" as a sentinel larger than
/// any real byte and then comparing the reversed byte sequences
/// lexicographically. As a result, all names sharing a given tail are
/// adjacent, and the bare tail itself comes last among them.
fn reverse_suffix_order(lhs: &str, rhs: &str) -> std::cmp::Ordering {
    /// Reversed bytes widened to `u16`, closed by a sentinel above `u8::MAX`.
    fn key(name: &str) -> impl Iterator<Item = u16> + '_ {
        name.bytes()
            .rev()
            .map(u16::from)
            .chain(std::iter::once(u16::MAX))
    }

    key(lhs).cmp(key(rhs))
}
| 164 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned table from a byte slice.
    fn table_of(bytes: &[u8]) -> StringTable {
        StringTable::from_bytes(bytes.to_owned())
    }

    /// Asserts that looking up `strx` fails with `ReadError::BadCmdsize` and
    /// that the reported `reason` mentions `needle`.
    fn expect_bad_lookup(table: &StringTable, strx: u32, needle: &str) {
        match table.get(strx) {
            Err(ReadError::BadCmdsize { reason, .. }) => assert!(reason.contains(needle)),
            other => panic!("expected BadCmdsize, got {:?}", other),
        }
    }

    #[test]
    fn empty_strx_yields_empty_string() {
        assert_eq!(table_of(b"\0").get(0).unwrap(), "");
    }

    #[test]
    fn resolves_simple_name() {
        let table = table_of(b"\0_main\0_helper\0");
        for &(strx, expected) in &[(1u32, "_main"), (7, "_helper")] {
            assert_eq!(table.get(strx).unwrap(), expected);
        }
    }

    #[test]
    fn suffix_dedup_overlap() {
        // "_afs_array_sum\0" starts at offset 1, so offset 6 lands on the
        // nine-character tail "array_sum" and offset 10 on "y_sum".
        let table = table_of(b"\0_afs_array_sum\0");
        for &(strx, expected) in &[(1u32, "_afs_array_sum"), (6, "array_sum"), (10, "y_sum")] {
            assert_eq!(table.get(strx).unwrap(), expected);
        }
    }

    #[test]
    fn out_of_bounds_strx_errors() {
        expect_bad_lookup(&table_of(b"\0a\0"), 100, "out of bounds");
    }

    #[test]
    fn unterminated_string_errors() {
        // The final name has no trailing null.
        expect_bad_lookup(&table_of(b"\0abcdef"), 1, "unterminated");
    }

    #[test]
    fn non_utf8_bytes_error() {
        expect_bad_lookup(&table_of(&[0, 0xff, 0xfe, 0]), 1, "UTF-8");
    }

    #[test]
    fn from_file_bounds_check() {
        // A 10-byte table starting at offset 30 spills past a 32-byte file.
        let file = [0u8; 32];
        let outcome = StringTable::from_file(&file, 30, 10);
        assert!(matches!(outcome, Err(ReadError::Truncated { .. })));
    }

    #[test]
    fn from_file_copies_range() {
        let payload: &[u8] = b"\0_main\0";
        let mut file = vec![0u8; 8];
        file.extend_from_slice(payload);

        let table = StringTable::from_file(&file, 8, 7).unwrap();
        assert_eq!(table.as_bytes(), payload);
        assert_eq!(table.get(1).unwrap(), "_main");
    }

    #[test]
    fn builder_dedups_suffix_names() {
        let mut builder = StringTableBuilder::new();
        for name in &["_array_sum", "_afs_array_sum"] {
            builder.insert(name);
        }

        let (bytes, offsets) = builder.finish();
        let table = StringTable::from_bytes(bytes);
        let (long_strx, short_strx) = (offsets["_afs_array_sum"], offsets["_array_sum"]);

        assert_eq!(table.get(long_strx).unwrap(), "_afs_array_sum");
        assert_eq!(table.get(short_strx).unwrap(), "_array_sum");
        // The shorter name reuses the longer one's tail, four bytes in.
        assert_eq!(short_strx, long_strx + 4);
        assert_eq!(table.len() % 8, 0);
    }
}
| 257 |