Rust · 18963 bytes Raw Blame History
1 //! TBD v4 decoder.
2 //!
3 //! Consumes the generic `Value` tree from `tbd_yaml` and produces a
4 //! strongly-typed `Tbd` that mirrors Apple's TAPI v4 schema. Targets are
5 //! `arch-platform` strings; each "scoped" field narrows to a subset of
6 //! targets.
7 //!
8 //! Unknown mapping keys (e.g. `uuids`, `swift-abi-version`) are skipped
9 //! silently — TBD grows new sections over releases and we don't need
10 //! most of them to produce a linker-side view. A later sprint can tighten
11 //! this into strict mode if parity testing flags divergences.
12
13 use super::tbd_yaml::{parse_documents, Document, Value, YamlError};
14
15 #[derive(Debug)]
16 pub enum TbdError {
17 Yaml(YamlError),
18 Schema { msg: String },
19 }
20
21 impl std::fmt::Display for TbdError {
22 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23 match self {
24 TbdError::Yaml(e) => write!(f, "{e}"),
25 TbdError::Schema { msg } => write!(f, "TBD schema error: {msg}"),
26 }
27 }
28 }
29
30 impl From<YamlError> for TbdError {
31 fn from(e: YamlError) -> Self {
32 TbdError::Yaml(e)
33 }
34 }
35
// Marker impl so `TbdError` can be used wherever a `std::error::Error` is
// expected. The body is empty, so every method keeps its default.
impl std::error::Error for TbdError {}
37
38 /// One TBD document. A single `.tbd` file may contain several of these
39 /// (libSystem.tbd has one per re-exported sub-dylib).
40 #[derive(Debug, Clone, Default, PartialEq, Eq)]
41 pub struct Tbd {
42 pub version: u32,
43 pub targets: Vec<Target>,
44 pub install_name: String,
45 /// Textual — may be `"1351"` or `"1.2.3"`. Packed to u32 via `parse_version`.
46 pub current_version: Option<String>,
47 pub compatibility_version: Option<String>,
48 pub parent_umbrella: Vec<Scoped<String>>,
49 pub allowable_clients: Vec<Scoped<Vec<String>>>,
50 pub reexported_libraries: Vec<Scoped<Vec<String>>>,
51 pub exports: Vec<Scoped<SymbolLists>>,
52 pub reexports: Vec<Scoped<SymbolLists>>,
53 }
54
55 #[derive(Debug, Clone, PartialEq, Eq)]
56 pub struct Target {
57 pub arch: Arch,
58 pub platform: Platform,
59 }
60
/// Architecture component of a [`Target`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arch {
    /// Spelled `arm64` in TBD text.
    Arm64,
    /// Spelled `arm64e` in TBD text.
    Arm64e,
    /// Spelled `x86_64` in TBD text.
    X86_64,
    /// Any other architecture name, stored exactly as written.
    Other(String),
}
68
/// Platform component of a [`Target`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Platform {
    /// Spelled `macos` in TBD text.
    MacOs,
    /// Spelled `ios` in TBD text.
    Ios,
    /// Spelled `watchos` in TBD text.
    WatchOs,
    /// Spelled `tvos` in TBD text.
    TvOs,
    /// Spelled `driverkit` in TBD text.
    DriverKit,
    /// Spelled `maccatalyst` in TBD text.
    MacCatalyst,
    /// Any other platform name, stored exactly as written.
    Other(String),
}
79
80 #[derive(Debug, Clone, PartialEq, Eq)]
81 pub struct Scoped<T> {
82 pub targets: Vec<Target>,
83 pub value: T,
84 }
85
/// The six per-category symbol lists of an `exports` / `reexports` entry.
///
/// Every list is a flat array of names. A symbol's kind (objc vs plain,
/// weak vs regular) is expressed solely by the list it appears in.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SymbolLists {
    /// Names from the `symbols` key.
    pub symbols: Vec<String>,
    /// Names from the `weak-symbols` key.
    pub weak_symbols: Vec<String>,
    /// Names from the `thread-local-symbols` key.
    pub thread_local_symbols: Vec<String>,
    /// Names from the `objc-classes` key.
    pub objc_classes: Vec<String>,
    /// Names from the `objc-eh-types` key.
    pub objc_eh_types: Vec<String>,
    /// Names from the `objc-ivars` key.
    pub objc_ivars: Vec<String>,
}

impl SymbolLists {
    /// All six lists, in declaration order, for uniform traversal.
    fn all_lists(&self) -> [&Vec<String>; 6] {
        [
            &self.symbols,
            &self.weak_symbols,
            &self.thread_local_symbols,
            &self.objc_classes,
            &self.objc_eh_types,
            &self.objc_ivars,
        ]
    }

    /// `true` when none of the six lists holds a name.
    pub fn is_empty(&self) -> bool {
        self.all_lists().iter().all(|list| list.is_empty())
    }

    /// Number of names summed over all six lists. Useful for comparing
    /// against `nm -g` output in integration tests.
    pub fn total(&self) -> usize {
        self.all_lists().iter().map(|list| list.len()).sum()
    }
}
120
121 /// Parse a TBD file's raw bytes into one `Tbd` per `--- !tapi-tbd` document.
122 pub fn parse_tbd(input: &str) -> Result<Vec<Tbd>, TbdError> {
123 let docs = parse_documents(input)?;
124 let mut out = Vec::with_capacity(docs.len());
125 for d in docs {
126 out.push(decode_document(&d)?);
127 }
128 Ok(out)
129 }
130
131 fn decode_document(doc: &Document) -> Result<Tbd, TbdError> {
132 let m = doc
133 .root
134 .as_mapping()
135 .ok_or_else(|| schema("top level of a TBD document must be a mapping"))?;
136
137 let mut tbd = Tbd::default();
138 for (k, v) in m {
139 match k.as_str() {
140 "tbd-version" => tbd.version = scalar_u32(v, "tbd-version")?,
141 "targets" => tbd.targets = decode_target_list(v)?,
142 "install-name" => tbd.install_name = scalar_string(v, "install-name")?,
143 "current-version" => tbd.current_version = Some(scalar_string(v, "current-version")?),
144 "compatibility-version" => {
145 tbd.compatibility_version = Some(scalar_string(v, "compatibility-version")?)
146 }
147 "parent-umbrella" => tbd.parent_umbrella = decode_scoped_umbrella(v)?,
148 "allowable-clients" => tbd.allowable_clients = decode_scoped_string_list(v, "clients")?,
149 "reexported-libraries" => {
150 tbd.reexported_libraries = decode_scoped_string_list(v, "libraries")?;
151 }
152 "exports" => tbd.exports = decode_scoped_symbols(v)?,
153 "reexports" => tbd.reexports = decode_scoped_symbols(v)?,
154 // Known-but-ignored keys (grow this list as TAPI adds them).
155 "uuids" | "flags" | "swift-abi-version" | "rpaths" | "objc-constraint"
156 | "parent-libraries" => {}
157 _ => {
158 // Silently accept unknown keys — TAPI can add new ones in
159 // future releases without breaking our reader.
160 }
161 }
162 }
163 // Minimum required by every real TAPI TBD.
164 if tbd.install_name.is_empty() {
165 return Err(schema("TBD document missing required 'install-name'"));
166 }
167 if tbd.targets.is_empty() {
168 return Err(schema("TBD document missing required 'targets'"));
169 }
170 Ok(tbd)
171 }
172
173 fn decode_target_list(v: &Value) -> Result<Vec<Target>, TbdError> {
174 let seq = v
175 .as_sequence()
176 .ok_or_else(|| schema("'targets' must be a sequence"))?;
177 let mut out = Vec::with_capacity(seq.len());
178 for item in seq {
179 let s = item
180 .as_str()
181 .ok_or_else(|| schema("target must be a scalar"))?;
182 out.push(parse_target(s)?);
183 }
184 Ok(out)
185 }
186
187 fn parse_target(s: &str) -> Result<Target, TbdError> {
188 // `arch-platform`. Arch may contain a hyphen (none today, but armv7k
189 // in the wild) — split on the *last* `-`.
190 let hyphen = s
191 .rfind('-')
192 .ok_or_else(|| schema(&format!("target {s:?} is not `arch-platform`")))?;
193 let arch = match &s[..hyphen] {
194 "arm64" => Arch::Arm64,
195 "arm64e" => Arch::Arm64e,
196 "x86_64" => Arch::X86_64,
197 other => Arch::Other(other.to_string()),
198 };
199 let platform = match &s[hyphen + 1..] {
200 "macos" => Platform::MacOs,
201 "ios" => Platform::Ios,
202 "watchos" => Platform::WatchOs,
203 "tvos" => Platform::TvOs,
204 "driverkit" => Platform::DriverKit,
205 "maccatalyst" => Platform::MacCatalyst,
206 other => Platform::Other(other.to_string()),
207 };
208 Ok(Target { arch, platform })
209 }
210
211 fn decode_scoped_umbrella(v: &Value) -> Result<Vec<Scoped<String>>, TbdError> {
212 let seq = v
213 .as_sequence()
214 .ok_or_else(|| schema("'parent-umbrella' must be a sequence of scoped mappings"))?;
215 let mut out = Vec::with_capacity(seq.len());
216 for item in seq {
217 let m = item
218 .as_mapping()
219 .ok_or_else(|| schema("parent-umbrella entry must be a mapping"))?;
220 let targets = lookup_required(m, "targets").and_then(decode_target_list)?;
221 let umbrella = lookup_required(m, "umbrella").and_then(|v| scalar_string(v, "umbrella"))?;
222 out.push(Scoped {
223 targets,
224 value: umbrella,
225 });
226 }
227 Ok(out)
228 }
229
230 fn decode_scoped_string_list(
231 v: &Value,
232 inner_key: &str,
233 ) -> Result<Vec<Scoped<Vec<String>>>, TbdError> {
234 let seq = v
235 .as_sequence()
236 .ok_or_else(|| schema("expected a sequence of scoped mappings"))?;
237 let mut out = Vec::with_capacity(seq.len());
238 for item in seq {
239 let m = item
240 .as_mapping()
241 .ok_or_else(|| schema("scoped entry must be a mapping"))?;
242 let targets = lookup_required(m, "targets").and_then(decode_target_list)?;
243 let value = match m.iter().find(|(k, _)| k == inner_key) {
244 Some((_, v)) => decode_string_list(v, inner_key)?,
245 None => Vec::new(),
246 };
247 out.push(Scoped { targets, value });
248 }
249 Ok(out)
250 }
251
252 fn decode_scoped_symbols(v: &Value) -> Result<Vec<Scoped<SymbolLists>>, TbdError> {
253 let seq = v
254 .as_sequence()
255 .ok_or_else(|| schema("'exports'/'reexports' must be a sequence"))?;
256 let mut out = Vec::with_capacity(seq.len());
257 for item in seq {
258 let m = item
259 .as_mapping()
260 .ok_or_else(|| schema("exports entry must be a mapping"))?;
261 let targets = lookup_required(m, "targets").and_then(decode_target_list)?;
262 let mut lists = SymbolLists::default();
263 for (k, v) in m {
264 match k.as_str() {
265 "targets" => {}
266 "symbols" => lists.symbols = decode_string_list(v, "symbols")?,
267 "weak-symbols" => lists.weak_symbols = decode_string_list(v, "weak-symbols")?,
268 "thread-local-symbols" => {
269 lists.thread_local_symbols = decode_string_list(v, "thread-local-symbols")?;
270 }
271 "objc-classes" => lists.objc_classes = decode_string_list(v, "objc-classes")?,
272 "objc-eh-types" => lists.objc_eh_types = decode_string_list(v, "objc-eh-types")?,
273 "objc-ivars" => lists.objc_ivars = decode_string_list(v, "objc-ivars")?,
274 _ => {} // ignore unknown inner keys
275 }
276 }
277 out.push(Scoped {
278 targets,
279 value: lists,
280 });
281 }
282 Ok(out)
283 }
284
285 fn decode_string_list(v: &Value, context: &str) -> Result<Vec<String>, TbdError> {
286 match v {
287 Value::Sequence(items) => {
288 let mut out = Vec::with_capacity(items.len());
289 for it in items {
290 out.push(scalar_string(it, context)?);
291 }
292 Ok(out)
293 }
294 Value::Null => Ok(Vec::new()),
295 _ => Err(schema(&format!("{context} must be a sequence of scalars"))),
296 }
297 }
298
299 fn lookup_required<'a>(m: &'a [(String, Value)], key: &str) -> Result<&'a Value, TbdError> {
300 m.iter()
301 .find(|(k, _)| k == key)
302 .map(|(_, v)| v)
303 .ok_or_else(|| schema(&format!("missing required key {key:?}")))
304 }
305
306 fn scalar_u32(v: &Value, context: &str) -> Result<u32, TbdError> {
307 let s = v
308 .as_str()
309 .ok_or_else(|| schema(&format!("{context} must be a scalar")))?;
310 s.parse()
311 .map_err(|_| schema(&format!("{context} must parse as a u32: {s:?}")))
312 }
313
314 fn scalar_string(v: &Value, context: &str) -> Result<String, TbdError> {
315 match v {
316 Value::Scalar(s) => Ok(s.clone()),
317 _ => Err(schema(&format!("{context} must be a scalar"))),
318 }
319 }
320
321 fn schema(msg: &str) -> TbdError {
322 TbdError::Schema {
323 msg: msg.to_string(),
324 }
325 }
326
/// Pack a `"X.Y.Z"` / `"X.Y"` / `"X"` / `"1351"` version string into
/// Mach-O's `0xXXXXYYZZ` form.
///
/// * Missing components become 0; components past the third are ignored.
/// * A component that is not a number becomes 0.
/// * A component too large for its field saturates at the field maximum
///   (`0xffff` for X, `0xff` for Y and Z) instead of wrapping around or
///   bleeding into neighbouring bits.
///
/// Plain integers like `1351` therefore become `1351 << 16`.
pub fn parse_version(s: &str) -> u32 {
    let mut fields = s.split('.').map(|field| match field.parse::<u32>() {
        Ok(number) => number,
        // Too many digits for a u32: still "too big", so let it saturate.
        Err(err) if matches!(err.kind(), std::num::IntErrorKind::PosOverflow) => u32::MAX,
        Err(_) => 0,
    });
    // Pull the next component (0 if absent) and clamp it to its field width.
    let mut component = |max: u32| fields.next().unwrap_or(0).min(max);
    let major = component(0xffff);
    let minor = component(0xff);
    let patch = component(0xff);
    (major << 16) | (minor << 8) | patch
}
337
338 impl Target {
339 /// Exactly matches the TBD string form: `arch-platform`.
340 pub fn as_string(&self) -> String {
341 let arch = match &self.arch {
342 Arch::Arm64 => "arm64".to_string(),
343 Arch::Arm64e => "arm64e".to_string(),
344 Arch::X86_64 => "x86_64".to_string(),
345 Arch::Other(s) => s.clone(),
346 };
347 let plat = match &self.platform {
348 Platform::MacOs => "macos".to_string(),
349 Platform::Ios => "ios".to_string(),
350 Platform::WatchOs => "watchos".to_string(),
351 Platform::TvOs => "tvos".to_string(),
352 Platform::DriverKit => "driverkit".to_string(),
353 Platform::MacCatalyst => "maccatalyst".to_string(),
354 Platform::Other(s) => s.clone(),
355 };
356 format!("{arch}-{plat}")
357 }
358
359 /// Apple SDK TBDs sometimes scope umbrella documents to `arm64e-macos`
360 /// only even though the same symbols are consumable by plain `arm64`
361 /// linkers on Apple Silicon. Treat that as compatible for our arm64-only
362 /// linker, while still requiring the platform to match exactly.
363 pub fn matches_requested(&self, wanted: &Target) -> bool {
364 if self.platform != wanted.platform {
365 return false;
366 }
367 self.arch == wanted.arch
368 || matches!((&self.arch, &wanted.arch), (Arch::Arm64e, Arch::Arm64))
369 }
370 }
371
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE on fixtures: a trailing `\` joins lines and swallows the next
    // line's leading whitespace, so YAML indentation is spelled explicitly as
    // `\x20` followed by literal spaces. Keys nested under a `- targets:`
    // sequence item must sit deeper than the `-` to belong to that item.

    /// The three mandatory keys alone are enough for a valid document.
    #[test]
    fn parses_minimal_tbd_v4() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: '/usr/lib/libfoo.dylib'\n\
                   ...\n";
        let docs = parse_tbd(src).unwrap();
        assert_eq!(docs.len(), 1);
        let tbd = &docs[0];
        assert_eq!(tbd.version, 4);
        assert_eq!(
            tbd.targets,
            vec![Target {
                arch: Arch::Arm64,
                platform: Platform::MacOs,
            }]
        );
        assert_eq!(tbd.install_name, "/usr/lib/libfoo.dylib");
    }

    /// One scoped `exports` entry carrying several symbol categories.
    #[test]
    fn parses_scoped_exports_with_multiple_lists() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos, x86_64-macos ]\n\
                   install-name: '/usr/lib/libfoo.dylib'\n\
                   exports:\n\
                   \x20 - targets: [ arm64-macos ]\n\
                   \x20   symbols: [ _a, _b, _c ]\n\
                   \x20   weak-symbols: [ _weak_one ]\n\
                   \x20   objc-classes: [ _OBJC_CLASS_$_Foo ]\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.exports.len(), 1);
        let scoped = &tbd.exports[0];
        assert_eq!(
            scoped.targets,
            vec![Target {
                arch: Arch::Arm64,
                platform: Platform::MacOs,
            }]
        );
        assert_eq!(scoped.value.symbols, vec!["_a", "_b", "_c"]);
        assert_eq!(scoped.value.weak_symbols, vec!["_weak_one"]);
        assert_eq!(scoped.value.objc_classes, vec!["_OBJC_CLASS_$_Foo"]);
        assert_eq!(scoped.value.total(), 5);
    }

    /// `reexported-libraries` reads its payload from the `libraries` key.
    #[test]
    fn parses_reexported_libraries() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: '/usr/lib/libSystem.B.dylib'\n\
                   reexported-libraries:\n\
                   \x20 - targets: [ arm64-macos ]\n\
                   \x20   libraries: [ '/usr/lib/system/libcache.dylib', '/usr/lib/system/libxpc.dylib' ]\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.reexported_libraries.len(), 1);
        assert_eq!(
            tbd.reexported_libraries[0].value,
            vec![
                "/usr/lib/system/libcache.dylib",
                "/usr/lib/system/libxpc.dylib"
            ]
        );
    }

    /// `parent-umbrella` yields one scoped umbrella name.
    #[test]
    fn parses_parent_umbrella() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: '/usr/lib/system/libcache.dylib'\n\
                   parent-umbrella:\n\
                   \x20 - targets: [ arm64-macos ]\n\
                   \x20   umbrella: System\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.parent_umbrella.len(), 1);
        assert_eq!(tbd.parent_umbrella[0].value, "System");
    }

    /// Keys the decoder has never heard of must not cause an error.
    #[test]
    fn unknown_keys_are_tolerated() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: 'x'\n\
                   future-key: [ a, b ]\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.version, 4);
    }

    /// `as_string` reproduces the TBD spelling of a target.
    #[test]
    fn target_as_string_roundtrip() {
        let t = Target {
            arch: Arch::Arm64e,
            platform: Platform::MacCatalyst,
        };
        assert_eq!(t.as_string(), "arm64e-maccatalyst");
    }

    /// An `arm64e` scope satisfies an `arm64` request on the same platform.
    #[test]
    fn arm64_request_accepts_arm64e_scope() {
        let scoped = Target {
            arch: Arch::Arm64e,
            platform: Platform::MacOs,
        };
        let wanted = Target {
            arch: Arch::Arm64,
            platform: Platform::MacOs,
        };
        assert!(scoped.matches_requested(&wanted));
    }

    /// The arm64e leniency never overrides a platform mismatch.
    #[test]
    fn arm64_request_still_rejects_wrong_platform() {
        let scoped = Target {
            arch: Arch::Arm64e,
            platform: Platform::MacCatalyst,
        };
        let wanted = Target {
            arch: Arch::Arm64,
            platform: Platform::MacOs,
        };
        assert!(!scoped.matches_requested(&wanted));
    }

    /// Well-formed versions pack into `0xXXXXYYZZ`.
    #[test]
    fn parse_version_packs_major_dot_minor_dot_patch() {
        assert_eq!(parse_version("1.2.3"), (1 << 16) | (2 << 8) | 3);
        assert_eq!(parse_version("11"), 11 << 16);
        assert_eq!(parse_version("14.0"), 14 << 16);
        assert_eq!(parse_version("1351"), 1351 << 16);
    }

    /// A document lacking `install-name` and `targets` is rejected.
    #[test]
    fn missing_required_key_errors() {
        let src = "--- !tapi-tbd\ntbd-version: 4\n";
        let err = parse_tbd(src).unwrap_err();
        let rendered = format!("{err}");
        assert!(rendered.contains("install-name") || rendered.contains("targets"));
    }

    /// A trimmed-down libSystem: multi-line flow sequences and mixed keys.
    #[test]
    fn parses_libsystem_like_shape() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ x86_64-macos, arm64-macos, arm64e-macos ]\n\
                   install-name: '/usr/lib/libSystem.B.dylib'\n\
                   current-version: 1351\n\
                   reexported-libraries:\n\
                   \x20 - targets: [ arm64-macos, x86_64-macos ]\n\
                   \x20   libraries: [ '/usr/lib/system/libcache.dylib',\n\
                   \x20                '/usr/lib/system/libxpc.dylib' ]\n\
                   exports:\n\
                   \x20 - targets: [ arm64-macos, x86_64-macos ]\n\
                   \x20   symbols: [ _dyld_stub_binder, _malloc, _free,\n\
                   \x20              _printf, _fprintf ]\n\
                   ...\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.install_name, "/usr/lib/libSystem.B.dylib");
        assert_eq!(tbd.current_version.as_deref(), Some("1351"));
        assert_eq!(tbd.exports[0].value.symbols.len(), 5);
    }
}
541