Rust · 40121 bytes Raw Blame History
1 //! TBD v4 decoder.
2 //!
3 //! Consumes the generic `Value` tree from `tbd_yaml` and produces a
4 //! strongly-typed `Tbd` that mirrors Apple's TAPI v4 schema. Targets are
5 //! `arch-platform` strings; each "scoped" field narrows to a subset of
6 //! targets.
7 //!
8 //! Unknown mapping keys (e.g. `uuids`, `swift-abi-version`) are skipped
9 //! silently — TBD grows new sections over releases and we don't need
10 //! most of them to produce a linker-side view. A later sprint can tighten
11 //! this into strict mode if parity testing flags divergences.
12
13 use super::tbd_yaml::{parse_documents, Document, Value, YamlError};
14
15 #[derive(Debug)]
16 pub enum TbdError {
17 Yaml(YamlError),
18 Schema { msg: String },
19 }
20
21 impl std::fmt::Display for TbdError {
22 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23 match self {
24 TbdError::Yaml(e) => write!(f, "{e}"),
25 TbdError::Schema { msg } => write!(f, "TBD schema error: {msg}"),
26 }
27 }
28 }
29
30 impl From<YamlError> for TbdError {
31 fn from(e: YamlError) -> Self {
32 TbdError::Yaml(e)
33 }
34 }
35
36 impl std::error::Error for TbdError {}
37
/// One TBD document. A single `.tbd` file may contain several of these
/// (libSystem.tbd has one per re-exported sub-dylib).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Tbd {
    /// Value of the `tbd-version` key.
    pub version: u32,
    /// Document-level `targets` list.
    pub targets: Vec<Target>,
    /// Value of the `install-name` key; decoding rejects documents without one.
    pub install_name: String,
    /// Textual — may be `"1351"` or `"1.2.3"`. Packed to u32 via `parse_version`.
    pub current_version: Option<String>,
    /// Textual `compatibility-version`, kept in the same form as `current_version`.
    pub compatibility_version: Option<String>,
    /// `parent-umbrella` entries: one umbrella name per target scope.
    pub parent_umbrella: Vec<Scoped<String>>,
    /// `allowable-clients` entries: the `clients` list per target scope.
    pub allowable_clients: Vec<Scoped<Vec<String>>>,
    /// `reexported-libraries` entries: the `libraries` list per target scope.
    pub reexported_libraries: Vec<Scoped<Vec<String>>>,
    /// `exports` entries: symbol lists per target scope.
    pub exports: Vec<Scoped<SymbolLists>>,
    /// `reexports` entries: symbol lists per target scope.
    pub reexports: Vec<Scoped<SymbolLists>>,
}
54
/// One `arch-platform` pair as written in a TBD `targets` list
/// (for example `arm64-macos`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Target {
    /// Architecture half of the target string.
    pub arch: Arch,
    /// Platform half of the target string.
    pub platform: Platform,
}
60
/// Architecture component of a [`Target`].
///
/// The named variants are the spellings `parse_target` recognises
/// (`arm64`, `arm64e`, `x86_64`); any other text is preserved in `Other`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arch {
    Arm64,
    Arm64e,
    X86_64,
    /// Unrecognised architecture, stored exactly as it appeared in the file.
    Other(String),
}
68
/// Platform component of a [`Target`].
///
/// The named variants are the spellings `parse_target` recognises (`macos`,
/// `ios`, `watchos`, `tvos`, `driverkit`, `maccatalyst`); any other text is
/// preserved in `Other`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Platform {
    MacOs,
    Ios,
    WatchOs,
    TvOs,
    DriverKit,
    MacCatalyst,
    /// Unrecognised platform, stored exactly as it appeared in the file.
    Other(String),
}
79
/// A value that a TBD document attaches to a subset of its targets.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Scoped<T> {
    /// Targets listed under the entry's own `targets` key.
    pub targets: Vec<Target>,
    /// Payload carried by the entry (umbrella name, library list, symbol lists, ...).
    pub value: T,
}
85
/// Exported names grouped into the six TAPI symbol categories. Each list is
/// a flat array of names; the category (objc vs plain, weak vs regular) is
/// implied by which list a name sits in.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SymbolLists {
    pub symbols: Vec<String>,
    pub weak_symbols: Vec<String>,
    pub thread_local_symbols: Vec<String>,
    pub objc_classes: Vec<String>,
    pub objc_eh_types: Vec<String>,
    pub objc_ivars: Vec<String>,
}

impl SymbolLists {
    /// Borrow all six lists in declaration order so the aggregate queries
    /// below cannot forget one.
    fn lists(&self) -> [&[String]; 6] {
        [
            self.symbols.as_slice(),
            self.weak_symbols.as_slice(),
            self.thread_local_symbols.as_slice(),
            self.objc_classes.as_slice(),
            self.objc_eh_types.as_slice(),
            self.objc_ivars.as_slice(),
        ]
    }

    /// `true` when none of the six lists holds a name.
    pub fn is_empty(&self) -> bool {
        self.lists().iter().all(|list| list.is_empty())
    }

    /// Number of names summed over all six lists. Useful for comparing
    /// against `nm -g` output in integration tests.
    pub fn total(&self) -> usize {
        self.lists().iter().map(|list| list.len()).sum()
    }
}
120
121 /// Parse a TBD file's raw bytes into one `Tbd` per `--- !tapi-tbd` document.
122 pub fn parse_tbd(input: &str) -> Result<Vec<Tbd>, TbdError> {
123 let docs = parse_documents(input)?;
124 let mut out = Vec::with_capacity(docs.len());
125 for d in docs {
126 out.push(decode_document(d)?);
127 }
128 Ok(out)
129 }
130
131 /// Parse a TBD for the linker hot path, keeping only documents and scoped
132 /// symbol lists that can satisfy `target`.
133 ///
134 /// The fast path handles Apple's emitted TAPI v4 shape directly and avoids
135 /// constructing the generic YAML `Value` tree for the thousands of symbols in
136 /// libSystem.tbd. If it sees a shape outside that subset, it falls back to the
137 /// generic decoder and applies the same target filter afterward.
138 pub fn parse_tbd_for_target(input: &str, target: &Target) -> Result<Vec<Tbd>, TbdError> {
139 match parse_tbd_for_target_direct(input, target, true) {
140 Ok(docs) => Ok(docs),
141 Err(_) => {
142 let docs = parse_tbd(input)?;
143 Ok(filter_docs_for_target(docs, target))
144 }
145 }
146 }
147
148 /// Parse only load-command-relevant TBD metadata for `target`.
149 ///
150 /// This is used for links that have no unresolved dylib symbols; emitting the
151 /// requested `LC_LOAD_DYLIB` does not require materializing libSystem's full
152 /// export surface.
153 pub fn parse_tbd_metadata_for_target(input: &str, target: &Target) -> Result<Vec<Tbd>, TbdError> {
154 match parse_tbd_for_target_direct(input, target, false) {
155 Ok(docs) => Ok(docs),
156 Err(_) => {
157 let docs = parse_tbd(input)?;
158 Ok(filter_docs_for_target_metadata(docs, target))
159 }
160 }
161 }
162
163 fn filter_docs_for_target(mut docs: Vec<Tbd>, target: &Target) -> Vec<Tbd> {
164 docs.retain(|doc| targets_match(&doc.targets, target));
165 for doc in &mut docs {
166 doc.parent_umbrella
167 .retain(|scoped| targets_match(&scoped.targets, target));
168 doc.allowable_clients
169 .retain(|scoped| targets_match(&scoped.targets, target));
170 doc.reexported_libraries
171 .retain(|scoped| targets_match(&scoped.targets, target));
172 doc.exports
173 .retain(|scoped| targets_match(&scoped.targets, target));
174 doc.reexports
175 .retain(|scoped| targets_match(&scoped.targets, target));
176 }
177 docs
178 }
179
180 fn filter_docs_for_target_metadata(docs: Vec<Tbd>, target: &Target) -> Vec<Tbd> {
181 let mut docs = filter_docs_for_target(docs, target);
182 for doc in &mut docs {
183 doc.reexported_libraries.clear();
184 doc.exports.clear();
185 doc.reexports.clear();
186 }
187 docs
188 }
189
190 fn parse_tbd_for_target_direct(
191 input: &str,
192 target: &Target,
193 include_exports: bool,
194 ) -> Result<Vec<Tbd>, TbdError> {
195 let lines: Vec<&str> = input.lines().collect();
196 let mut docs = Vec::new();
197 let mut i = 0usize;
198 while i < lines.len() {
199 let Some(trimmed) = direct_trimmed(lines[i]) else {
200 i += 1;
201 continue;
202 };
203 if trimmed.starts_with("%YAML") || trimmed.starts_with("...") {
204 i += 1;
205 continue;
206 }
207 if trimmed.starts_with("---") {
208 i += 1;
209 }
210
211 let (doc, next) = parse_direct_document(&lines, i, target, include_exports)?;
212 i = next;
213 if doc.install_name.is_empty() && doc.targets.is_empty() {
214 continue;
215 }
216 if targets_match(&doc.targets, target) {
217 docs.push(doc);
218 }
219 }
220 Ok(docs)
221 }
222
223 fn parse_direct_document(
224 lines: &[&str],
225 mut i: usize,
226 target: &Target,
227 include_exports: bool,
228 ) -> Result<(Tbd, usize), TbdError> {
229 let mut tbd = Tbd::default();
230 while i < lines.len() {
231 let Some(trimmed) = direct_trimmed(lines[i]) else {
232 i += 1;
233 continue;
234 };
235 if trimmed.starts_with("---") || trimmed.starts_with("...") {
236 break;
237 }
238 if direct_indent(lines[i]) != 0 {
239 return Err(schema("unexpected nested TBD line at document root"));
240 }
241 let (key, rest) =
242 direct_key_value(trimmed).ok_or_else(|| schema("expected top-level TBD key"))?;
243 match key {
244 "tbd-version" => {
245 tbd.version = parse_direct_scalar(rest)
246 .parse()
247 .map_err(|_| schema(&format!("tbd-version must parse as a u32: {rest:?}")))?;
248 i += 1;
249 }
250 "targets" => {
251 let (targets, next) = parse_direct_targets(lines, i, rest)?;
252 tbd.targets = targets;
253 i = next;
254 }
255 "install-name" => {
256 tbd.install_name = parse_direct_scalar(rest);
257 i += 1;
258 }
259 "current-version" => {
260 tbd.current_version = Some(parse_direct_scalar(rest));
261 i += 1;
262 }
263 "compatibility-version" => {
264 tbd.compatibility_version = Some(parse_direct_scalar(rest));
265 i += 1;
266 }
267 "parent-umbrella" => {
268 let (value, next) = parse_direct_scoped_scalars(lines, i + 1, target, "umbrella")?;
269 tbd.parent_umbrella = value;
270 i = next;
271 }
272 "allowable-clients" => {
273 let (value, next) = parse_direct_scoped_lists(lines, i + 1, target, "clients")?;
274 tbd.allowable_clients = value;
275 i = next;
276 }
277 "reexported-libraries" if include_exports => {
278 let (value, next) = parse_direct_scoped_lists(lines, i + 1, target, "libraries")?;
279 tbd.reexported_libraries = value;
280 i = next;
281 }
282 "reexported-libraries" => {
283 i = skip_direct_value(lines, i + 1);
284 }
285 "exports" if include_exports => {
286 let (value, next) = parse_direct_scoped_symbols(lines, i + 1, target)?;
287 tbd.exports = value;
288 i = next;
289 }
290 "exports" => {
291 i = skip_direct_value(lines, i + 1);
292 }
293 "reexports" if include_exports => {
294 let (value, next) = parse_direct_scoped_symbols(lines, i + 1, target)?;
295 tbd.reexports = value;
296 i = next;
297 }
298 "reexports" => {
299 i = skip_direct_value(lines, i + 1);
300 }
301 _ => {
302 i = skip_direct_value(lines, i + 1);
303 }
304 }
305 }
306
307 if !tbd.install_name.is_empty() || !tbd.targets.is_empty() {
308 if tbd.install_name.is_empty() {
309 return Err(schema("TBD document missing required 'install-name'"));
310 }
311 if tbd.targets.is_empty() {
312 return Err(schema("TBD document missing required 'targets'"));
313 }
314 }
315 Ok((tbd, i))
316 }
317
318 fn parse_direct_scoped_scalars(
319 lines: &[&str],
320 mut i: usize,
321 target: &Target,
322 value_key: &str,
323 ) -> Result<(Vec<Scoped<String>>, usize), TbdError> {
324 let mut out = Vec::new();
325 let ctx = DirectCtx { lines, target };
326 while let Some(entry) = direct_entry_start(lines, i) {
327 let mut state = DirectScopeState::default();
328 let mut value = None;
329 let (key, rest) = entry?;
330 apply_direct_scalar_pair((key, rest), ctx, &mut i, &mut state, value_key, &mut value)?;
331 while let Some((key, rest)) = direct_nested_pair(lines, i) {
332 apply_direct_scalar_pair((key, rest), ctx, &mut i, &mut state, value_key, &mut value)?;
333 }
334 if state.include == Some(true) {
335 out.push(Scoped {
336 targets: state
337 .targets
338 .ok_or_else(|| schema("missing required key \"targets\""))?,
339 value: value.unwrap_or_default(),
340 });
341 }
342 }
343 Ok((out, i))
344 }
345
/// Scoped string lists as produced by the fast path (`clients` / `libraries`).
type DirectScopedList = Vec<Scoped<Vec<String>>>;
/// Result of `parse_direct_scoped_lists`: the kept entries plus the index of
/// the next unconsumed line.
type DirectScopedListResult = Result<(DirectScopedList, usize), TbdError>;
348
/// Read-only inputs shared by the `apply_direct_*_pair` helpers, bundled so
/// they can be passed around by value.
#[derive(Clone, Copy)]
struct DirectCtx<'a, 't> {
    /// All lines of the input being parsed.
    lines: &'a [&'a str],
    /// Target the caller asked for; used to decide which entries to keep.
    target: &'t Target,
}
354
/// Per-entry bookkeeping while a scoped entry is being read by the fast path.
#[derive(Default)]
struct DirectScopeState {
    /// Parsed `targets` of the current entry; `None` until that key is seen.
    targets: Option<Vec<Target>>,
    /// Whether those targets match the requested target; `None` until the
    /// `targets` key is seen.
    include: Option<bool>,
}
360
361 fn parse_direct_scoped_lists(
362 lines: &[&str],
363 mut i: usize,
364 target: &Target,
365 value_key: &str,
366 ) -> DirectScopedListResult {
367 let mut out = Vec::new();
368 let ctx = DirectCtx { lines, target };
369 while let Some(entry) = direct_entry_start(lines, i) {
370 let mut state = DirectScopeState::default();
371 let mut value = Vec::new();
372 let (key, rest) = entry?;
373 apply_direct_list_pair((key, rest), ctx, &mut i, &mut state, value_key, &mut value)?;
374 while let Some((key, rest)) = direct_nested_pair(lines, i) {
375 apply_direct_list_pair((key, rest), ctx, &mut i, &mut state, value_key, &mut value)?;
376 }
377 if state.include == Some(true) {
378 out.push(Scoped {
379 targets: state
380 .targets
381 .ok_or_else(|| schema("missing required key \"targets\""))?,
382 value,
383 });
384 }
385 }
386 Ok((out, i))
387 }
388
389 fn parse_direct_scoped_symbols(
390 lines: &[&str],
391 mut i: usize,
392 target: &Target,
393 ) -> Result<(Vec<Scoped<SymbolLists>>, usize), TbdError> {
394 let mut out = Vec::new();
395 let ctx = DirectCtx { lines, target };
396 while let Some(entry) = direct_entry_start(lines, i) {
397 let mut state = DirectScopeState::default();
398 let mut lists = SymbolLists::default();
399 let (key, rest) = entry?;
400 apply_direct_symbol_pair((key, rest), ctx, &mut i, &mut state, &mut lists)?;
401 while let Some((key, rest)) = direct_nested_pair(lines, i) {
402 apply_direct_symbol_pair((key, rest), ctx, &mut i, &mut state, &mut lists)?;
403 }
404 if state.include == Some(true) {
405 out.push(Scoped {
406 targets: state
407 .targets
408 .ok_or_else(|| schema("missing required key \"targets\""))?,
409 value: lists,
410 });
411 }
412 }
413 Ok((out, i))
414 }
415
416 fn apply_direct_scalar_pair(
417 pair: (&str, &str),
418 ctx: DirectCtx<'_, '_>,
419 i: &mut usize,
420 state: &mut DirectScopeState,
421 value_key: &str,
422 value: &mut Option<String>,
423 ) -> Result<(), TbdError> {
424 let (key, rest) = pair;
425 if key == "targets" {
426 let (parsed, next) = parse_direct_targets(ctx.lines, *i, rest)?;
427 state.include = Some(targets_match(&parsed, ctx.target));
428 state.targets = Some(parsed);
429 *i = next;
430 } else if key == value_key {
431 if state.include != Some(false) {
432 *value = Some(parse_direct_scalar(rest));
433 }
434 *i += 1;
435 } else {
436 *i = skip_direct_inline_value(ctx.lines, *i, rest)?;
437 }
438 Ok(())
439 }
440
441 fn apply_direct_list_pair(
442 pair: (&str, &str),
443 ctx: DirectCtx<'_, '_>,
444 i: &mut usize,
445 state: &mut DirectScopeState,
446 value_key: &str,
447 value: &mut Vec<String>,
448 ) -> Result<(), TbdError> {
449 let (key, rest) = pair;
450 if key == "targets" {
451 let (parsed, next) = parse_direct_targets(ctx.lines, *i, rest)?;
452 state.include = Some(targets_match(&parsed, ctx.target));
453 state.targets = Some(parsed);
454 *i = next;
455 } else if key == value_key {
456 if state.include == Some(false) {
457 *i = skip_direct_flow(ctx.lines, *i, rest)?;
458 } else {
459 let (parsed, next) = parse_direct_string_list(ctx.lines, *i, rest)?;
460 *value = parsed;
461 *i = next;
462 }
463 } else {
464 *i = skip_direct_inline_value(ctx.lines, *i, rest)?;
465 }
466 Ok(())
467 }
468
469 fn apply_direct_symbol_pair(
470 pair: (&str, &str),
471 ctx: DirectCtx<'_, '_>,
472 i: &mut usize,
473 state: &mut DirectScopeState,
474 lists: &mut SymbolLists,
475 ) -> Result<(), TbdError> {
476 let (key, rest) = pair;
477 if key == "targets" {
478 let (parsed, next) = parse_direct_targets(ctx.lines, *i, rest)?;
479 state.include = Some(targets_match(&parsed, ctx.target));
480 state.targets = Some(parsed);
481 *i = next;
482 return Ok(());
483 }
484
485 let slot = match key {
486 "symbols" => Some(&mut lists.symbols),
487 "weak-symbols" => Some(&mut lists.weak_symbols),
488 "thread-local-symbols" => Some(&mut lists.thread_local_symbols),
489 "objc-classes" => Some(&mut lists.objc_classes),
490 "objc-eh-types" => Some(&mut lists.objc_eh_types),
491 "objc-ivars" => Some(&mut lists.objc_ivars),
492 _ => None,
493 };
494 if let Some(slot) = slot {
495 if state.include == Some(false) {
496 *i = skip_direct_flow(ctx.lines, *i, rest)?;
497 } else {
498 let (parsed, next) = parse_direct_string_list(ctx.lines, *i, rest)?;
499 *slot = parsed;
500 *i = next;
501 }
502 } else {
503 *i = skip_direct_inline_value(ctx.lines, *i, rest)?;
504 }
505 Ok(())
506 }
507
/// First `key: rest` pair of a scoped entry, or the schema error explaining
/// why the line is not a usable entry start.
type DirectEntry<'a> = Result<(&'a str, &'a str), TbdError>;
509
510 fn direct_entry_start<'a>(lines: &'a [&str], i: usize) -> Option<DirectEntry<'a>> {
511 let trimmed = direct_trimmed(lines.get(i)?)?;
512 if trimmed.starts_with("---") || trimmed.starts_with("...") || direct_indent(lines[i]) == 0 {
513 return None;
514 }
515 if direct_indent(lines[i]) != 2 || !trimmed.starts_with('-') {
516 return Some(Err(schema("expected scoped TBD entry")));
517 }
518 let rest = trimmed.strip_prefix('-').unwrap_or("").trim_start();
519 if rest.is_empty() {
520 return Some(Err(schema("empty scoped TBD entries are not supported")));
521 }
522 Some(direct_key_value(rest).ok_or_else(|| schema("expected scoped TBD key")))
523 }
524
525 fn direct_nested_pair<'a>(lines: &'a [&str], i: usize) -> Option<(&'a str, &'a str)> {
526 let trimmed = direct_trimmed(lines.get(i)?)?;
527 if trimmed.starts_with("---") || trimmed.starts_with("...") {
528 return None;
529 }
530 let indent = direct_indent(lines[i]);
531 if indent <= 2 {
532 return None;
533 }
534 direct_key_value(trimmed)
535 }
536
537 fn parse_direct_targets(
538 lines: &[&str],
539 i: usize,
540 rest: &str,
541 ) -> Result<(Vec<Target>, usize), TbdError> {
542 let (items, next) = parse_direct_string_list(lines, i, rest)?;
543 let mut targets = Vec::with_capacity(items.len());
544 for item in items {
545 targets.push(parse_target(&item)?);
546 }
547 Ok((targets, next))
548 }
549
550 fn parse_direct_string_list(
551 lines: &[&str],
552 i: usize,
553 rest: &str,
554 ) -> Result<(Vec<String>, usize), TbdError> {
555 let (flow, next) = collect_direct_flow(lines, i, rest)?;
556 Ok((split_direct_flow_scalars(&flow)?, next))
557 }
558
559 fn collect_direct_flow(lines: &[&str], i: usize, rest: &str) -> Result<(String, usize), TbdError> {
560 let start_line = i + 1;
561 let mut flow = rest.trim().to_string();
562 if !flow.starts_with('[') {
563 return Err(schema("expected a flow sequence"));
564 }
565 let mut next_i = if direct_trimmed(lines.get(i).copied().unwrap_or_default())
566 .map(|line| line.contains(rest.trim()))
567 .unwrap_or(false)
568 {
569 i + 1
570 } else {
571 i
572 };
573 while direct_flow_unbalanced(&flow) {
574 let Some(next) = lines.get(next_i).and_then(|line| direct_trimmed(line)) else {
575 return Err(schema(&format!(
576 "unterminated flow sequence from line {} near line {}: {:?}",
577 start_line,
578 next_i + 1,
579 flow
580 )));
581 };
582 if next.starts_with("---") || next.starts_with("...") {
583 return Err(schema(&format!(
584 "unterminated flow sequence from line {} before line {}: {:?}",
585 start_line,
586 next_i + 1,
587 flow
588 )));
589 }
590 flow.push(' ');
591 flow.push_str(next);
592 next_i += 1;
593 }
594 if !flow.ends_with(']') {
595 return Err(schema("flow sequence must end with ']'"));
596 }
597 Ok((flow, next_i))
598 }
599
600 fn skip_direct_flow(lines: &[&str], i: usize, rest: &str) -> Result<usize, TbdError> {
601 collect_direct_flow(lines, i, rest).map(|(_, next)| next)
602 }
603
604 fn skip_direct_inline_value(lines: &[&str], i: usize, rest: &str) -> Result<usize, TbdError> {
605 if rest.trim_start().starts_with('[') {
606 skip_direct_flow(lines, i, rest)
607 } else {
608 Ok(i + 1)
609 }
610 }
611
612 fn skip_direct_value(lines: &[&str], mut i: usize) -> usize {
613 while i < lines.len() {
614 let Some(trimmed) = direct_trimmed(lines[i]) else {
615 i += 1;
616 continue;
617 };
618 if trimmed.starts_with("---") || trimmed.starts_with("...") || direct_indent(lines[i]) == 0
619 {
620 break;
621 }
622 i += 1;
623 }
624 i
625 }
626
627 fn split_direct_flow_scalars(flow: &str) -> Result<Vec<String>, TbdError> {
628 let inner = flow
629 .strip_prefix('[')
630 .and_then(|s| s.strip_suffix(']'))
631 .ok_or_else(|| schema("flow sequence must be bracketed"))?;
632 let bytes = inner.as_bytes();
633 let mut out = Vec::new();
634 let mut in_single = false;
635 let mut in_double = false;
636 let mut depth = 0i32;
637 let mut start = 0usize;
638 let mut i = 0usize;
639 while i < bytes.len() {
640 let b = bytes[i];
641 match b {
642 b'\'' if !in_double => in_single = !in_single,
643 b'"' if !in_single => in_double = !in_double,
644 b'\\' if in_double && i + 1 < bytes.len() => {
645 i += 2;
646 continue;
647 }
648 b'[' | b'{' if !in_single && !in_double => depth += 1,
649 b']' | b'}' if !in_single && !in_double => depth -= 1,
650 b',' if !in_single && !in_double && depth == 0 => {
651 push_direct_flow_scalar(&mut out, &inner[start..i]);
652 start = i + 1;
653 }
654 _ => {}
655 }
656 i += 1;
657 }
658 if start <= inner.len() {
659 push_direct_flow_scalar(&mut out, &inner[start..]);
660 }
661 Ok(out)
662 }
663
664 fn push_direct_flow_scalar(out: &mut Vec<String>, item: &str) {
665 let item = item.trim();
666 if !item.is_empty() {
667 out.push(parse_direct_scalar(item));
668 }
669 }
670
671 fn parse_direct_scalar(raw: &str) -> String {
672 let raw = raw.trim();
673 if raw.len() >= 2 && raw.starts_with('\'') && raw.ends_with('\'') {
674 raw[1..raw.len() - 1].replace("''", "'")
675 } else if raw.len() >= 2 && raw.starts_with('"') && raw.ends_with('"') {
676 parse_direct_double_quoted(&raw[1..raw.len() - 1])
677 } else {
678 raw.to_string()
679 }
680 }
681
/// Decode the body of a double-quoted scalar (quotes already removed).
///
/// `\n`, `\r` and `\t` become the corresponding control characters; any other
/// escaped character (including `\"` and `\\`) stands for itself. A lone
/// backslash at the very end is kept literally.
///
/// The text is walked by `char`, not by byte, so non-ASCII content passes
/// through unchanged; casting individual UTF-8 bytes to `char` would turn
/// every multi-byte character into several Latin-1 characters.
fn parse_direct_double_quoted(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('r') => out.push('\r'),
            Some('t') => out.push('\t'),
            // Covers `\"`, `\\` and every unrecognised escape.
            Some(other) => out.push(other),
            // Trailing backslash with nothing to escape.
            None => out.push('\\'),
        }
    }
    out
}
705
/// Split `s` at its first `:` into a trimmed key and the remainder with
/// leading whitespace removed. Returns `None` when there is no `:`.
fn direct_key_value(s: &str) -> Option<(&str, &str)> {
    let colon = s.find(':')?;
    let key = &s[..colon];
    let value = &s[colon + 1..];
    Some((key.trim(), value.trim_start()))
}
710
/// Return the trimmed content of `line`, or `None` when the line is blank or
/// its first non-blank character is `#` (a comment).
fn direct_trimmed(line: &str) -> Option<&str> {
    Some(line.trim()).filter(|text| !text.is_empty() && !text.starts_with('#'))
}
719
/// Count the leading space characters of `line`. Tabs are not counted.
fn direct_indent(line: &str) -> usize {
    line.len() - line.trim_start_matches(' ').len()
}
723
/// Report whether the brackets/braces in `s` fail to balance.
///
/// Brackets inside single or double quotes are ignored, and inside double
/// quotes a backslash hides the byte after it. Returns `true` for any
/// non-zero nesting depth, including more closers than openers.
fn direct_flow_unbalanced(s: &str) -> bool {
    let bytes = s.as_bytes();
    let (mut single, mut double) = (false, false);
    let mut nesting = 0i32;
    let mut pos = 0usize;
    while let Some(&byte) = bytes.get(pos) {
        let quoted = single || double;
        let mut step = 1usize;
        match byte {
            b'\'' if !double => single = !single,
            b'"' if !single => double = !double,
            b'\\' if double && pos + 1 < bytes.len() => step = 2,
            b'[' | b'{' if !quoted => nesting += 1,
            b']' | b'}' if !quoted => nesting -= 1,
            _ => {}
        }
        pos += step;
    }
    nesting != 0
}
747
748 fn targets_match(targets: &[Target], target: &Target) -> bool {
749 targets.iter().any(|t| t.matches_requested(target))
750 }
751
752 fn decode_document(doc: Document) -> Result<Tbd, TbdError> {
753 let Value::Mapping(m) = doc.root else {
754 return Err(schema("top level of a TBD document must be a mapping"));
755 };
756 let mut tbd = Tbd::default();
757 for (k, v) in m {
758 match k.as_str() {
759 "tbd-version" => tbd.version = scalar_u32(v, "tbd-version")?,
760 "targets" => tbd.targets = decode_target_list(v)?,
761 "install-name" => tbd.install_name = scalar_string(v, "install-name")?,
762 "current-version" => tbd.current_version = Some(scalar_string(v, "current-version")?),
763 "compatibility-version" => {
764 tbd.compatibility_version = Some(scalar_string(v, "compatibility-version")?)
765 }
766 "parent-umbrella" => tbd.parent_umbrella = decode_scoped_umbrella(v)?,
767 "allowable-clients" => tbd.allowable_clients = decode_scoped_string_list(v, "clients")?,
768 "reexported-libraries" => {
769 tbd.reexported_libraries = decode_scoped_string_list(v, "libraries")?;
770 }
771 "exports" => tbd.exports = decode_scoped_symbols(v)?,
772 "reexports" => tbd.reexports = decode_scoped_symbols(v)?,
773 // Known-but-ignored keys (grow this list as TAPI adds them).
774 "uuids" | "flags" | "swift-abi-version" | "rpaths" | "objc-constraint"
775 | "parent-libraries" => {}
776 _ => {
777 // Silently accept unknown keys — TAPI can add new ones in
778 // future releases without breaking our reader.
779 }
780 }
781 }
782 // Minimum required by every real TAPI TBD.
783 if tbd.install_name.is_empty() {
784 return Err(schema("TBD document missing required 'install-name'"));
785 }
786 if tbd.targets.is_empty() {
787 return Err(schema("TBD document missing required 'targets'"));
788 }
789 Ok(tbd)
790 }
791
792 fn decode_target_list(v: Value) -> Result<Vec<Target>, TbdError> {
793 let Value::Sequence(seq) = v else {
794 return Err(schema("'targets' must be a sequence"));
795 };
796 let mut out = Vec::with_capacity(seq.len());
797 for item in seq {
798 out.push(parse_target(&scalar_string(item, "target")?)?);
799 }
800 Ok(out)
801 }
802
803 fn parse_target(s: &str) -> Result<Target, TbdError> {
804 // `arch-platform`. Arch may contain a hyphen (none today, but armv7k
805 // in the wild) — split on the *last* `-`.
806 let hyphen = s
807 .rfind('-')
808 .ok_or_else(|| schema(&format!("target {s:?} is not `arch-platform`")))?;
809 let arch = match &s[..hyphen] {
810 "arm64" => Arch::Arm64,
811 "arm64e" => Arch::Arm64e,
812 "x86_64" => Arch::X86_64,
813 other => Arch::Other(other.to_string()),
814 };
815 let platform = match &s[hyphen + 1..] {
816 "macos" => Platform::MacOs,
817 "ios" => Platform::Ios,
818 "watchos" => Platform::WatchOs,
819 "tvos" => Platform::TvOs,
820 "driverkit" => Platform::DriverKit,
821 "maccatalyst" => Platform::MacCatalyst,
822 other => Platform::Other(other.to_string()),
823 };
824 Ok(Target { arch, platform })
825 }
826
827 fn decode_scoped_umbrella(v: Value) -> Result<Vec<Scoped<String>>, TbdError> {
828 let Value::Sequence(seq) = v else {
829 return Err(schema(
830 "'parent-umbrella' must be a sequence of scoped mappings",
831 ));
832 };
833 let mut out = Vec::with_capacity(seq.len());
834 for item in seq {
835 let Value::Mapping(m) = item else {
836 return Err(schema("parent-umbrella entry must be a mapping"));
837 };
838 let mut targets = None;
839 let mut umbrella = None;
840 for (k, v) in m {
841 match k.as_str() {
842 "targets" => targets = Some(decode_target_list(v)?),
843 "umbrella" => umbrella = Some(scalar_string(v, "umbrella")?),
844 _ => {}
845 }
846 }
847 let targets = targets.ok_or_else(|| schema("missing required key \"targets\""))?;
848 let umbrella = umbrella.ok_or_else(|| schema("missing required key \"umbrella\""))?;
849 out.push(Scoped {
850 targets,
851 value: umbrella,
852 });
853 }
854 Ok(out)
855 }
856
857 fn decode_scoped_string_list(
858 v: Value,
859 inner_key: &str,
860 ) -> Result<Vec<Scoped<Vec<String>>>, TbdError> {
861 let Value::Sequence(seq) = v else {
862 return Err(schema("expected a sequence of scoped mappings"));
863 };
864 let mut out = Vec::with_capacity(seq.len());
865 for item in seq {
866 let Value::Mapping(m) = item else {
867 return Err(schema("scoped entry must be a mapping"));
868 };
869 let mut targets = None;
870 let mut value = Vec::new();
871 for (k, v) in m {
872 if k == "targets" {
873 targets = Some(decode_target_list(v)?);
874 } else if k == inner_key {
875 value = decode_string_list(v, inner_key)?;
876 }
877 }
878 let targets = targets.ok_or_else(|| schema("missing required key \"targets\""))?;
879 out.push(Scoped { targets, value });
880 }
881 Ok(out)
882 }
883
884 fn decode_scoped_symbols(v: Value) -> Result<Vec<Scoped<SymbolLists>>, TbdError> {
885 let Value::Sequence(seq) = v else {
886 return Err(schema("'exports'/'reexports' must be a sequence"));
887 };
888 let mut out = Vec::with_capacity(seq.len());
889 for item in seq {
890 let Value::Mapping(m) = item else {
891 return Err(schema("exports entry must be a mapping"));
892 };
893 let mut targets = None;
894 let mut lists = SymbolLists::default();
895 for (k, v) in m {
896 match k.as_str() {
897 "targets" => targets = Some(decode_target_list(v)?),
898 "symbols" => lists.symbols = decode_string_list(v, "symbols")?,
899 "weak-symbols" => lists.weak_symbols = decode_string_list(v, "weak-symbols")?,
900 "thread-local-symbols" => {
901 lists.thread_local_symbols = decode_string_list(v, "thread-local-symbols")?;
902 }
903 "objc-classes" => lists.objc_classes = decode_string_list(v, "objc-classes")?,
904 "objc-eh-types" => lists.objc_eh_types = decode_string_list(v, "objc-eh-types")?,
905 "objc-ivars" => lists.objc_ivars = decode_string_list(v, "objc-ivars")?,
906 _ => {} // ignore unknown inner keys
907 }
908 }
909 let targets = targets.ok_or_else(|| schema("missing required key \"targets\""))?;
910 out.push(Scoped {
911 targets,
912 value: lists,
913 });
914 }
915 Ok(out)
916 }
917
918 fn decode_string_list(v: Value, context: &str) -> Result<Vec<String>, TbdError> {
919 match v {
920 Value::Sequence(items) => {
921 let mut out = Vec::with_capacity(items.len());
922 for it in items {
923 out.push(scalar_string(it, context)?);
924 }
925 Ok(out)
926 }
927 Value::Null => Ok(Vec::new()),
928 _ => Err(schema(&format!("{context} must be a sequence of scalars"))),
929 }
930 }
931
932 fn scalar_u32(v: Value, context: &str) -> Result<u32, TbdError> {
933 let s = scalar_string(v, context)?;
934 s.parse()
935 .map_err(|_| schema(&format!("{context} must parse as a u32: {s:?}")))
936 }
937
938 fn scalar_string(v: Value, context: &str) -> Result<String, TbdError> {
939 match v {
940 Value::Scalar(s) => Ok(s),
941 _ => Err(schema(&format!("{context} must be a scalar"))),
942 }
943 }
944
945 fn schema(msg: &str) -> TbdError {
946 TbdError::Schema {
947 msg: msg.to_string(),
948 }
949 }
950
951 /// Pack a `"X.Y.Z"` / `"X.Y"` / `"X"` / `"1351"` version string to
952 /// Mach-O's 0xXXXXYYZZ form. Missing fields become 0; extra components
953 /// are truncated. Plain integers like `1351` become `1351 << 16`.
954 pub fn parse_version(s: &str) -> u32 {
955 let mut parts = s.split('.').map(|p| p.parse::<u32>().unwrap_or(0));
956 let x = parts.next().unwrap_or(0);
957 let y = parts.next().unwrap_or(0);
958 let z = parts.next().unwrap_or(0);
959 (x << 16) | ((y & 0xff) << 8) | (z & 0xff)
960 }
961
962 impl Target {
963 /// Exactly matches the TBD string form: `arch-platform`.
964 pub fn as_string(&self) -> String {
965 let arch = match &self.arch {
966 Arch::Arm64 => "arm64".to_string(),
967 Arch::Arm64e => "arm64e".to_string(),
968 Arch::X86_64 => "x86_64".to_string(),
969 Arch::Other(s) => s.clone(),
970 };
971 let plat = match &self.platform {
972 Platform::MacOs => "macos".to_string(),
973 Platform::Ios => "ios".to_string(),
974 Platform::WatchOs => "watchos".to_string(),
975 Platform::TvOs => "tvos".to_string(),
976 Platform::DriverKit => "driverkit".to_string(),
977 Platform::MacCatalyst => "maccatalyst".to_string(),
978 Platform::Other(s) => s.clone(),
979 };
980 format!("{arch}-{plat}")
981 }
982
983 /// Apple SDK TBDs sometimes scope umbrella documents to `arm64e-macos`
984 /// only even though the same symbols are consumable by plain `arm64`
985 /// linkers on Apple Silicon. Treat that as compatible for our arm64-only
986 /// linker, while still requiring the platform to match exactly.
987 pub fn matches_requested(&self, wanted: &Target) -> bool {
988 if self.platform != wanted.platform {
989 return false;
990 }
991 self.arch == wanted.arch
992 || matches!((&self.arch, &wanted.arch), (Arch::Arm64e, Arch::Arm64))
993 }
994 }
995
#[cfg(test)]
mod tests {
    //! Unit tests for the TBD v4 decoder.
    //!
    //! Fixtures are written as single string literals using `\` line
    //! continuations. A continuation swallows the leading whitespace of the
    //! next source line, so every YAML line that must start with spaces
    //! begins with `\x20`. Sibling keys of a `- targets:` entry are aligned
    //! with `targets` (column 4), as block-style YAML requires for keys that
    //! belong to the same sequence item.

    use super::*;

    /// Plain `arm64` on macOS — the target most fixtures are scoped to.
    fn arm64_macos() -> Target {
        Target {
            arch: Arch::Arm64,
            platform: Platform::MacOs,
        }
    }

    /// `arm64e` on macOS — used to exercise the arm64e/arm64 allowance.
    fn arm64e_macos() -> Target {
        Target {
            arch: Arch::Arm64e,
            platform: Platform::MacOs,
        }
    }

    /// The three required keys alone decode into a single document.
    #[test]
    fn parses_minimal_tbd_v4() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: '/usr/lib/libfoo.dylib'\n\
                   ...\n";
        let docs = parse_tbd(src).unwrap();
        assert_eq!(docs.len(), 1);
        let tbd = &docs[0];
        assert_eq!(tbd.version, 4);
        assert_eq!(tbd.targets, vec![arm64_macos()]);
        assert_eq!(tbd.install_name, "/usr/lib/libfoo.dylib");
    }

    /// One scoped `exports` entry may carry several symbol lists at once.
    #[test]
    fn parses_scoped_exports_with_multiple_lists() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos, x86_64-macos ]\n\
                   install-name: '/usr/lib/libfoo.dylib'\n\
                   exports:\n\
                   \x20 - targets: [ arm64-macos ]\n\
                   \x20   symbols: [ _a, _b, _c ]\n\
                   \x20   weak-symbols: [ _weak_one ]\n\
                   \x20   objc-classes: [ _OBJC_CLASS_$_Foo ]\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.exports.len(), 1);
        let scoped = &tbd.exports[0];
        assert_eq!(scoped.targets, vec![arm64_macos()]);
        assert_eq!(scoped.value.symbols, vec!["_a", "_b", "_c"]);
        assert_eq!(scoped.value.weak_symbols, vec!["_weak_one"]);
        assert_eq!(scoped.value.objc_classes, vec!["_OBJC_CLASS_$_Foo"]);
        assert_eq!(scoped.value.total(), 5);
    }

    /// `reexported-libraries` entries keep their library paths in order.
    #[test]
    fn parses_reexported_libraries() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: '/usr/lib/libSystem.B.dylib'\n\
                   reexported-libraries:\n\
                   \x20 - targets: [ arm64-macos ]\n\
                   \x20   libraries: [ '/usr/lib/system/libcache.dylib', '/usr/lib/system/libxpc.dylib' ]\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.reexported_libraries.len(), 1);
        assert_eq!(
            tbd.reexported_libraries[0].value,
            vec![
                "/usr/lib/system/libcache.dylib",
                "/usr/lib/system/libxpc.dylib"
            ]
        );
    }

    /// A scoped `parent-umbrella` entry yields the umbrella name as a string.
    #[test]
    fn parses_parent_umbrella() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: '/usr/lib/system/libcache.dylib'\n\
                   parent-umbrella:\n\
                   \x20 - targets: [ arm64-macos ]\n\
                   \x20   umbrella: System\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.parent_umbrella.len(), 1);
        assert_eq!(tbd.parent_umbrella[0].value, "System");
    }

    /// Target-filtered parsing drops the x86_64-only entry but keeps the
    /// entry scoped to arm64e, including a symbol list that spans two lines.
    #[test]
    fn target_fast_path_keeps_matching_multiline_exports() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ x86_64-macos, arm64e-macos ]\n\
                   install-name: '/usr/lib/libfoo.dylib'\n\
                   exports:\n\
                   \x20 - targets: [ x86_64-macos ]\n\
                   \x20   symbols: [ _x86_only ]\n\
                   \x20 - targets: [ x86_64-macos, arm64e-macos ]\n\
                   \x20   symbols: [ _arm_one,\n\
                   \x20              _arm_two ]\n";
        let docs = parse_tbd_for_target(src, &arm64_macos()).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].exports.len(), 1);
        assert_eq!(docs[0].exports[0].value.symbols, ["_arm_one", "_arm_two"]);
    }

    /// Keys the decoder does not know about must not fail the parse.
    #[test]
    fn unknown_keys_are_tolerated() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ arm64-macos ]\n\
                   install-name: 'x'\n\
                   future-key: [ a, b ]\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.version, 4);
    }

    /// Pins the `arch-platform` rendering. Only the formatting direction is
    /// exercised here; nothing is parsed back.
    #[test]
    fn target_as_string_roundtrip() {
        let t = Target {
            arch: Arch::Arm64e,
            platform: Platform::MacCatalyst,
        };
        assert_eq!(t.as_string(), "arm64e-maccatalyst");
    }

    /// A scope written for arm64e satisfies a plain arm64 request.
    #[test]
    fn arm64_request_accepts_arm64e_scope() {
        assert!(arm64e_macos().matches_requested(&arm64_macos()));
    }

    /// The arm64e allowance never overrides a platform mismatch.
    #[test]
    fn arm64_request_still_rejects_wrong_platform() {
        let scoped = Target {
            arch: Arch::Arm64e,
            platform: Platform::MacCatalyst,
        };
        assert!(!scoped.matches_requested(&arm64_macos()));
    }

    /// The allowance is one-directional: an arm64-only scope does not
    /// satisfy an arm64e request.
    #[test]
    fn arm64e_request_rejects_plain_arm64_scope() {
        assert!(!arm64_macos().matches_requested(&arm64e_macos()));
    }

    /// Identical targets always match.
    #[test]
    fn exact_target_matches_itself() {
        assert!(arm64_macos().matches_requested(&arm64_macos()));
        assert!(arm64e_macos().matches_requested(&arm64e_macos()));
    }

    /// Versions pack as `major << 16 | minor << 8 | patch`; missing
    /// components count as zero.
    #[test]
    fn parse_version_packs_major_dot_minor_dot_patch() {
        assert_eq!(parse_version("1.2.3"), (1 << 16) | (2 << 8) | 3);
        assert_eq!(parse_version("11"), 11 << 16);
        assert_eq!(parse_version("14.0"), 14 << 16);
        assert_eq!(parse_version("1351"), 1351 << 16);
    }

    /// A document lacking `targets` and `install-name` is rejected, and the
    /// error names one of the missing keys.
    #[test]
    fn missing_required_key_errors() {
        let src = "--- !tapi-tbd\ntbd-version: 4\n";
        // Stringify once; the message is also shown if the assertion fails.
        let msg = parse_tbd(src).unwrap_err().to_string();
        assert!(
            msg.contains("install-name") || msg.contains("targets"),
            "unexpected error message: {msg}"
        );
    }

    /// A trimmed-down libSystem document: several targets, a numeric
    /// `current-version`, and flow lists that wrap onto a second line.
    #[test]
    fn parses_libsystem_like_shape() {
        let src = "--- !tapi-tbd\n\
                   tbd-version: 4\n\
                   targets: [ x86_64-macos, arm64-macos, arm64e-macos ]\n\
                   install-name: '/usr/lib/libSystem.B.dylib'\n\
                   current-version: 1351\n\
                   reexported-libraries:\n\
                   \x20 - targets: [ arm64-macos, x86_64-macos ]\n\
                   \x20   libraries: [ '/usr/lib/system/libcache.dylib',\n\
                   \x20                '/usr/lib/system/libxpc.dylib' ]\n\
                   exports:\n\
                   \x20 - targets: [ arm64-macos, x86_64-macos ]\n\
                   \x20   symbols: [ _dyld_stub_binder, _malloc, _free,\n\
                   \x20              _printf, _fprintf ]\n\
                   ...\n";
        let tbd = &parse_tbd(src).unwrap()[0];
        assert_eq!(tbd.install_name, "/usr/lib/libSystem.B.dylib");
        assert_eq!(tbd.current_version.as_deref(), Some("1351"));
        assert_eq!(tbd.exports[0].value.symbols.len(), 5);
    }
}
1190