Rust · 13122 bytes Raw Blame History
1 //! Linker-side section model.
2 //!
//! Sprint 2 introduces the `SectionKind` taxonomy the linker reasons about
//! post-parse: code vs. data vs. zerofill vs. TLS vs. literals, plus the
//! Apple-specific kinds. GOT, stub, and lazy-pointer sections are recognized
//! by their section type; `__compact_unwind` and `__eh_frame` are additionally
//! identified by sectname because the section type alone is ambiguous.
7 //!
8 //! Later sprints layer `InputSection` (atomized content) and
9 //! `OutputSection` / `OutputSegment` (layout model) on top of this module.
10
11 use crate::macho::constants::*;
12 use crate::macho::reader::{name16_str, ReadError, Section64Header};
13 use crate::resolve::AtomId;
14
/// Linker-side classification of a Mach-O section.
///
/// [`classify_section`] derives the kind from the section-type field of the
/// wire `flags`, refined by attribute bits and, where the type alone is not
/// specific enough, by the section and segment names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    /// Code: an `S_REGULAR` section with `S_ATTR_PURE_INSTRUCTIONS` set.
    Text,
    /// Plain data: an `S_REGULAR` section matching none of the more
    /// specific rules.
    Data,
    /// Immutable data such as `__TEXT,__const` or `__DATA_CONST,__const`.
    ConstData,
    /// C-string literals (`S_CSTRING_LITERALS`), e.g. `__TEXT,__cstring`.
    CStringLiterals,
    /// 4-byte literal pool (`S_4BYTE_LITERALS`).
    Literal4,
    /// 8-byte literal pool (`S_8BYTE_LITERALS`).
    Literal8,
    /// 16-byte literal pool (`S_16BYTE_LITERALS`).
    Literal16,
    /// BSS-style uninitialized storage (`S_ZEROFILL`).
    ZeroFill,
    /// Zerofill for storage larger than 4 GiB (`S_GB_ZEROFILL`); rare and
    /// not used by armfortas today.
    GbZeroFill,
    /// `S_COALESCED` section other than `__eh_frame`; the per-function
    /// weak-definition model.
    Coalesced,
    /// Initialized thread-local data (`S_THREAD_LOCAL_REGULAR`).
    ThreadLocalRegular,
    /// Zero-initialized thread-local data (`S_THREAD_LOCAL_ZEROFILL`).
    ThreadLocalZeroFill,
    /// TLV descriptors (`S_THREAD_LOCAL_VARIABLES`).
    ThreadLocalVariables,
    /// TLV descriptor pointer slots (`S_THREAD_LOCAL_VARIABLE_POINTERS`).
    ThreadLocalVariablePointers,
    /// TLV initializer function pointers
    /// (`S_THREAD_LOCAL_INIT_FUNCTION_POINTERS`).
    ThreadLocalInitPointers,
    /// `__compact_unwind`: an `S_REGULAR` section carrying `S_ATTR_DEBUG`,
    /// recognized by its section name.
    CompactUnwind,
    /// `__eh_frame`: an `S_COALESCED` section recognized by its section name.
    EhFrame,
    /// Non-lazy symbol pointers, typically `__DATA_CONST,__got`.
    NonLazySymbolPointers,
    /// Lazy symbol pointers, typically `__DATA,__la_symbol_ptr`.
    LazySymbolPointers,
    /// Symbol stubs, typically `__TEXT,__stubs`.
    SymbolStubs,
    /// Any other regular section not otherwise classified.
    ///
    /// NOTE(review): the classifier in this file falls back to `Data` for
    /// unmatched `S_REGULAR` sections, so nothing visible here produces this
    /// variant. Confirm whether it is still needed.
    Regular,
    /// Unrecognized section type; carries the masked type value for
    /// diagnostics.
    Unknown(u8),
}
59
60 /// Classify a section by its segment name, section name, and wire `flags`.
61 pub fn classify_section(segname: &str, sectname: &str, flags: u32) -> SectionKind {
62 let ty = flags & SECTION_TYPE_MASK;
63 match ty {
64 S_ZEROFILL => SectionKind::ZeroFill,
65 S_GB_ZEROFILL => SectionKind::GbZeroFill,
66 S_CSTRING_LITERALS => SectionKind::CStringLiterals,
67 S_4BYTE_LITERALS => SectionKind::Literal4,
68 S_8BYTE_LITERALS => SectionKind::Literal8,
69 S_16BYTE_LITERALS => SectionKind::Literal16,
70 S_NON_LAZY_SYMBOL_POINTERS => SectionKind::NonLazySymbolPointers,
71 S_LAZY_SYMBOL_POINTERS => SectionKind::LazySymbolPointers,
72 S_SYMBOL_STUBS => SectionKind::SymbolStubs,
73 S_COALESCED => {
74 if sectname == "__eh_frame" {
75 SectionKind::EhFrame
76 } else {
77 SectionKind::Coalesced
78 }
79 }
80 S_THREAD_LOCAL_REGULAR => SectionKind::ThreadLocalRegular,
81 S_THREAD_LOCAL_ZEROFILL => SectionKind::ThreadLocalZeroFill,
82 S_THREAD_LOCAL_VARIABLES => SectionKind::ThreadLocalVariables,
83 S_THREAD_LOCAL_VARIABLE_POINTERS => SectionKind::ThreadLocalVariablePointers,
84 S_THREAD_LOCAL_INIT_FUNCTION_POINTERS => SectionKind::ThreadLocalInitPointers,
85 S_REGULAR => classify_regular(segname, sectname, flags),
86 _ => SectionKind::Unknown(ty as u8),
87 }
88 }
89
90 fn classify_regular(segname: &str, sectname: &str, flags: u32) -> SectionKind {
91 if flags & S_ATTR_DEBUG != 0 && sectname == "__compact_unwind" {
92 return SectionKind::CompactUnwind;
93 }
94 if flags & S_ATTR_PURE_INSTRUCTIONS != 0 {
95 return SectionKind::Text;
96 }
97 if sectname == "__const" && matches!(segname, "__TEXT" | "__DATA" | "__DATA_CONST") {
98 return SectionKind::ConstData;
99 }
100 SectionKind::Data
101 }
102
103 /// True if the section holds no bytes in the file (size is virtual).
104 pub fn is_zerofill(kind: SectionKind) -> bool {
105 matches!(
106 kind,
107 SectionKind::ZeroFill | SectionKind::GbZeroFill | SectionKind::ThreadLocalZeroFill
108 )
109 }
110
111 /// True if the section carries ARM64 instructions.
112 pub fn is_executable(kind: SectionKind) -> bool {
113 matches!(
114 kind,
115 SectionKind::Text | SectionKind::SymbolStubs | SectionKind::Coalesced
116 )
117 }
118
119 // ---------------------------------------------------------------------------
120 // InputSection — the linker-side model for one input .o's section.
121 // ---------------------------------------------------------------------------
122
123 /// A single input-file section with its decoded header, kind, content slice,
124 /// and raw relocation bytes. Relocation decoding happens in Sprint 3; here we
125 /// preserve the wire bytes unchanged.
126 #[derive(Debug, Clone, PartialEq, Eq)]
127 pub struct InputSection {
128 pub segname: String,
129 pub sectname: String,
130 pub kind: SectionKind,
131 pub addr: u64,
132 pub size: u64,
133 pub align_pow2: u32,
134 pub flags: u32,
135 pub offset: u32,
136 pub reloff: u32,
137 pub nreloc: u32,
138 pub reserved1: u32,
139 pub reserved2: u32,
140 pub reserved3: u32,
141 /// File-backed bytes of the section. Empty for zerofill/TLS-zerofill/GB.
142 pub data: Vec<u8>,
143 /// Raw 8-byte relocation_info entries (`nreloc × 8` bytes). Decoded in
144 /// Sprint 3; owned here so later passes can reinterpret without
145 /// re-reading the source file.
146 pub raw_relocs: Vec<u8>,
147 }
148
149 impl InputSection {
150 /// Lift an `InputSection` out of a file image using the decoded section
151 /// header to locate its content and relocation bytes.
152 pub fn from_header(hdr: &Section64Header, file_bytes: &[u8]) -> Result<Self, ReadError> {
153 let segname = name16_str(&hdr.segname);
154 let sectname = name16_str(&hdr.sectname);
155 let kind = classify_section(&segname, &sectname, hdr.flags);
156
157 let data = if is_zerofill(kind) {
158 Vec::new()
159 } else {
160 let start = hdr.offset as usize;
161 let end = start
162 .checked_add(hdr.size as usize)
163 .ok_or(ReadError::Truncated {
164 need: usize::MAX,
165 have: file_bytes.len(),
166 context: "section content (offset + size overflows)",
167 })?;
168 if end > file_bytes.len() {
169 return Err(ReadError::Truncated {
170 need: end,
171 have: file_bytes.len(),
172 context: "section content",
173 });
174 }
175 file_bytes[start..end].to_vec()
176 };
177
178 let raw_relocs = if hdr.nreloc == 0 {
179 Vec::new()
180 } else {
181 let start = hdr.reloff as usize;
182 let total = (hdr.nreloc as usize)
183 .checked_mul(8)
184 .ok_or(ReadError::Truncated {
185 need: usize::MAX,
186 have: file_bytes.len(),
187 context: "section relocs (nreloc × 8 overflows)",
188 })?;
189 let end = start.checked_add(total).ok_or(ReadError::Truncated {
190 need: usize::MAX,
191 have: file_bytes.len(),
192 context: "section relocs (reloff + size overflows)",
193 })?;
194 if end > file_bytes.len() {
195 return Err(ReadError::Truncated {
196 need: end,
197 have: file_bytes.len(),
198 context: "section relocs",
199 });
200 }
201 file_bytes[start..end].to_vec()
202 };
203
204 Ok(InputSection {
205 segname,
206 sectname,
207 kind,
208 addr: hdr.addr,
209 size: hdr.size,
210 align_pow2: hdr.align,
211 flags: hdr.flags,
212 offset: hdr.offset,
213 reloff: hdr.reloff,
214 nreloc: hdr.nreloc,
215 reserved1: hdr.reserved1,
216 reserved2: hdr.reserved2,
217 reserved3: hdr.reserved3,
218 data,
219 raw_relocs,
220 })
221 }
222 }
223
224 // ---------------------------------------------------------------------------
225 // Output layout model — populated by Sprint 10's layout pass.
226 // ---------------------------------------------------------------------------
227
/// Identifier of an output section: a thin wrapper around a `u32`.
///
/// Comparable, orderable, and hashable, so it can serve as a map key or be
/// sorted. `OutputSegment::sections` stores these ids.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct OutputSectionId(pub u32);
230
/// Protection bit set used for an output segment's initial and maximum
/// protection.
///
/// The wrapped value is private: obtain a `Prot` from the associated
/// constants and read the raw value back with [`Prot::bits`].
///
/// NOTE(review): the bit values presumably mirror the Mach `VM_PROT_*`
/// constants; confirm against the writer that emits them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Prot(u32);

impl Prot {
    /// No access bits set.
    pub const NONE: Prot = Prot(0);
    /// Read bit.
    pub const READ: Prot = Prot(1 << 0);
    /// Write bit.
    pub const WRITE: Prot = Prot(1 << 1);
    /// Execute bit.
    pub const EXECUTE: Prot = Prot(1 << 2);
    /// Read only; identical to [`Prot::READ`].
    pub const READ_ONLY: Prot = Self::READ;
    /// Read and write.
    pub const READ_WRITE: Prot = Prot(Self::READ.0 | Self::WRITE.0);
    /// Read and execute.
    pub const READ_EXECUTE: Prot = Prot(Self::READ.0 | Self::EXECUTE.0);

    /// Return the raw protection bits.
    pub fn bits(self) -> u32 {
        let Prot(raw) = self;
        raw
    }
}
247
248 #[derive(Debug, Clone, PartialEq, Eq)]
249 pub struct OutputAtom {
250 pub atom: AtomId,
251 pub offset: u64,
252 pub size: u64,
253 pub data: Vec<u8>,
254 }
255
256 #[derive(Debug, Clone, PartialEq, Eq)]
257 pub struct OutputSection {
258 pub segment: String,
259 pub name: String,
260 pub kind: SectionKind,
261 pub align_pow2: u8,
262 pub flags: u32,
263 pub reserved1: u32,
264 pub reserved2: u32,
265 pub reserved3: u32,
266 pub atoms: Vec<OutputAtom>,
267 /// Byte offset within the section where `synthetic_data` begins.
268 pub synthetic_offset: u64,
269 pub synthetic_data: Vec<u8>,
270 pub addr: u64,
271 pub size: u64,
272 pub file_off: u64,
273 }
274
275 #[derive(Debug, Clone, PartialEq, Eq)]
276 pub struct OutputSegment {
277 pub name: String,
278 pub sections: Vec<OutputSectionId>,
279 pub vm_addr: u64,
280 pub vm_size: u64,
281 pub file_off: u64,
282 pub file_size: u64,
283 pub init_prot: Prot,
284 pub max_prot: Prot,
285 pub flags: u32,
286 }
287
288 impl OutputSection {
289 pub fn is_zerofill(&self) -> bool {
290 is_zerofill(self.kind)
291 }
292 }
293
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each `(segname, sectname, flags)` triple classifies to the
    /// kind listed alongside it.
    fn assert_classified(cases: &[(&str, &str, u32, SectionKind)]) {
        for &(segname, sectname, flags, expected) in cases {
            assert_eq!(
                classify_section(segname, sectname, flags),
                expected,
                "{},{} (flags {:#010x})",
                segname,
                sectname,
                flags
            );
        }
    }

    #[test]
    fn classify_text_section() {
        let flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
        let kind = classify_section("__TEXT", "__text", flags);
        assert_eq!(kind, SectionKind::Text);
        assert!(is_executable(kind));
    }

    #[test]
    fn classify_cstring_literals() {
        assert_classified(&[(
            "__TEXT",
            "__cstring",
            S_CSTRING_LITERALS,
            SectionKind::CStringLiterals,
        )]);
    }

    #[test]
    fn classify_zerofill() {
        let kind = classify_section("__DATA", "__bss", S_ZEROFILL);
        assert_eq!(kind, SectionKind::ZeroFill);
        assert!(is_zerofill(kind));
    }

    #[test]
    fn classify_const_data() {
        assert_classified(&[("__TEXT", "__const", S_REGULAR, SectionKind::ConstData)]);
    }

    #[test]
    fn classify_regular_data() {
        assert_classified(&[("__DATA", "__data", S_REGULAR, SectionKind::Data)]);
    }

    #[test]
    fn classify_compact_unwind() {
        assert_classified(&[(
            "__TEXT",
            "__compact_unwind",
            S_REGULAR | S_ATTR_DEBUG,
            SectionKind::CompactUnwind,
        )]);
    }

    #[test]
    fn classify_eh_frame_vs_coalesced() {
        assert_classified(&[
            ("__TEXT", "__eh_frame", S_COALESCED, SectionKind::EhFrame),
            ("__TEXT", "__weak_text", S_COALESCED, SectionKind::Coalesced),
        ]);
    }

    #[test]
    fn classify_tls_family() {
        assert_classified(&[
            (
                "__DATA",
                "__thread_data",
                S_THREAD_LOCAL_REGULAR,
                SectionKind::ThreadLocalRegular,
            ),
            (
                "__DATA",
                "__thread_bss",
                S_THREAD_LOCAL_ZEROFILL,
                SectionKind::ThreadLocalZeroFill,
            ),
            (
                "__DATA",
                "__thread_vars",
                S_THREAD_LOCAL_VARIABLES,
                SectionKind::ThreadLocalVariables,
            ),
            (
                "__DATA",
                "__thread_ptrs",
                S_THREAD_LOCAL_VARIABLE_POINTERS,
                SectionKind::ThreadLocalVariablePointers,
            ),
        ]);
    }

    #[test]
    fn classify_got_and_stubs() {
        assert_classified(&[
            (
                "__DATA_CONST",
                "__got",
                S_NON_LAZY_SYMBOL_POINTERS,
                SectionKind::NonLazySymbolPointers,
            ),
            ("__TEXT", "__stubs", S_SYMBOL_STUBS, SectionKind::SymbolStubs),
            (
                "__DATA",
                "__la_symbol_ptr",
                S_LAZY_SYMBOL_POINTERS,
                SectionKind::LazySymbolPointers,
            ),
        ]);
    }

    #[test]
    fn unknown_type_nibble_preserved() {
        // No known section type uses 0xFF, so the value must round-trip.
        let weird = 0xFFu32;
        assert_classified(&[("__WEIRD", "__weird", weird, SectionKind::Unknown(0xFF))]);
    }
}
406