| 1 | //! Linker-side section model. |
| 2 | //! |
| 3 | //! Sprint 2 introduces the `SectionKind` taxonomy the linker reasons about |
| 4 | //! post-parse — code vs data vs zerofill vs TLS vs literals plus the Apple |
| 5 | //! markers (`__compact_unwind`, `__eh_frame`, GOT/stubs/lazy-pointer) |
| 6 | //! identified by sectname because the type nibble alone is ambiguous. |
| 7 | //! |
| 8 | //! Later sprints layer `InputSection` (atomized content) and |
| 9 | //! `OutputSection` / `OutputSegment` (layout model) on top of this module. |
| 10 | |
| 11 | use crate::macho::constants::*; |
| 12 | use crate::macho::reader::{name16_str, ReadError, Section64Header}; |
| 13 | use crate::resolve::AtomId; |
| 14 | |
/// Linker-side classification of a Mach-O section.
///
/// Values are produced by [`classify_section`] from a section's segment name,
/// section name, and header `flags` word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    /// `S_REGULAR` section with `S_ATTR_PURE_INSTRUCTIONS` set, i.e. code.
    Text,
    /// `S_REGULAR` section that matched none of the more specific regular
    /// kinds; this is the fallback of the regular-section classifier.
    Data,
    /// Immutable data: a `__const` section in `__TEXT`, `__DATA`, or
    /// `__DATA_CONST`.
    ConstData,
    /// C string literals (`S_CSTRING_LITERALS`), e.g. `__TEXT,__cstring`.
    CStringLiterals,
    /// Literal pool typed `S_4BYTE_LITERALS`.
    Literal4,
    /// Literal pool typed `S_8BYTE_LITERALS`.
    Literal8,
    /// Literal pool typed `S_16BYTE_LITERALS`.
    Literal16,
    /// Uninitialized, BSS-style storage (`S_ZEROFILL`).
    ZeroFill,
    /// Zerofill for > 4 GiB BSS (`S_GB_ZEROFILL`) — rare, not used by
    /// armfortas today.
    GbZeroFill,
    /// Coalesced section (`S_COALESCED`) — per-function weak-def model.
    Coalesced,
    /// Initialized thread-local data (`S_THREAD_LOCAL_REGULAR`).
    ThreadLocalRegular,
    /// Zero-filled thread-local data (`S_THREAD_LOCAL_ZEROFILL`).
    ThreadLocalZeroFill,
    /// Thread-local variable descriptors (`S_THREAD_LOCAL_VARIABLES`).
    ThreadLocalVariables,
    /// Pointer slots to TLV descriptors (`S_THREAD_LOCAL_VARIABLE_POINTERS`).
    ThreadLocalVariablePointers,
    /// Thread-local initializer function pointers
    /// (`S_THREAD_LOCAL_INIT_FUNCTION_POINTERS`).
    ThreadLocalInitPointers,
    /// `__compact_unwind`: an `S_REGULAR` section carrying `S_ATTR_DEBUG`,
    /// recognised by its section name.
    CompactUnwind,
    /// `__eh_frame`: an `S_COALESCED` section recognised by its section name.
    EhFrame,
    /// Non-lazy symbol pointers — typically `__DATA_CONST,__got`.
    NonLazySymbolPointers,
    /// Lazy symbol pointers — typically `__DATA,__la_symbol_ptr`.
    LazySymbolPointers,
    /// Symbol stubs — typically `__TEXT,__stubs`.
    SymbolStubs,
    /// Any other regular section not otherwise classified.
    Regular,
    /// Section type nibble the classifier does not recognise; the raw nibble
    /// is kept so diagnostics can report it.
    Unknown(u8),
}
| 59 | |
| 60 | /// Classify a section by its segment name, section name, and wire `flags`. |
| 61 | pub fn classify_section(segname: &str, sectname: &str, flags: u32) -> SectionKind { |
| 62 | let ty = flags & SECTION_TYPE_MASK; |
| 63 | match ty { |
| 64 | S_ZEROFILL => SectionKind::ZeroFill, |
| 65 | S_GB_ZEROFILL => SectionKind::GbZeroFill, |
| 66 | S_CSTRING_LITERALS => SectionKind::CStringLiterals, |
| 67 | S_4BYTE_LITERALS => SectionKind::Literal4, |
| 68 | S_8BYTE_LITERALS => SectionKind::Literal8, |
| 69 | S_16BYTE_LITERALS => SectionKind::Literal16, |
| 70 | S_NON_LAZY_SYMBOL_POINTERS => SectionKind::NonLazySymbolPointers, |
| 71 | S_LAZY_SYMBOL_POINTERS => SectionKind::LazySymbolPointers, |
| 72 | S_SYMBOL_STUBS => SectionKind::SymbolStubs, |
| 73 | S_COALESCED => { |
| 74 | if sectname == "__eh_frame" { |
| 75 | SectionKind::EhFrame |
| 76 | } else { |
| 77 | SectionKind::Coalesced |
| 78 | } |
| 79 | } |
| 80 | S_THREAD_LOCAL_REGULAR => SectionKind::ThreadLocalRegular, |
| 81 | S_THREAD_LOCAL_ZEROFILL => SectionKind::ThreadLocalZeroFill, |
| 82 | S_THREAD_LOCAL_VARIABLES => SectionKind::ThreadLocalVariables, |
| 83 | S_THREAD_LOCAL_VARIABLE_POINTERS => SectionKind::ThreadLocalVariablePointers, |
| 84 | S_THREAD_LOCAL_INIT_FUNCTION_POINTERS => SectionKind::ThreadLocalInitPointers, |
| 85 | S_REGULAR => classify_regular(segname, sectname, flags), |
| 86 | _ => SectionKind::Unknown(ty as u8), |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | fn classify_regular(segname: &str, sectname: &str, flags: u32) -> SectionKind { |
| 91 | if flags & S_ATTR_DEBUG != 0 && sectname == "__compact_unwind" { |
| 92 | return SectionKind::CompactUnwind; |
| 93 | } |
| 94 | if flags & S_ATTR_PURE_INSTRUCTIONS != 0 { |
| 95 | return SectionKind::Text; |
| 96 | } |
| 97 | if sectname == "__const" && matches!(segname, "__TEXT" | "__DATA" | "__DATA_CONST") { |
| 98 | return SectionKind::ConstData; |
| 99 | } |
| 100 | SectionKind::Data |
| 101 | } |
| 102 | |
| 103 | /// True if the section holds no bytes in the file (size is virtual). |
| 104 | pub fn is_zerofill(kind: SectionKind) -> bool { |
| 105 | matches!( |
| 106 | kind, |
| 107 | SectionKind::ZeroFill | SectionKind::GbZeroFill | SectionKind::ThreadLocalZeroFill |
| 108 | ) |
| 109 | } |
| 110 | |
| 111 | /// True if the section carries ARM64 instructions. |
| 112 | pub fn is_executable(kind: SectionKind) -> bool { |
| 113 | matches!( |
| 114 | kind, |
| 115 | SectionKind::Text | SectionKind::SymbolStubs | SectionKind::Coalesced |
| 116 | ) |
| 117 | } |
| 118 | |
| 119 | // --------------------------------------------------------------------------- |
| 120 | // InputSection — the linker-side model for one input .o's section. |
| 121 | // --------------------------------------------------------------------------- |
| 122 | |
| 123 | /// A single input-file section with its decoded header, kind, content slice, |
| 124 | /// and raw relocation bytes. Relocation decoding happens in Sprint 3; here we |
| 125 | /// preserve the wire bytes unchanged. |
| 126 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 127 | pub struct InputSection { |
| 128 | pub segname: String, |
| 129 | pub sectname: String, |
| 130 | pub kind: SectionKind, |
| 131 | pub addr: u64, |
| 132 | pub size: u64, |
| 133 | pub align_pow2: u32, |
| 134 | pub flags: u32, |
| 135 | pub offset: u32, |
| 136 | pub reloff: u32, |
| 137 | pub nreloc: u32, |
| 138 | pub reserved1: u32, |
| 139 | pub reserved2: u32, |
| 140 | pub reserved3: u32, |
| 141 | /// File-backed bytes of the section. Empty for zerofill/TLS-zerofill/GB. |
| 142 | pub data: Vec<u8>, |
| 143 | /// Raw 8-byte relocation_info entries (`nreloc × 8` bytes). Decoded in |
| 144 | /// Sprint 3; owned here so later passes can reinterpret without |
| 145 | /// re-reading the source file. |
| 146 | pub raw_relocs: Vec<u8>, |
| 147 | } |
| 148 | |
| 149 | impl InputSection { |
| 150 | /// Lift an `InputSection` out of a file image using the decoded section |
| 151 | /// header to locate its content and relocation bytes. |
| 152 | pub fn from_header(hdr: &Section64Header, file_bytes: &[u8]) -> Result<Self, ReadError> { |
| 153 | let segname = name16_str(&hdr.segname); |
| 154 | let sectname = name16_str(&hdr.sectname); |
| 155 | let kind = classify_section(&segname, §name, hdr.flags); |
| 156 | |
| 157 | let data = if is_zerofill(kind) { |
| 158 | Vec::new() |
| 159 | } else { |
| 160 | let start = hdr.offset as usize; |
| 161 | let end = start |
| 162 | .checked_add(hdr.size as usize) |
| 163 | .ok_or(ReadError::Truncated { |
| 164 | need: usize::MAX, |
| 165 | have: file_bytes.len(), |
| 166 | context: "section content (offset + size overflows)", |
| 167 | })?; |
| 168 | if end > file_bytes.len() { |
| 169 | return Err(ReadError::Truncated { |
| 170 | need: end, |
| 171 | have: file_bytes.len(), |
| 172 | context: "section content", |
| 173 | }); |
| 174 | } |
| 175 | file_bytes[start..end].to_vec() |
| 176 | }; |
| 177 | |
| 178 | let raw_relocs = if hdr.nreloc == 0 { |
| 179 | Vec::new() |
| 180 | } else { |
| 181 | let start = hdr.reloff as usize; |
| 182 | let total = (hdr.nreloc as usize) |
| 183 | .checked_mul(8) |
| 184 | .ok_or(ReadError::Truncated { |
| 185 | need: usize::MAX, |
| 186 | have: file_bytes.len(), |
| 187 | context: "section relocs (nreloc × 8 overflows)", |
| 188 | })?; |
| 189 | let end = start.checked_add(total).ok_or(ReadError::Truncated { |
| 190 | need: usize::MAX, |
| 191 | have: file_bytes.len(), |
| 192 | context: "section relocs (reloff + size overflows)", |
| 193 | })?; |
| 194 | if end > file_bytes.len() { |
| 195 | return Err(ReadError::Truncated { |
| 196 | need: end, |
| 197 | have: file_bytes.len(), |
| 198 | context: "section relocs", |
| 199 | }); |
| 200 | } |
| 201 | file_bytes[start..end].to_vec() |
| 202 | }; |
| 203 | |
| 204 | Ok(InputSection { |
| 205 | segname, |
| 206 | sectname, |
| 207 | kind, |
| 208 | addr: hdr.addr, |
| 209 | size: hdr.size, |
| 210 | align_pow2: hdr.align, |
| 211 | flags: hdr.flags, |
| 212 | offset: hdr.offset, |
| 213 | reloff: hdr.reloff, |
| 214 | nreloc: hdr.nreloc, |
| 215 | reserved1: hdr.reserved1, |
| 216 | reserved2: hdr.reserved2, |
| 217 | reserved3: hdr.reserved3, |
| 218 | data, |
| 219 | raw_relocs, |
| 220 | }) |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | // --------------------------------------------------------------------------- |
| 225 | // Output layout model — populated by Sprint 10's layout pass. |
| 226 | // --------------------------------------------------------------------------- |
| 227 | |
/// Opaque handle naming one output section.
///
/// `OutputSegment::sections` stores these rather than the sections
/// themselves. What the wrapped `u32` indexes into is decided by the layout
/// pass and is not visible in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct OutputSectionId(pub u32);
| 230 | |
/// Memory-protection bitmask, used for a segment's initial and maximum
/// protection.
///
/// The wrapped value is private; build one from the associated constants and
/// read it back with [`Prot::bits`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Prot(u32);

impl Prot {
    /// No access bits set.
    pub const NONE: Self = Self(0);
    /// Read bit (`1`).
    pub const READ: Self = Self(1 << 0);
    /// Write bit (`2`).
    pub const WRITE: Self = Self(1 << 1);
    /// Execute bit (`4`).
    pub const EXECUTE: Self = Self(1 << 2);
    /// Read only — identical to [`Prot::READ`].
    pub const READ_ONLY: Self = Self(Self::READ.0);
    /// Read and write.
    pub const READ_WRITE: Self = Self(Self::READ.0 | Self::WRITE.0);
    /// Read and execute.
    pub const READ_EXECUTE: Self = Self(Self::READ.0 | Self::EXECUTE.0);

    /// The raw bitmask value.
    pub fn bits(self) -> u32 {
        let Self(raw) = self;
        raw
    }
}
| 247 | |
| 248 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 249 | pub struct OutputAtom { |
| 250 | pub atom: AtomId, |
| 251 | pub offset: u64, |
| 252 | pub size: u64, |
| 253 | pub data: Vec<u8>, |
| 254 | } |
| 255 | |
| 256 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 257 | pub struct OutputSection { |
| 258 | pub segment: String, |
| 259 | pub name: String, |
| 260 | pub kind: SectionKind, |
| 261 | pub align_pow2: u8, |
| 262 | pub flags: u32, |
| 263 | pub reserved1: u32, |
| 264 | pub reserved2: u32, |
| 265 | pub reserved3: u32, |
| 266 | pub atoms: Vec<OutputAtom>, |
| 267 | /// Byte offset within the section where `synthetic_data` begins. |
| 268 | pub synthetic_offset: u64, |
| 269 | pub synthetic_data: Vec<u8>, |
| 270 | pub addr: u64, |
| 271 | pub size: u64, |
| 272 | pub file_off: u64, |
| 273 | } |
| 274 | |
| 275 | #[derive(Debug, Clone, PartialEq, Eq)] |
| 276 | pub struct OutputSegment { |
| 277 | pub name: String, |
| 278 | pub sections: Vec<OutputSectionId>, |
| 279 | pub vm_addr: u64, |
| 280 | pub vm_size: u64, |
| 281 | pub file_off: u64, |
| 282 | pub file_size: u64, |
| 283 | pub init_prot: Prot, |
| 284 | pub max_prot: Prot, |
| 285 | pub flags: u32, |
| 286 | } |
| 287 | |
| 288 | impl OutputSection { |
| 289 | pub fn is_zerofill(&self) -> bool { |
| 290 | is_zerofill(self.kind) |
| 291 | } |
| 292 | } |
| 293 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// A pure-instructions regular section is text, and text is executable.
    #[test]
    fn classify_text_section() {
        let flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
        let kind = classify_section("__TEXT", "__text", flags);
        assert_eq!(kind, SectionKind::Text);
        assert!(is_executable(kind));
    }

    /// `S_CSTRING_LITERALS` maps straight to the C-string kind.
    #[test]
    fn classify_cstring_literals() {
        let kind = classify_section("__TEXT", "__cstring", S_CSTRING_LITERALS);
        assert_eq!(kind, SectionKind::CStringLiterals);
    }

    /// `S_ZEROFILL` is classified as zerofill and reported as such.
    #[test]
    fn classify_zerofill() {
        let kind = classify_section("__DATA", "__bss", S_ZEROFILL);
        assert_eq!(kind, SectionKind::ZeroFill);
        assert!(is_zerofill(kind));
    }

    /// A regular `__TEXT,__const` section is const data.
    #[test]
    fn classify_const_data() {
        let kind = classify_section("__TEXT", "__const", S_REGULAR);
        assert_eq!(kind, SectionKind::ConstData);
    }

    /// A regular section with no markers falls back to plain data.
    #[test]
    fn classify_regular_data() {
        let kind = classify_section("__DATA", "__data", S_REGULAR);
        assert_eq!(kind, SectionKind::Data);
    }

    /// `__compact_unwind` with `S_ATTR_DEBUG` gets its dedicated kind.
    #[test]
    fn classify_compact_unwind() {
        let kind = classify_section("__TEXT", "__compact_unwind", S_REGULAR | S_ATTR_DEBUG);
        assert_eq!(kind, SectionKind::CompactUnwind);
    }

    /// Among coalesced sections only `__eh_frame` is singled out.
    #[test]
    fn classify_eh_frame_vs_coalesced() {
        let cases = [
            ("__eh_frame", SectionKind::EhFrame),
            ("__weak_text", SectionKind::Coalesced),
        ];
        for &(sectname, expected) in cases.iter() {
            assert_eq!(
                classify_section("__TEXT", sectname, S_COALESCED),
                expected,
                "sectname {}",
                sectname
            );
        }
    }

    /// Each thread-local section type maps to its own kind.
    #[test]
    fn classify_tls_family() {
        let cases = [
            (
                "__thread_data",
                S_THREAD_LOCAL_REGULAR,
                SectionKind::ThreadLocalRegular,
            ),
            (
                "__thread_bss",
                S_THREAD_LOCAL_ZEROFILL,
                SectionKind::ThreadLocalZeroFill,
            ),
            (
                "__thread_vars",
                S_THREAD_LOCAL_VARIABLES,
                SectionKind::ThreadLocalVariables,
            ),
            (
                "__thread_ptrs",
                S_THREAD_LOCAL_VARIABLE_POINTERS,
                SectionKind::ThreadLocalVariablePointers,
            ),
        ];
        for &(sectname, flags, expected) in cases.iter() {
            assert_eq!(
                classify_section("__DATA", sectname, flags),
                expected,
                "sectname {}",
                sectname
            );
        }
    }

    /// GOT, stubs, and lazy-pointer sections are identified by type.
    #[test]
    fn classify_got_and_stubs() {
        let cases = [
            (
                "__DATA_CONST",
                "__got",
                S_NON_LAZY_SYMBOL_POINTERS,
                SectionKind::NonLazySymbolPointers,
            ),
            ("__TEXT", "__stubs", S_SYMBOL_STUBS, SectionKind::SymbolStubs),
            (
                "__DATA",
                "__la_symbol_ptr",
                S_LAZY_SYMBOL_POINTERS,
                SectionKind::LazySymbolPointers,
            ),
        ];
        for &(segname, sectname, flags, expected) in cases.iter() {
            assert_eq!(
                classify_section(segname, sectname, flags),
                expected,
                "{},{}",
                segname,
                sectname
            );
        }
    }

    /// An unrecognised type value is carried through in `Unknown`.
    #[test]
    fn unknown_type_nibble_preserved() {
        let weird_flags = 0xFFu32;
        let kind = classify_section("__WEIRD", "__weird", weird_flags);
        assert_eq!(kind, SectionKind::Unknown(0xFF));
    }
}
| 406 |