fortrangoingonforty/afs-ld / d7a741d

Browse files

add ULEB128/SLEB128 codec for dyld trees and opcode streams

Authored by espadonne
SHA
d7a741d9973bed2bb1a55b3ea3c97974b4a9e7b8
Parents
5e9fb8a
Tree
d5ae782

2 changed files

StatusFile+-
A src/leb.rs 210 0
M src/lib.rs 1 0
src/leb.rsadded
@@ -0,0 +1,210 @@
1
+//! ULEB128 / SLEB128 codec.
2
+//!
3
+//! dyld uses LEB128 pervasively: the export trie (Sprint 5), function-starts
4
+//! deltas (Sprint 16), rebase/bind/lazy-bind opcode streams (Sprint 15), and
5
+//! chained-fixups imports (Sprint 15.5) all encode variable-width integers
6
+//! this way. One codec, reused across all of them.
7
+
8
+use crate::macho::reader::ReadError;
9
+
10
+/// Read a ULEB128 from `bytes`. On success returns `(value, consumed)`.
11
+/// Errors on overrun beyond the buffer or on encodings that exceed 10 bytes
12
+/// (maximum for a `u64`).
13
+pub fn read_uleb(bytes: &[u8]) -> Result<(u64, usize), ReadError> {
14
+    let mut value: u64 = 0;
15
+    let mut shift = 0u32;
16
+    for (i, &b) in bytes.iter().enumerate() {
17
+        // The top bit of each byte is the continuation flag; low 7 bits are
18
+        // value bits, concatenated little-end first.
19
+        if shift >= 64 {
20
+            return Err(ReadError::BadRelocation {
21
+                at_offset: 0,
22
+                reason: "ULEB128 overflows u64",
23
+            });
24
+        }
25
+        value |= ((b & 0x7f) as u64) << shift;
26
+        if b & 0x80 == 0 {
27
+            return Ok((value, i + 1));
28
+        }
29
+        shift += 7;
30
+    }
31
+    Err(ReadError::Truncated {
32
+        need: bytes.len() + 1,
33
+        have: bytes.len(),
34
+        context: "ULEB128 (unterminated)",
35
+    })
36
+}
37
+
38
+/// Read an SLEB128 from `bytes`. On success returns `(value, consumed)`.
39
+pub fn read_sleb(bytes: &[u8]) -> Result<(i64, usize), ReadError> {
40
+    let mut value: i64 = 0;
41
+    let mut shift = 0u32;
42
+    for (i, &b) in bytes.iter().enumerate() {
43
+        if shift >= 64 {
44
+            return Err(ReadError::BadRelocation {
45
+                at_offset: 0,
46
+                reason: "SLEB128 overflows i64",
47
+            });
48
+        }
49
+        value |= ((b & 0x7f) as i64) << shift;
50
+        shift += 7;
51
+        if b & 0x80 == 0 {
52
+            // Sign-extend: if the value's sign bit (bit 6 of the last byte)
53
+            // is set and we have spare high bits, fill them with 1s.
54
+            if shift < 64 && (b & 0x40) != 0 {
55
+                value |= !0i64 << shift;
56
+            }
57
+            return Ok((value, i + 1));
58
+        }
59
+    }
60
+    Err(ReadError::Truncated {
61
+        need: bytes.len() + 1,
62
+        have: bytes.len(),
63
+        context: "SLEB128 (unterminated)",
64
+    })
65
+}
66
+
67
/// Append the ULEB128 encoding of `value` to `out`.
///
/// The value is emitted seven bits at a time, least-significant group
/// first. Every byte except the last has its top bit (0x80) set to signal
/// that more bytes follow. Zero is encoded as the single byte `0x00`.
/// Existing contents of `out` are left untouched.
pub fn write_uleb(mut value: u64, out: &mut Vec<u8>) {
    // While more than seven significant bits remain, emit a continuation byte.
    while value >= 0x80 {
        out.push((value & 0x7f) as u8 | 0x80);
        value >>= 7;
    }
    // What is left fits in seven bits: the terminating byte.
    out.push(value as u8);
}
79
+
80
/// Append the SLEB128 encoding of `value` to `out`.
///
/// The value is emitted seven bits at a time, least-significant group
/// first. Encoding stops at the first group after which everything left
/// is sign extension of that group's bit 6. Every earlier byte has its
/// top bit (0x80) set. Existing contents of `out` are left untouched.
pub fn write_sleb(mut value: i64, out: &mut Vec<u8>) {
    loop {
        let group = (value & 0x7f) as u8;
        // `>>` on a signed integer is an arithmetic shift, so the sign is kept.
        value >>= 7;
        let group_is_negative = group & 0x40 != 0;
        // The group is final when the remainder is all zeros for a
        // non-negative group, or all ones for a negative group.
        let is_last = match value {
            0 => !group_is_negative,
            -1 => group_is_negative,
            _ => false,
        };
        if is_last {
            out.push(group);
            break;
        }
        out.push(group | 0x80);
    }
}
96
+
97
// Unit tests for the LEB128 codec: fixed known encodings, encode/decode
// round-trips over boundary values, and the unterminated-input error path.
#[cfg(test)]
mod tests {
    use super::*;

    /// Zero and the largest one-byte value each encode to a single byte.
    #[test]
    fn uleb_small_one_byte() {
        let mut buf = Vec::new();
        write_uleb(0, &mut buf);
        assert_eq!(buf, vec![0x00]);
        // `write_uleb` appends, so the second encoding lands at index 1.
        write_uleb(0x7f, &mut buf);
        assert_eq!(buf[1], 0x7f);
    }

    /// Values around each 7-bit boundary, plus the extremes, survive a
    /// write/read round-trip and the reader consumes the whole encoding.
    #[test]
    fn uleb_round_trips_many_values() {
        for v in [
            0u64,
            1,
            127,
            128,
            129,
            16383,
            16384,
            0xdead_beef,
            0xffff_ffff,
            u64::MAX,
        ] {
            let mut buf = Vec::new();
            write_uleb(v, &mut buf);
            let (back, consumed) = read_uleb(&buf).unwrap();
            assert_eq!(back, v, "round-trip failed for {v:#x}");
            assert_eq!(consumed, buf.len());
        }
    }

    /// Signed values on both sides of the sign-bit boundaries, plus the
    /// extremes, survive a write/read round-trip.
    #[test]
    fn sleb_round_trips_many_values() {
        for v in [
            0i64,
            1,
            -1,
            63,
            -63,
            64,
            -64,
            65,
            -65,
            127,
            -128,
            8192,
            -8192,
            0x0001_0000,
            -0x0001_0000,
            i64::MIN,
            i64::MAX,
        ] {
            let mut buf = Vec::new();
            write_sleb(v, &mut buf);
            let (back, consumed) = read_sleb(&buf).unwrap();
            assert_eq!(back, v, "sleb round-trip failed for {v}");
            assert_eq!(consumed, buf.len());
        }
    }

    #[test]
    fn uleb_known_multibyte_encoding() {
        // 624485 = 0b100110000111011100101 — canonical LEB example.
        let mut buf = Vec::new();
        write_uleb(624485, &mut buf);
        assert_eq!(buf, vec![0xe5, 0x8e, 0x26]);
        let (v, n) = read_uleb(&buf).unwrap();
        assert_eq!(v, 624485);
        assert_eq!(n, 3);
    }

    #[test]
    fn sleb_known_multibyte_encoding() {
        // -12345 in SLEB: widely quoted example. Pin the bytes so that a
        // matching bug in both writer and reader cannot hide behind the
        // round-trip.
        let mut buf = Vec::new();
        write_sleb(-12345, &mut buf);
        assert_eq!(buf, vec![0xc7, 0x9f, 0x7f]);
        let (v, n) = read_sleb(&buf).unwrap();
        assert_eq!(v, -12345);
        assert_eq!(n, buf.len());
    }

    #[test]
    fn uleb_unterminated_errors() {
        // Continuation bit set on every byte but no terminator.
        let buf = vec![0x80, 0x80, 0x80];
        assert!(matches!(
            read_uleb(&buf).unwrap_err(),
            ReadError::Truncated { .. }
        ));
    }

    #[test]
    fn sleb_unterminated_errors() {
        // Continuation bit set on every byte but no terminator.
        let buf = vec![0x80, 0x80];
        assert!(matches!(
            read_sleb(&buf).unwrap_err(),
            ReadError::Truncated { .. }
        ));
    }

    #[test]
    fn uleb_consumes_exactly_first_encoding() {
        let mut buf = Vec::new();
        write_uleb(100, &mut buf);
        buf.extend_from_slice(&[0xff, 0xee]); // trailing unrelated bytes
        let (v, n) = read_uleb(&buf).unwrap();
        assert_eq!(v, 100);
        assert_eq!(n, 1);
    }

    #[test]
    fn sleb_consumes_exactly_first_encoding() {
        let mut buf = Vec::new();
        write_sleb(-2, &mut buf);
        buf.extend_from_slice(&[0xff, 0xee]); // trailing unrelated bytes
        let (v, n) = read_sleb(&buf).unwrap();
        assert_eq!(v, -2);
        assert_eq!(n, 1);
    }
}
src/lib.rsmodified
@@ -9,6 +9,7 @@ pub mod args;
99
 pub mod diag;
1010
 pub mod dump;
1111
 pub mod input;
12
+pub mod leb;
1213
 pub mod macho;
1314
 pub mod reloc;
1415
 pub mod section;