Rust · 5982 bytes Raw Blame History
1 //! ULEB128 / SLEB128 codec.
2 //!
3 //! dyld uses LEB128 pervasively: the export trie (Sprint 5), function-starts
4 //! deltas (Sprint 16), rebase/bind/lazy-bind opcode streams (Sprint 15), and
5 //! chained-fixups imports (Sprint 15.5) all encode variable-width integers
6 //! this way. One codec, reused across all of them.
7
8 use crate::macho::reader::ReadError;
9
10 /// Read a ULEB128 from `bytes`. On success returns `(value, consumed)`.
11 /// Errors on overrun beyond the buffer or on encodings that exceed 10 bytes
12 /// (maximum for a `u64`).
13 pub fn read_uleb(bytes: &[u8]) -> Result<(u64, usize), ReadError> {
14 let mut value: u64 = 0;
15 let mut shift = 0u32;
16 for (i, &b) in bytes.iter().enumerate() {
17 // The top bit of each byte is the continuation flag; low 7 bits are
18 // value bits, concatenated little-end first.
19 if shift >= 64 {
20 return Err(ReadError::BadRelocation {
21 at_offset: 0,
22 reason: "ULEB128 overflows u64",
23 });
24 }
25 value |= ((b & 0x7f) as u64) << shift;
26 if b & 0x80 == 0 {
27 return Ok((value, i + 1));
28 }
29 shift += 7;
30 }
31 Err(ReadError::Truncated {
32 need: bytes.len() + 1,
33 have: bytes.len(),
34 context: "ULEB128 (unterminated)",
35 })
36 }
37
38 /// Read an SLEB128 from `bytes`. On success returns `(value, consumed)`.
39 pub fn read_sleb(bytes: &[u8]) -> Result<(i64, usize), ReadError> {
40 let mut value: i64 = 0;
41 let mut shift = 0u32;
42 for (i, &b) in bytes.iter().enumerate() {
43 if shift >= 64 {
44 return Err(ReadError::BadRelocation {
45 at_offset: 0,
46 reason: "SLEB128 overflows i64",
47 });
48 }
49 value |= ((b & 0x7f) as i64) << shift;
50 shift += 7;
51 if b & 0x80 == 0 {
52 // Sign-extend: if the value's sign bit (bit 6 of the last byte)
53 // is set and we have spare high bits, fill them with 1s.
54 if shift < 64 && (b & 0x40) != 0 {
55 value |= !0i64 << shift;
56 }
57 return Ok((value, i + 1));
58 }
59 }
60 Err(ReadError::Truncated {
61 need: bytes.len() + 1,
62 have: bytes.len(),
63 context: "SLEB128 (unterminated)",
64 })
65 }
66
/// Append the ULEB128 encoding of `value` to `out`.
///
/// The value is emitted seven bits at a time, least-significant group first.
/// Every byte except the last has its top bit set to signal that more bytes
/// follow. Zero encodes as the single byte `0x00`.
pub fn write_uleb(mut value: u64, out: &mut Vec<u8>) {
    // While more than seven significant bits remain, emit a continued group.
    while value >= 0x80 {
        out.push((value & 0x7f) as u8 | 0x80);
        value >>= 7;
    }
    // What is left fits in seven bits, so the cast is lossless and the
    // continuation flag stays clear.
    out.push(value as u8);
}
79
/// Append the SLEB128 encoding of `value` to `out`.
///
/// The value is emitted seven bits at a time, least-significant group first.
/// Every byte except the last has its top bit set. Encoding stops at the
/// first group whose bit 6 already equals the sign of everything above it,
/// so a decoder can recover the remaining bits by sign extension.
pub fn write_sleb(mut value: i64, out: &mut Vec<u8>) {
    // A value in -64..=63 is fully described by one 7-bit group plus sign
    // extension from bit 6. Anything outside that range needs a continued
    // group first.
    while !(-64..=63).contains(&value) {
        out.push((value & 0x7f) as u8 | 0x80);
        // Arithmetic shift keeps the sign, so negatives converge on -1.
        value >>= 7;
    }
    out.push((value & 0x7f) as u8);
}
96
#[cfg(test)]
mod tests {
    //! Unit tests for the LEB128 codec: exact byte encodings, encode/decode
    //! round-trips, and the decoder's error paths.

    use super::*;

    #[test]
    fn uleb_small_one_byte() {
        // Values up to 0x7f fit in a single byte; writes append to `out`.
        let mut buf = Vec::new();
        write_uleb(0, &mut buf);
        assert_eq!(buf, vec![0x00]);
        write_uleb(0x7f, &mut buf);
        assert_eq!(buf, vec![0x00, 0x7f]);
    }

    #[test]
    fn uleb_round_trips_many_values() {
        for v in [
            0u64,
            1,
            127,
            128,
            129,
            16383,
            16384,
            0xdead_beef,
            0xffff_ffff,
            u64::MAX,
        ] {
            let mut buf = Vec::new();
            write_uleb(v, &mut buf);
            let (back, consumed) = read_uleb(&buf).unwrap();
            assert_eq!(back, v, "round-trip failed for {v:#x}");
            assert_eq!(consumed, buf.len());
        }
    }

    #[test]
    fn sleb_round_trips_many_values() {
        for v in [
            0i64,
            1,
            -1,
            63,
            -63,
            64,
            -64,
            65,
            -65,
            127,
            -128,
            8192,
            -8192,
            0x0001_0000,
            -0x0001_0000,
            i64::MIN,
            i64::MAX,
        ] {
            let mut buf = Vec::new();
            write_sleb(v, &mut buf);
            let (back, consumed) = read_sleb(&buf).unwrap();
            assert_eq!(back, v, "sleb round-trip failed for {v}");
            assert_eq!(consumed, buf.len());
        }
    }

    #[test]
    fn uleb_known_multibyte_encoding() {
        // 624485 = 0b100110000111011100101 — canonical LEB example.
        let mut buf = Vec::new();
        write_uleb(624485, &mut buf);
        assert_eq!(buf, vec![0xe5, 0x8e, 0x26]);
        let (v, n) = read_uleb(&buf).unwrap();
        assert_eq!(v, 624485);
        assert_eq!(n, 3);
    }

    #[test]
    fn sleb_known_multibyte_encoding() {
        // Pin the exact bytes, not just the round-trip: an encoder and
        // decoder that agree on a wrong format would otherwise pass.
        let mut buf = Vec::new();
        write_sleb(-12345, &mut buf);
        assert_eq!(buf, vec![0xc7, 0x9f, 0x7f]);
        let (v, n) = read_sleb(&buf).unwrap();
        assert_eq!(v, -12345);
        assert_eq!(n, buf.len());

        // -123456 is the commonly quoted SLEB128 example.
        let mut buf = Vec::new();
        write_sleb(-123456, &mut buf);
        assert_eq!(buf, vec![0xc0, 0xbb, 0x78]);
        let (v, n) = read_sleb(&buf).unwrap();
        assert_eq!(v, -123456);
        assert_eq!(n, 3);
    }

    #[test]
    fn sleb_sign_boundary_encodings() {
        // 63 / -64 are the largest magnitudes that fit in one byte; one step
        // further in either direction needs a second byte.
        for (v, expected) in [
            (63i64, vec![0x3f]),
            (-64, vec![0x40]),
            (64, vec![0xc0, 0x00]),
            (-65, vec![0xbf, 0x7f]),
        ] {
            let mut buf = Vec::new();
            write_sleb(v, &mut buf);
            assert_eq!(buf, expected, "unexpected encoding for {v}");
        }
    }

    #[test]
    fn uleb_unterminated_errors() {
        // Continuation bit set on every byte but no terminator.
        let buf = vec![0x80, 0x80, 0x80];
        assert!(matches!(
            read_uleb(&buf).unwrap_err(),
            ReadError::Truncated { .. }
        ));
    }

    #[test]
    fn sleb_unterminated_errors() {
        let buf = vec![0x80, 0x80];
        assert!(matches!(
            read_sleb(&buf).unwrap_err(),
            ReadError::Truncated { .. }
        ));
    }

    #[test]
    fn empty_input_errors() {
        // No bytes at all is reported the same way as an unterminated run.
        assert!(matches!(
            read_uleb(&[]).unwrap_err(),
            ReadError::Truncated { .. }
        ));
        assert!(matches!(
            read_sleb(&[]).unwrap_err(),
            ReadError::Truncated { .. }
        ));
    }

    #[test]
    fn encodings_longer_than_ten_bytes_error() {
        // Ten continued bytes followed by a terminator: an 11-byte encoding
        // cannot fit in 64 bits even when every payload bit is zero.
        let mut buf = vec![0x80; 10];
        buf.push(0x00);
        assert!(matches!(
            read_uleb(&buf).unwrap_err(),
            ReadError::BadRelocation { .. }
        ));
        assert!(matches!(
            read_sleb(&buf).unwrap_err(),
            ReadError::BadRelocation { .. }
        ));
    }

    #[test]
    fn uleb_consumes_exactly_first_encoding() {
        let mut buf = Vec::new();
        write_uleb(100, &mut buf);
        buf.extend_from_slice(&[0xff, 0xee]); // trailing unrelated bytes
        let (v, n) = read_uleb(&buf).unwrap();
        assert_eq!(v, 100);
        assert_eq!(n, 1);
    }

    #[test]
    fn sleb_consumes_exactly_first_encoding() {
        let mut buf = Vec::new();
        write_sleb(-100, &mut buf);
        let encoded_len = buf.len();
        buf.extend_from_slice(&[0xff, 0xee]); // trailing unrelated bytes
        let (v, n) = read_sleb(&buf).unwrap();
        assert_eq!(v, -100);
        assert_eq!(n, encoded_len);
    }
}
211