// This module implements Identifier, a short-optimized string allowed to
// contain only the ASCII characters hyphen, dot, 0-9, A-Z, a-z.
//
// As of mid-2021, the distribution of pre-release lengths on crates.io is:
//
//     length  count         length  count         length  count
//        0  355929            11      81            24       2
//        1     208            12      48            25       6
//        2     236            13      55            26      10
//        3    1909            14      25            27       4
//        4    1284            15      15            28       1
//        5    1742            16      35            30       1
//        6    3440            17       9            31       5
//        7    5624            18       6            32       1
//        8    1321            19      12            36       2
//        9     179            20       2            37     379
//       10      65            23      11
//
// and the distribution of build metadata lengths is:
//
//     length  count         length  count         length  count
//        0  364445             8    7725            18       1
//        1      72             9      16            19       1
//        2       7            10      85            20       1
//        3      28            11      17            22       4
//        4       9            12      10            26       1
//        5      68            13       9            27       1
//        6      73            14      10            40       5
//        7      53            15       6
//
// Therefore it really behooves us to be able to use the entire 8 bytes of a
// pointer for inline storage. For both pre-release and build metadata there are
// vastly more strings with length exactly 8 bytes than the sum over all lengths
// longer than 8 bytes.
//
// To differentiate the inline representation from the heap allocated long
// representation, we'll allocate heap pointers with 2-byte alignment so that
// they are guaranteed to have an unset least significant bit. Then in the repr
// we store for pointers, we rotate a 1 into the most significant bit of the
// most significant byte, which is never set for an ASCII byte.
//
// Inline repr:
//
//     0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx
//
// Heap allocated repr:
//
//     1ppppppp pppppppp pppppppp pppppppp pppppppp pppppppp pppppppp pppppppp 0
//     ^ most significant bit   least significant bit of orig ptr, rotated out ^
//
// Since the most significant bit doubles as a sign bit for the similarly sized
// signed integer type, the CPU has an efficient instruction for inspecting it,
// meaning we can differentiate between an inline repr and a heap allocated repr
// in one instruction. Effectively an inline repr always looks like a positive
// i64 while a heap allocated repr always looks like a negative i64.
//
// For the inline repr, we store \0 padding on the end of the stored characters,
// and thus the string length is readily determined by a single count-zeros
// instruction: count leading zeros (lzcnt/bsr) on little-endian targets, where
// the \0 padding lands in the most significant bytes, or count trailing zeros
// (cttz/bsf) on big-endian targets.
//
// For the heap allocated repr, the length is encoded as a base-128 varint at
// the head of the allocation.
//
// Empty strings are stored as an all-1 bit pattern, corresponding to -1i64.
// Consequently the all-0 bit pattern is never a legal representation in any
// repr, leaving it available as a niche for downstream code. For example this
// allows size_of::<Version>() == size_of::<Option<Version>>().
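//
// As a concrete illustration (an editorial sketch, assuming a little-endian
// 64-bit target): the 6-byte identifier "beta.1" is stored inline as the bytes
//
//     62 65 74 61 2e 31 00 00        "beta.1" plus two \0 padding bytes
//
// which read as an i64 is positive, while a 12-byte identifier such as
// "experimental" is heap allocated as 13 bytes: the varint length byte 0x8c
// (12 | 0x80) followed by the 12 ASCII bytes, with the tagged, rotated pointer
// stored in the repr. The empty identifier is the all-1 pattern
//
//     ff ff ff ff ff ff ff ff        -1 as i64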

use crate::alloc::alloc::{alloc, dealloc, handle_alloc_error, Layout};
use core::isize;
use core::mem;
use core::num::{NonZeroU64, NonZeroUsize};
use core::ptr::{self, NonNull};
use core::slice;
use core::str;
use core::usize;

const PTR_BYTES: usize = mem::size_of::<NonNull<u8>>();

// If pointers are already 8 bytes or bigger, then 0. If pointers are smaller
// than 8 bytes, then Identifier will contain a byte array to raise its size up
// to 8 bytes total.
const TAIL_BYTES: usize = 8 * (PTR_BYTES < 8) as usize - PTR_BYTES * (PTR_BYTES < 8) as usize;

#[repr(C, align(8))]
pub(crate) struct Identifier {
    head: NonNull<u8>,
    tail: [u8; TAIL_BYTES],
}
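
// A minimal layout sanity sketch (an editorial addition, assuming a toolchain
// where `assert!` is allowed in const context): the TAIL_BYTES arithmetic
// above is meant to make Identifier exactly 8 bytes with 8-byte alignment
// regardless of pointer width.
const _: () = {
    assert!(mem::size_of::<Identifier>() == 8);
    assert!(mem::align_of::<Identifier>() == 8);
};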

impl Identifier {
    pub(crate) const fn empty() -> Self {
        // This is a separate constant because unsafe function calls are not
        // allowed in a const fn body, only in a const, until later rustc than
        // what we support.
        const HEAD: NonNull<u8> = unsafe { NonNull::new_unchecked(!0 as *mut u8) };

        // `mov rax, -1`
        Identifier {
            head: HEAD,
            tail: [!0; TAIL_BYTES],
        }
    }

    // SAFETY: string must be ASCII and not contain \0 bytes.
    pub(crate) unsafe fn new_unchecked(string: &str) -> Self {
        let len = string.len();
        debug_assert!(len <= isize::MAX as usize);
        match len as u64 {
            0 => Self::empty(),
            1..=8 => {
                let mut bytes = [0u8; mem::size_of::<Identifier>()];
                // SAFETY: string is big enough to read len bytes, bytes is big
                // enough to write len bytes, and they do not overlap.
                unsafe { ptr::copy_nonoverlapping(string.as_ptr(), bytes.as_mut_ptr(), len) };
                // SAFETY: the head field is nonzero because the input string
                // was at least 1 byte of ASCII and did not contain \0.
                unsafe { mem::transmute::<[u8; mem::size_of::<Identifier>()], Identifier>(bytes) }
            }
            9..=0xff_ffff_ffff_ffff => {
                // SAFETY: len is in a range that does not contain 0.
                let size = bytes_for_varint(unsafe { NonZeroUsize::new_unchecked(len) }) + len;
                let align = 2;
                // On 32-bit and 16-bit architectures, check for size overflowing
                // isize::MAX. Making an allocation request bigger than this to
                // the allocator is considered UB. All allocations (including
                // static ones) are limited to isize::MAX so we're guaranteed
                // len <= isize::MAX, and we know bytes_for_varint(len) <= 5
                // because 128**5 > isize::MAX, which means the only problem
                // that can arise is when isize::MAX - 5 <= len <= isize::MAX.
                // This is pretty much guaranteed to be malicious input so we
                // don't need to care about returning a good error message.
                if mem::size_of::<usize>() < 8 {
                    let max_alloc = usize::MAX / 2 - align;
                    assert!(size <= max_alloc);
                }
                // SAFETY: align is not zero, align is a power of two, and
                // rounding size up to align does not overflow isize::MAX.
                let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
                // SAFETY: layout's size is nonzero.
                let ptr = unsafe { alloc(layout) };
                if ptr.is_null() {
                    handle_alloc_error(layout);
                }
                let mut write = ptr;
                let mut varint_remaining = len;
                while varint_remaining > 0 {
                    // SAFETY: size is bytes_for_varint(len) bytes + len bytes.
                    // This is writing the first bytes_for_varint(len) bytes.
                    unsafe { ptr::write(write, varint_remaining as u8 | 0x80) };
                    varint_remaining >>= 7;
                    // SAFETY: still in bounds of the same allocation.
                    write = unsafe { write.add(1) };
                }
                // SAFETY: size is bytes_for_varint(len) bytes + len bytes. This
                // is writing to the last len bytes.
                unsafe { ptr::copy_nonoverlapping(string.as_ptr(), write, len) };
                Identifier {
                    head: ptr_to_repr(ptr),
                    tail: [0; TAIL_BYTES],
                }
            }
            0x100_0000_0000_0000..=0xffff_ffff_ffff_ffff => {
                unreachable!("please refrain from storing >64 petabytes of text in semver version");
            }
            #[cfg(no_exhaustive_int_match)] // rustc <1.33
            _ => unreachable!(),
        }
    }

    pub(crate) fn is_empty(&self) -> bool {
        // `cmp rdi, -1` -- basically: `repr as i64 == -1`
        let empty = Self::empty();
        let is_empty = self.head == empty.head && self.tail == empty.tail;
        // The empty representation does nothing on Drop. We can't let this one
        // drop normally because `impl Drop for Identifier` calls is_empty; that
        // would be an infinite recursion.
        mem::forget(empty);
        is_empty
    }

    fn is_inline(&self) -> bool {
        // `test rdi, rdi` -- basically: `repr as i64 >= 0`
        self.head.as_ptr() as usize >> (PTR_BYTES * 8 - 1) == 0
    }

    fn is_empty_or_inline(&self) -> bool {
        // `cmp rdi, -2` -- basically: `repr as i64 > -2`
        self.is_empty() || self.is_inline()
    }

    pub(crate) fn as_str(&self) -> &str {
        if self.is_empty() {
            ""
        } else if self.is_inline() {
            // SAFETY: repr is in the inline representation.
            unsafe { inline_as_str(self) }
        } else {
            // SAFETY: repr is in the heap allocated representation.
            unsafe { ptr_as_str(&self.head) }
        }
    }
}

impl Clone for Identifier {
    fn clone(&self) -> Self {
        if self.is_empty_or_inline() {
            Identifier {
                head: self.head,
                tail: self.tail,
            }
        } else {
            let ptr = repr_to_ptr(self.head);
            // SAFETY: ptr is one of our own heap allocations.
            let len = unsafe { decode_len(ptr) };
            let size = bytes_for_varint(len) + len.get();
            let align = 2;
            // SAFETY: align is not zero, align is a power of two, and rounding
            // size up to align does not overflow isize::MAX. This is just
            // duplicating a previous allocation where all of these guarantees
            // were already made.
            let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
            // SAFETY: layout's size is nonzero.
            let clone = unsafe { alloc(layout) };
            if clone.is_null() {
                handle_alloc_error(layout);
            }
            // SAFETY: new allocation cannot overlap the previous one (this was
            // not a realloc). The argument ptrs are readable/writeable
            // respectively for size bytes.
            unsafe { ptr::copy_nonoverlapping(ptr, clone, size) }
            Identifier {
                head: ptr_to_repr(clone),
                tail: [0; TAIL_BYTES],
            }
        }
    }
}

impl Drop for Identifier {
    fn drop(&mut self) {
        if self.is_empty_or_inline() {
            return;
        }
        let ptr = repr_to_ptr_mut(self.head);
        // SAFETY: ptr is one of our own heap allocations.
        let len = unsafe { decode_len(ptr) };
        let size = bytes_for_varint(len) + len.get();
        let align = 2;
        // SAFETY: align is not zero, align is a power of two, and rounding
        // size up to align does not overflow isize::MAX. These guarantees were
        // made when originally allocating this memory.
        let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
        // SAFETY: ptr was previously allocated by the same allocator with the
        // same layout.
        unsafe { dealloc(ptr, layout) }
    }
}

impl PartialEq for Identifier {
    fn eq(&self, rhs: &Self) -> bool {
        if self.is_empty_or_inline() {
            // Fast path (most common)
            self.head == rhs.head && self.tail == rhs.tail
        } else if rhs.is_empty_or_inline() {
            false
        } else {
            // SAFETY: both reprs are in the heap allocated representation.
            unsafe { ptr_as_str(&self.head) == ptr_as_str(&rhs.head) }
        }
    }
}

unsafe impl Send for Identifier {}
unsafe impl Sync for Identifier {}

// We use heap pointers that are 2-byte aligned, meaning they have an
// insignificant 0 in the least significant bit. We take advantage of that
// unneeded bit to rotate a 1 into the most significant bit to make the repr
// distinguishable from ASCII bytes.
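//
// Worked example (an editorial illustration using a made-up address, assuming
// a 64-bit target): a 2-byte aligned allocation at 0x0000_5555_5555_2220
// becomes (0x0000_5555_5555_2221).rotate_right(1) == 0x8000_2aaa_aaaa_9110,
// whose set most significant bit marks it as a heap repr; repr_to_ptr below
// undoes this with a single left shift.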
fn ptr_to_repr(original: *mut u8) -> NonNull<u8> {
    // `mov eax, 1`
    // `shld rax, rdi, 63`
    let modified = (original as usize | 1).rotate_right(1);

    // `original + (modified - original)`, but being mindful of provenance.
    let diff = modified.wrapping_sub(original as usize);
    let modified = original.wrapping_add(diff);

    // SAFETY: the most significant bit of repr is known to be set, so the value
    // is not zero.
    unsafe { NonNull::new_unchecked(modified) }
}

// Shift out the 1 previously rotated into the most significant bit of the most
// significant byte. Shift in a low 0 bit to reconstruct the original 2-byte
// aligned pointer.
fn repr_to_ptr(modified: NonNull<u8>) -> *const u8 {
    // `lea rax, [rdi + rdi]`
    let modified = modified.as_ptr();
    let original = (modified as usize) << 1;

    // `modified + (original - modified)`, but being mindful of provenance.
    let diff = original.wrapping_sub(modified as usize);
    modified.wrapping_add(diff)
}

fn repr_to_ptr_mut(repr: NonNull<u8>) -> *mut u8 {
    repr_to_ptr(repr) as *mut u8
}

// Compute the length of the inline string, assuming the argument is in short
// string representation. Short strings are stored as 1 to 8 nonzero ASCII
// bytes, followed by \0 padding for the remaining bytes.
//
// SAFETY: the identifier must indeed be in the inline representation.
unsafe fn inline_len(repr: &Identifier) -> NonZeroUsize {
    // SAFETY: Identifier's layout is align(8) and at least size 8. We're doing
    // an aligned read of the first 8 bytes from it. The bytes are not all zero
    // because inline strings are at least 1 byte long and cannot contain \0.
    let repr = unsafe { ptr::read(repr as *const Identifier as *const NonZeroU64) };

    // Rustc >=1.53 has intrinsics for counting zeros on a non-zeroable integer.
    // On many architectures these are more efficient than counting on ordinary
    // zeroable integers (bsf vs cttz). On rustc <1.53 without those intrinsics,
    // we count zeros in the u64 rather than the NonZeroU64.
    #[cfg(no_nonzero_bitscan)]
    let repr = repr.get();

    #[cfg(target_endian = "little")]
    let zero_bits_on_string_end = repr.leading_zeros();
    #[cfg(target_endian = "big")]
    let zero_bits_on_string_end = repr.trailing_zeros();

    let nonzero_bytes = 8 - zero_bits_on_string_end as usize / 8;

    // SAFETY: repr is nonzero, so it has at most 63 zero bits on either end,
    // thus at least one nonzero byte.
    unsafe { NonZeroUsize::new_unchecked(nonzero_bytes) }
}
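
// Worked example for inline_len above (an editorial illustration): the inline
// repr of "beta.1" holds the bytes 62 65 74 61 2e 31 00 00, which read as a
// little-endian u64 is 0x0000_312e_6174_6562. That value has 16 leading zero
// bits, so the length is 8 - 16 / 8 = 6.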

// SAFETY: repr must be in the inline representation, i.e. at least 1 and at
// most 8 nonzero ASCII bytes padded on the end with \0 bytes.
unsafe fn inline_as_str(repr: &Identifier) -> &str {
    let ptr = repr as *const Identifier as *const u8;
    let len = unsafe { inline_len(repr) }.get();
    // SAFETY: we are viewing the nonzero ASCII prefix of the inline repr's
    // contents as a slice of bytes. Input/output lifetimes are correctly
    // associated.
    let slice = unsafe { slice::from_raw_parts(ptr, len) };
    // SAFETY: the string contents are known to be only ASCII bytes, which are
    // always valid UTF-8.
    unsafe { str::from_utf8_unchecked(slice) }
}

// Decode varint. Varints consist of between one and eight base-128 digits, each
// of which is stored in a byte with most significant bit set. Adjacent to the
// varint in memory there is guaranteed to be at least 9 ASCII bytes, each of
// which has an unset most significant bit.
//
// SAFETY: ptr must be one of our own heap allocations, with the varint header
// already written.
unsafe fn decode_len(ptr: *const u8) -> NonZeroUsize {
    // SAFETY: There is at least one byte of varint followed by at least 9 bytes
    // of string content, which is at least 10 bytes total for the allocation,
    // so reading the first two is no problem.
    let [first, second] = unsafe { ptr::read(ptr as *const [u8; 2]) };
    if second < 0x80 {
        // SAFETY: the length of this heap allocated string has been encoded as
        // one base-128 digit, so the length is at least 9 and at most 127. It
        // cannot be zero.
        unsafe { NonZeroUsize::new_unchecked((first & 0x7f) as usize) }
    } else {
        return unsafe { decode_len_cold(ptr) };

        // Identifiers 128 bytes or longer. This is not exercised by any crate
        // version currently published to crates.io.
        #[cold]
        #[inline(never)]
        unsafe fn decode_len_cold(mut ptr: *const u8) -> NonZeroUsize {
            let mut len = 0;
            let mut shift = 0;
            loop {
                // SAFETY: varint continues while there are bytes having the
                // most significant bit set, i.e. until we start hitting the
                // ASCII string content with msb unset.
                let byte = unsafe { *ptr };
                if byte < 0x80 {
                    // SAFETY: the string length is known to be 128 bytes or
                    // longer.
                    return unsafe { NonZeroUsize::new_unchecked(len) };
                }
                // SAFETY: still in bounds of the same allocation.
                ptr = unsafe { ptr.add(1) };
                len += ((byte & 0x7f) as usize) << shift;
                shift += 7;
            }
        }
    }
}
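
// Worked example for decode_len above (an editorial illustration): a 42-byte
// identifier stores the single varint byte 0xaa (42 | 0x80) followed by ASCII
// content, so the fast path returns 0xaa & 0x7f == 42. A 300-byte identifier
// stores the bytes 0xac 0x82 (300 == 44 + 2 * 128, each digit with its high
// bit set), so the second byte is >= 0x80 and the cold path accumulates
// 44 + (2 << 7) == 300.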

// SAFETY: repr must be in the heap allocated representation, with varint header
// and string contents already written.
unsafe fn ptr_as_str(repr: &NonNull<u8>) -> &str {
    let ptr = repr_to_ptr(*repr);
    let len = unsafe { decode_len(ptr) };
    let header = bytes_for_varint(len);
    let slice = unsafe { slice::from_raw_parts(ptr.add(header), len.get()) };
    // SAFETY: all identifier contents are ASCII bytes, which are always valid
    // UTF-8.
    unsafe { str::from_utf8_unchecked(slice) }
}

// Number of base-128 digits required for the varint representation of a length.
fn bytes_for_varint(len: NonZeroUsize) -> usize {
    #[cfg(no_nonzero_bitscan)] // rustc <1.53
    let len = len.get();

    let usize_bits = mem::size_of::<usize>() * 8;
    let len_bits = usize_bits - len.leading_zeros() as usize;
    (len_bits + 6) / 7
}

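// Editorial test sketch (not part of the original module): exercises the
// inline, heap allocated, and empty representations through the crate-internal
// API above. The example strings are arbitrary ASCII chosen to hit each path.
#[cfg(test)]
mod tests {
    use super::*;
    use core::num::NonZeroUsize;

    #[test]
    fn identifier_round_trip() {
        // 6 bytes: stored inline.
        let short = unsafe { Identifier::new_unchecked("beta.1") };
        assert_eq!(short.as_str(), "beta.1");
        assert!(!short.is_empty());

        // 24 bytes: stored behind a heap allocation with a varint length header.
        let long = unsafe { Identifier::new_unchecked("a.very.long.prerelease.x") };
        assert_eq!(long.as_str(), "a.very.long.prerelease.x");
        assert!(long == long.clone());
        assert!(!(short == long));

        // The empty identifier is the all-ones repr.
        let empty = Identifier::empty();
        assert!(empty.is_empty());
        assert_eq!(empty.as_str(), "");
    }

    #[test]
    fn varint_header_sizes() {
        // Lengths 9..=127 need one base-128 digit, 128..=16383 need two.
        assert_eq!(bytes_for_varint(NonZeroUsize::new(9).unwrap()), 1);
        assert_eq!(bytes_for_varint(NonZeroUsize::new(127).unwrap()), 1);
        assert_eq!(bytes_for_varint(NonZeroUsize::new(128).unwrap()), 2);
        assert_eq!(bytes_for_varint(NonZeroUsize::new(300).unwrap()), 2);
    }
}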