//===-- Generic implementation of memory function building blocks ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic C++ building blocks.
// Depending on the requested size, the block operation uses unsigned integral
// types, vector types or an array of the type with the maximum size.
//
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types the maximum size would be 8
// (corresponding to uint64_t). On such a platform, a request of size 32 would
// be treated as a cpp::array<uint64_t, 4>.
//
// On the other hand, if the platform is x86 with support for AVX, the maximum
// size is 32 and the operation can be handled with a single native operation.
//
//===----------------------------------------------------------------------===//
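
// For illustration only: assuming a hypothetical platform whose widest
// native type is uint64_t, the building blocks below would be used as
//
//   generic::Memset<uint64_t>::block(dst, value);                 //  8 bytes
//   generic::Memset<cpp::array<uint64_t, 4>>::block(dst, value);  // 32 bytes
//
// where the array form lowers to four consecutive 8-byte stores.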

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H

#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/endian_internal.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h"
#include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"

#include <stdint.h>

static_assert((UINTPTR_MAX == 4294967295U) ||
                  (UINTPTR_MAX == 18446744073709551615UL),
              "We currently only support 32- or 64-bit platforms");

namespace LIBC_NAMESPACE_DECL {
// Compiler types using the vector attributes.
using generic_v128 = uint8_t __attribute__((__vector_size__(16)));
using generic_v256 = uint8_t __attribute__((__vector_size__(32)));
using generic_v512 = uint8_t __attribute__((__vector_size__(64)));
} // namespace LIBC_NAMESPACE_DECL

namespace LIBC_NAMESPACE_DECL {
namespace generic {

// We accept three types of values as elements for generic operations:
// - scalar : unsigned integral types,
// - vector : compiler types using the vector attributes or platform builtins,
// - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
// The following traits help discriminate between these cases.

template <typename T> struct is_scalar : cpp::false_type {};
template <> struct is_scalar<uint8_t> : cpp::true_type {};
template <> struct is_scalar<uint16_t> : cpp::true_type {};
template <> struct is_scalar<uint32_t> : cpp::true_type {};
#ifdef LIBC_TYPES_HAS_INT64
template <> struct is_scalar<uint64_t> : cpp::true_type {};
#endif // LIBC_TYPES_HAS_INT64
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr bool is_scalar_v = is_scalar<T>::value;

template <typename T> struct is_vector : cpp::false_type {};
template <> struct is_vector<generic_v128> : cpp::true_type {};
template <> struct is_vector<generic_v256> : cpp::true_type {};
template <> struct is_vector<generic_v512> : cpp::true_type {};
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr bool is_vector_v = is_vector<T>::value;

template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  // Meant to match std::numeric_limits interface.
  // NOLINTNEXTLINE(readability-identifier-naming)
  static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr bool is_array_v = is_array<T>::value;

// Meant to match std::numeric_limits interface.
// NOLINTBEGIN(readability-identifier-naming)
template <typename T>
constexpr bool is_element_type_v =
    is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;
// NOLINTEND(readability-identifier-naming)

// Helper struct to retrieve the number of elements of an array.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr size_t array_size_v = array_size<T>::value;

// Generic operations for the above type categories.

template <typename T> T load(CPtr src) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    return ::LIBC_NAMESPACE::load<T>(src);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    T value;
    for (size_t i = 0; i < array_size_v<T>; ++i)
      value[i] = load<value_type>(src + (i * sizeof(value_type)));
    return value;
  }
}

template <typename T> void store(Ptr dst, T value) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    ::LIBC_NAMESPACE::store<T>(dst, value);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    for (size_t i = 0; i < array_size_v<T>; ++i)
      store<value_type>(dst + (i * sizeof(value_type)), value[i]);
  }
}
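
// Example (illustrative): with T = cpp::array<uint16_t, 2>, load performs two
// 2-byte loads at src and src + 2, and store mirrors them back, so the pair
// behaves like a single 4-byte access:
//
//   auto value = load<cpp::array<uint16_t, 2>>(src); // reads 4 bytes
//   store<cpp::array<uint16_t, 2>>(dst, value);      // writes them back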

template <typename T> T splat(uint8_t value) {
  static_assert(is_scalar_v<T> || is_vector_v<T>);
  if constexpr (is_scalar_v<T>)
    return T(~0) / T(0xFF) * T(value);
  else if constexpr (is_vector_v<T>) {
    T out;
    // This for loop is optimized out for vector types.
    for (size_t i = 0; i < sizeof(T); ++i)
      out[i] = value;
    return out;
  }
}
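
// Worked example for the scalar branch: with T = uint32_t and value = 0xAB,
// T(~0) / T(0xFF) is 0xFFFFFFFF / 0xFF == 0x01010101, and multiplying by 0xAB
// replicates the byte in every lane:
//
//   splat<uint32_t>(0xAB) == 0xABABABABu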

///////////////////////////////////////////////////////////////////////////////
// Memset
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memset {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    if constexpr (is_scalar_v<T> || is_vector_v<T>) {
      store<T>(dst, splat<T>(value));
    } else if constexpr (is_array_v<T>) {
      using value_type = typename T::value_type;
      const auto Splat = splat<value_type>(value);
      for (size_t i = 0; i < array_size_v<T>; ++i)
        store<value_type>(dst + (i * sizeof(value_type)), Splat);
    }
  }

  LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
    block(dst + count - SIZE, value);
  }

  LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
    block(dst, value);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail_offset(Ptr dst, uint8_t value,
                                               size_t count, size_t offset) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    do {
      block(dst + offset, value);
      offset += SIZE;
    } while (offset < count - SIZE);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
    return loop_and_tail_offset(dst, value, count, 0);
  }
};
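
// Usage sketch (illustrative): for a count known to be in [SIZE, 2 * SIZE] a
// single head_tail call suffices, the two stores overlapping in the middle;
// larger counts fall back to the loop:
//
//   if (count <= 2 * sizeof(uint64_t))
//     Memset<uint64_t>::head_tail(dst, value, count); // count in [8, 16]
//   else
//     Memset<uint64_t>::loop_and_tail(dst, value, count);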

template <typename T, typename... TS> struct MemsetSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    Memset<T>::block(dst, value);
    if constexpr (sizeof...(TS) > 0)
      return MemsetSequence<TS...>::block(dst + sizeof(T), value);
  }
};
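
// Example (illustrative): MemsetSequence<uint64_t, uint32_t, uint16_t,
// uint8_t> has SIZE == 15 and sets exactly 15 bytes with one store per
// element type:
//
//   MemsetSequence<uint64_t, uint32_t, uint16_t, uint8_t>::block(dst, value);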

///////////////////////////////////////////////////////////////////////////////
// Memmove
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memmove {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, CPtr src) {
    store<T>(dst, load<T>(src));
  }

  LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
    const size_t offset = count - SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigations are needed to determine
    // the best order.
    const auto head = load<T>(src);
    const auto tail = load<T>(src + offset);
    store<T>(dst, head);
    store<T>(dst + offset, tail);
  }
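
  // Example (illustrative): with T = uint64_t and count == 11, head covers
  // bytes [0, 8) and tail covers bytes [3, 11). Both values are loaded before
  // either store, which makes head_tail safe even when dst and src overlap:
  //
  //   Memmove<uint64_t>::head_tail(dst, src, 11); // valid for count in [8, 16]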

  // Align forward is suitable when dst < src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes forward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [___________LLLLLLLA______________]
  // [_SSSSSSSS________________________]
  // [________SSSSSSSS_________________]
  //
  // e.g. Moving two bytes forward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [______LLLLLLLL___________________]
  // [_SSSSSSSS________________________]
  // [___SSSSSSSA______________________]
  template <Arg AlignOn>
  LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr prev_dst = dst;
    CPtr prev_src = src;
    size_t prev_count = count;
    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
    adjust(SIZE, dst, src, count);
    head_tail(prev_dst, prev_src, prev_count - count);
  }

  // Align backward is suitable when dst > src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes backward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _________________ALLLLLLL_______]
  // [ ___________________LLLLLLLL_____]
  // [____________________SSSSSSSS_____]
  // [______________________SSSSSSSS___]
  //
  // e.g. Moving two bytes backward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _______________LLLLLLLL_________]
  // [ ___________________LLLLLLLL_____]
  // [__________________ASSSSSSS_______]
  // [______________________SSSSSSSS___]
  template <Arg AlignOn>
  LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr headtail_dst = dst + count;
    CPtr headtail_src = src + count;
    size_t headtail_size = 0;
    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                          headtail_size);
    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
    head_tail(headtail_dst, headtail_src, headtail_size);
    count -= headtail_size;
  }

  // Moving forward is suitable when dst < src. We load the tail bytes before
  // entering the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const size_t tail_offset = count - SIZE;
    const auto tail_value = load<T>(src + tail_offset);
    size_t offset = 0;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += SIZE;
    } while (offset < count - SIZE);
    store<T>(dst + tail_offset, tail_value);
  }

  // Moving backward is suitable when dst > src. We load the head bytes before
  // entering the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                 size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const auto head_value = load<T>(src);
    ptrdiff_t offset = count - SIZE;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= SIZE;
    } while (offset >= 0);
    store<T>(dst, head_value);
  }
};
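
// Dispatch sketch (illustrative only, not the dispatch used by any particular
// platform): a large-count memmove could combine the pieces above as
//
//   if (dst < src) {
//     Memmove<generic_v128>::align_forward<Arg::Src>(dst, src, count);
//     Memmove<generic_v128>::loop_and_tail_forward(dst, src, count);
//   } else {
//     Memmove<generic_v128>::align_backward<Arg::Src>(dst, src, count);
//     Memmove<generic_v128>::loop_and_tail_backward(dst, src, count);
//   }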

///////////////////////////////////////////////////////////////////////////////
// Low level operations for Bcmp and Memcmp that operate on memory locations.
///////////////////////////////////////////////////////////////////////////////

// Same as load above but with an offset to the pointer.
// Making the offset explicit hints the compiler to use the relevant
// addressing mode consistently.
template <typename T> LIBC_INLINE T load(CPtr ptr, size_t offset) {
  return ::LIBC_NAMESPACE::load<T>(ptr + offset);
}

// Same as above but also makes sure the loaded value is in big endian format.
// This is useful when implementing lexicographic comparisons as big endian
// scalar comparison directly maps to lexicographic byte comparisons.
template <typename T> LIBC_INLINE T load_be(CPtr ptr, size_t offset) {
  return Endian::to_big_endian(load<T>(ptr, offset));
}
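
// Example (illustrative): on a little-endian machine the bytes {0x01, 0x02}
// load as the uint16_t 0x0201 while {0x02, 0x01} load as 0x0102, so a plain
// integer comparison inverts the byte-wise order. After to_big_endian the
// values become 0x0102 and 0x0201 respectively, and
// load_be<uint16_t>(p1, 0) < load_be<uint16_t>(p2, 0) matches lexicographic
// order.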

// Equality: returns true iff values at locations (p1 + offset) and (p2 +
// offset) compare equal.
template <typename T> LIBC_INLINE bool eq(CPtr p1, CPtr p2, size_t offset);

// Inequality: returns a non-zero value iff values at locations (p1 + offset)
// and (p2 + offset) differ.
template <typename T> LIBC_INLINE uint32_t neq(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison:
// - returns 0 iff values at locations (p1 + offset) and (p2 + offset) compare
//   equal.
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison of non-equal values:
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp_neq(CPtr p1, CPtr p2, size_t offset);
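
// Sketch of a possible specialization (illustrative; actual specializations
// live in per-type and per-platform code): for a multi-byte scalar, cmp_neq
// can compare big-endian reloads of values already known to differ, relying
// on MemcmpReturnType being constructible from int32_t as in cmp<uint8_t>
// below:
//
//   template <>
//   LIBC_INLINE MemcmpReturnType cmp_neq<uint32_t>(CPtr p1, CPtr p2,
//                                                  size_t offset) {
//     const auto a = load_be<uint32_t>(p1, offset);
//     const auto b = load_be<uint32_t>(p2, offset);
//     return a < b ? -1 : 1;
//   }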

///////////////////////////////////////////////////////////////////////////////
// Memcmp implementation
//
// When building memcmp, not all types are considered equal.
//
// For instance, the lexicographic comparison of two uint8_t values can be
// implemented as a simple subtraction, but for wider operations the logic can
// be much more involved, especially on little endian platforms.
//
// For such wider types it is a good strategy to test for equality first and
// only do the expensive lexicographic comparison if necessary.
//
// Decomposing the algorithm like this for wider types allows us to have
// efficient implementation of higher order functions like 'head_tail' or
// 'loop_and_tail'.
///////////////////////////////////////////////////////////////////////////////

// Type traits to decide whether we can use 'cmp' directly or if we need to
// split the computation.
template <typename T> struct cmp_is_expensive;

template <typename T> struct Memcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

private:
  LIBC_INLINE static MemcmpReturnType block_offset(CPtr p1, CPtr p2,
                                                   size_t offset) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, offset))
        return cmp_neq<T>(p1, p2, offset);
      return MemcmpReturnType::zero();
    } else {
      return cmp<T>(p1, p2, offset);
    }
  }

public:
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    return block_offset(p1, p2, 0);
  }

  LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block_offset(p1, p2, count - SIZE);
  }

  LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
                                                size_t count) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (const auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    return tail(p1, p2, count);
  }

  LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                    size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE) {
        if constexpr (cmp_is_expensive<T>::value) {
          if (!eq<T>(p1, p2, offset))
            return cmp_neq<T>(p1, p2, offset);
        } else {
          if (const auto value = cmp<T>(p1, p2, offset))
            return value;
        }
      }
      return block_offset(p1, p2, limit); // tail
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (auto value = cmp<T>(p1, p2, offset))
          return value;
      return MemcmpReturnType::zero();
    }
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset, p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};
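
// Dispatch sketch (illustrative; the threshold and types are arbitrary): a
// memcmp over `count` bytes, assuming the uint64_t specializations of
// eq/cmp/cmp_neq exist on the target:
//
//   if (count < 8)
//     return Memcmp<uint8_t>::loop_and_tail(p1, p2, count);
//   if (count <= 16)
//     return Memcmp<uint64_t>::head_tail(p1, p2, count); // count in [8, 16]
//   return Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2, count);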

template <typename T, typename... TS> struct MemcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    // TODO: test suggestion in
    // https://reviews.llvm.org/D148717?id=515724#inline-1446890
    // once we have a proper way to check memory operation latency.
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    if constexpr (sizeof...(TS) > 0)
      return MemcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return MemcmpReturnType::zero();
  }
};
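
// Example (illustrative): comparing exactly three bytes as a 2-byte then
// 1-byte sequence; each element short-circuits as soon as a difference is
// found:
//
//   MemcmpSequence<uint16_t, uint8_t>::block(p1, p2); // SIZE == 3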

///////////////////////////////////////////////////////////////////////////////
// Bcmp
///////////////////////////////////////////////////////////////////////////////
template <typename T> struct Bcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    return neq<T>(p1, p2, 0);
  }

  LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    const size_t tail_offset = count - SIZE;
    return neq<T>(p1, p2, tail_offset);
  }

  LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
    if (const auto value = neq<T>(p1, p2, 0))
      return value;
    return tail(p1, p2, count);
  }

  LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                  size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return tail(p1, p2, count);
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return BcmpReturnType::zero();
    }
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    static_assert(SIZE > 1,
                  "No need to align when processing one byte at a time");
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset, p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};

template <typename T, typename... TS> struct BcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    if (auto value = neq<T>(p1, p2, 0))
      return value;
    if constexpr (sizeof...(TS) > 0)
      return BcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return BcmpReturnType::zero();
  }
};
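
// Example (illustrative): a seven byte bcmp as a 4 + 2 + 1 byte sequence:
//
//   BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2); // SIZE == 7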

///////////////////////////////////////////////////////////////////////////////
// Specializations for uint8_t
template <> struct cmp_is_expensive<uint8_t> : public cpp::false_type {};
template <> LIBC_INLINE bool eq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) == load<uint8_t>(p2, offset);
}
template <> LIBC_INLINE uint32_t neq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) ^ load<uint8_t>(p2, offset);
}
template <>
LIBC_INLINE MemcmpReturnType cmp<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return static_cast<int32_t>(load<uint8_t>(p1, offset)) -
         static_cast<int32_t>(load<uint8_t>(p2, offset));
}
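// Worked example: for bytes 0x01 and 0xFF the result is 1 - 255 == -254
// (negative, i.e. "less"). Widening to int32_t before subtracting keeps the
// sign correct over the full [0, 255] range, which a subtraction in a
// narrower signed type would not.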
template <>
LIBC_INLINE MemcmpReturnType cmp_neq<uint8_t>(CPtr p1, CPtr p2, size_t offset);

} // namespace generic
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H