1*71db0c75SAndroid Build Coastguard Worker //===-- Portable string hash function ---------------------------*- C++ -*-===//
2*71db0c75SAndroid Build Coastguard Worker //
3*71db0c75SAndroid Build Coastguard Worker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*71db0c75SAndroid Build Coastguard Worker // See https://llvm.org/LICENSE.txt for license information.
5*71db0c75SAndroid Build Coastguard Worker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*71db0c75SAndroid Build Coastguard Worker //
7*71db0c75SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
8*71db0c75SAndroid Build Coastguard Worker
9*71db0c75SAndroid Build Coastguard Worker #ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H
10*71db0c75SAndroid Build Coastguard Worker #define LLVM_LIBC_SRC___SUPPORT_HASH_H
11*71db0c75SAndroid Build Coastguard Worker
12*71db0c75SAndroid Build Coastguard Worker #include "src/__support/CPP/bit.h" // rotl
13*71db0c75SAndroid Build Coastguard Worker #include "src/__support/CPP/limits.h" // numeric_limits
14*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/attributes.h" // LIBC_INLINE
15*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/config.h"
16*71db0c75SAndroid Build Coastguard Worker #include "src/__support/uint128.h" // UInt128
17*71db0c75SAndroid Build Coastguard Worker #include <stdint.h> // For uint64_t
18*71db0c75SAndroid Build Coastguard Worker
19*71db0c75SAndroid Build Coastguard Worker namespace LIBC_NAMESPACE_DECL {
20*71db0c75SAndroid Build Coastguard Worker namespace internal {
21*71db0c75SAndroid Build Coastguard Worker
22*71db0c75SAndroid Build Coastguard Worker // Folded multiplication.
23*71db0c75SAndroid Build Coastguard Worker // This function multiplies two 64-bit integers and xor the high and
24*71db0c75SAndroid Build Coastguard Worker // low 64-bit parts of the result.
folded_multiply(uint64_t x,uint64_t y)25*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {
26*71db0c75SAndroid Build Coastguard Worker UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
27*71db0c75SAndroid Build Coastguard Worker uint64_t low = static_cast<uint64_t>(p);
28*71db0c75SAndroid Build Coastguard Worker uint64_t high = static_cast<uint64_t>(p >> 64);
29*71db0c75SAndroid Build Coastguard Worker return low ^ high;
30*71db0c75SAndroid Build Coastguard Worker }
31*71db0c75SAndroid Build Coastguard Worker
32*71db0c75SAndroid Build Coastguard Worker // Read as little endian.
33*71db0c75SAndroid Build Coastguard Worker // Shift-and-or implementation does not give a satisfactory code on aarch64.
34*71db0c75SAndroid Build Coastguard Worker // Therefore, we use a union to read the value.
read_little_endian(const void * ptr)35*71db0c75SAndroid Build Coastguard Worker template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {
36*71db0c75SAndroid Build Coastguard Worker const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
37*71db0c75SAndroid Build Coastguard Worker uint8_t buffer[sizeof(T)];
38*71db0c75SAndroid Build Coastguard Worker #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
39*71db0c75SAndroid Build Coastguard Worker // Compiler should able to optimize this as a load followed by a byte
40*71db0c75SAndroid Build Coastguard Worker // swap. On aarch64 (-mbig-endian), this compiles to the following for
41*71db0c75SAndroid Build Coastguard Worker // int:
42*71db0c75SAndroid Build Coastguard Worker // ldr w0, [x0]
43*71db0c75SAndroid Build Coastguard Worker // rev w0, w0
44*71db0c75SAndroid Build Coastguard Worker // ret
45*71db0c75SAndroid Build Coastguard Worker for (size_t i = 0; i < sizeof(T); ++i) {
46*71db0c75SAndroid Build Coastguard Worker buffer[i] = bytes[sizeof(T) - i - 1];
47*71db0c75SAndroid Build Coastguard Worker }
48*71db0c75SAndroid Build Coastguard Worker #else
49*71db0c75SAndroid Build Coastguard Worker for (size_t i = 0; i < sizeof(T); ++i) {
50*71db0c75SAndroid Build Coastguard Worker buffer[i] = bytes[i];
51*71db0c75SAndroid Build Coastguard Worker }
52*71db0c75SAndroid Build Coastguard Worker #endif
53*71db0c75SAndroid Build Coastguard Worker return cpp::bit_cast<T>(buffer);
54*71db0c75SAndroid Build Coastguard Worker }
55*71db0c75SAndroid Build Coastguard Worker
56*71db0c75SAndroid Build Coastguard Worker // Specialized read functions for small values. size must be <= 8.
read_small_values(const void * ptr,size_t size,uint64_t & low,uint64_t & high)57*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE void read_small_values(const void *ptr, size_t size, uint64_t &low,
58*71db0c75SAndroid Build Coastguard Worker uint64_t &high) {
59*71db0c75SAndroid Build Coastguard Worker const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
60*71db0c75SAndroid Build Coastguard Worker if (size >= 2) {
61*71db0c75SAndroid Build Coastguard Worker if (size >= 4) {
62*71db0c75SAndroid Build Coastguard Worker low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
63*71db0c75SAndroid Build Coastguard Worker high =
64*71db0c75SAndroid Build Coastguard Worker static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
65*71db0c75SAndroid Build Coastguard Worker } else {
66*71db0c75SAndroid Build Coastguard Worker low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
67*71db0c75SAndroid Build Coastguard Worker high = static_cast<uint64_t>(bytes[size - 1]);
68*71db0c75SAndroid Build Coastguard Worker }
69*71db0c75SAndroid Build Coastguard Worker } else {
70*71db0c75SAndroid Build Coastguard Worker if (size > 0) {
71*71db0c75SAndroid Build Coastguard Worker low = static_cast<uint64_t>(bytes[0]);
72*71db0c75SAndroid Build Coastguard Worker high = static_cast<uint64_t>(bytes[0]);
73*71db0c75SAndroid Build Coastguard Worker } else {
74*71db0c75SAndroid Build Coastguard Worker low = 0;
75*71db0c75SAndroid Build Coastguard Worker high = 0;
76*71db0c75SAndroid Build Coastguard Worker }
77*71db0c75SAndroid Build Coastguard Worker }
78*71db0c75SAndroid Build Coastguard Worker }
79*71db0c75SAndroid Build Coastguard Worker
80*71db0c75SAndroid Build Coastguard Worker // This constant comes from Kunth's prng (it empirically works well).
81*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005;
82*71db0c75SAndroid Build Coastguard Worker // Rotation amount for mixing.
83*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23;
84*71db0c75SAndroid Build Coastguard Worker
85*71db0c75SAndroid Build Coastguard Worker // Randomly generated values. For now, we use the same values as in aHash as
86*71db0c75SAndroid Build Coastguard Worker // they are widely tested.
87*71db0c75SAndroid Build Coastguard Worker // https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
88*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {
89*71db0c75SAndroid Build Coastguard Worker {0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
90*71db0c75SAndroid Build Coastguard Worker 0x082efa98ec4e6c89},
91*71db0c75SAndroid Build Coastguard Worker {0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
92*71db0c75SAndroid Build Coastguard Worker 0x3f84d5b5b5470917},
93*71db0c75SAndroid Build Coastguard Worker };
94*71db0c75SAndroid Build Coastguard Worker
95*71db0c75SAndroid Build Coastguard Worker // This is a portable string hasher. It is not cryptographically secure.
96*71db0c75SAndroid Build Coastguard Worker // The quality of the hash is good enough to pass all tests in SMHasher.
97*71db0c75SAndroid Build Coastguard Worker // The implementation is derived from the generic routine of aHash.
98*71db0c75SAndroid Build Coastguard Worker class HashState {
99*71db0c75SAndroid Build Coastguard Worker uint64_t buffer;
100*71db0c75SAndroid Build Coastguard Worker uint64_t pad;
101*71db0c75SAndroid Build Coastguard Worker uint64_t extra_keys[2];
update(uint64_t low,uint64_t high)102*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE void update(uint64_t low, uint64_t high) {
103*71db0c75SAndroid Build Coastguard Worker uint64_t combined =
104*71db0c75SAndroid Build Coastguard Worker folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
105*71db0c75SAndroid Build Coastguard Worker buffer = (buffer + pad) ^ combined;
106*71db0c75SAndroid Build Coastguard Worker buffer = cpp::rotl(buffer, ROTATE);
107*71db0c75SAndroid Build Coastguard Worker }
mix(uint64_t seed)108*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE static uint64_t mix(uint64_t seed) {
109*71db0c75SAndroid Build Coastguard Worker HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
110*71db0c75SAndroid Build Coastguard Worker RANDOMNESS[0][3]};
111*71db0c75SAndroid Build Coastguard Worker mixer.update(seed, 0);
112*71db0c75SAndroid Build Coastguard Worker return mixer.finish();
113*71db0c75SAndroid Build Coastguard Worker }
114*71db0c75SAndroid Build Coastguard Worker
115*71db0c75SAndroid Build Coastguard Worker public:
HashState(uint64_t a,uint64_t b,uint64_t c,uint64_t d)116*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE constexpr HashState(uint64_t a, uint64_t b, uint64_t c,
117*71db0c75SAndroid Build Coastguard Worker uint64_t d)
118*71db0c75SAndroid Build Coastguard Worker : buffer(a), pad(b), extra_keys{c, d} {}
HashState(uint64_t seed)119*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE HashState(uint64_t seed) {
120*71db0c75SAndroid Build Coastguard Worker // Mix one more round of the seed to make it stronger.
121*71db0c75SAndroid Build Coastguard Worker uint64_t mixed = mix(seed);
122*71db0c75SAndroid Build Coastguard Worker buffer = RANDOMNESS[1][0] ^ mixed;
123*71db0c75SAndroid Build Coastguard Worker pad = RANDOMNESS[1][1] ^ mixed;
124*71db0c75SAndroid Build Coastguard Worker extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
125*71db0c75SAndroid Build Coastguard Worker extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
126*71db0c75SAndroid Build Coastguard Worker }
update(const void * ptr,size_t size)127*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE void update(const void *ptr, size_t size) {
128*71db0c75SAndroid Build Coastguard Worker uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
129*71db0c75SAndroid Build Coastguard Worker buffer = (buffer + size) * MULTIPLE;
130*71db0c75SAndroid Build Coastguard Worker uint64_t low, high;
131*71db0c75SAndroid Build Coastguard Worker if (size > 8) {
132*71db0c75SAndroid Build Coastguard Worker if (size > 16) {
133*71db0c75SAndroid Build Coastguard Worker // update tail
134*71db0c75SAndroid Build Coastguard Worker low = read_little_endian<uint64_t>(&bytes[size - 16]);
135*71db0c75SAndroid Build Coastguard Worker high = read_little_endian<uint64_t>(&bytes[size - 8]);
136*71db0c75SAndroid Build Coastguard Worker update(low, high);
137*71db0c75SAndroid Build Coastguard Worker while (size > 16) {
138*71db0c75SAndroid Build Coastguard Worker low = read_little_endian<uint64_t>(&bytes[0]);
139*71db0c75SAndroid Build Coastguard Worker high = read_little_endian<uint64_t>(&bytes[8]);
140*71db0c75SAndroid Build Coastguard Worker update(low, high);
141*71db0c75SAndroid Build Coastguard Worker bytes += 16;
142*71db0c75SAndroid Build Coastguard Worker size -= 16;
143*71db0c75SAndroid Build Coastguard Worker }
144*71db0c75SAndroid Build Coastguard Worker } else {
145*71db0c75SAndroid Build Coastguard Worker low = read_little_endian<uint64_t>(&bytes[0]);
146*71db0c75SAndroid Build Coastguard Worker high = read_little_endian<uint64_t>(&bytes[size - 8]);
147*71db0c75SAndroid Build Coastguard Worker update(low, high);
148*71db0c75SAndroid Build Coastguard Worker }
149*71db0c75SAndroid Build Coastguard Worker } else {
150*71db0c75SAndroid Build Coastguard Worker read_small_values(ptr, size, low, high);
151*71db0c75SAndroid Build Coastguard Worker update(low, high);
152*71db0c75SAndroid Build Coastguard Worker }
153*71db0c75SAndroid Build Coastguard Worker }
finish()154*71db0c75SAndroid Build Coastguard Worker LIBC_INLINE uint64_t finish() {
155*71db0c75SAndroid Build Coastguard Worker int rot = buffer & 63;
156*71db0c75SAndroid Build Coastguard Worker uint64_t folded = folded_multiply(buffer, pad);
157*71db0c75SAndroid Build Coastguard Worker return cpp::rotl(folded, rot);
158*71db0c75SAndroid Build Coastguard Worker }
159*71db0c75SAndroid Build Coastguard Worker };
160*71db0c75SAndroid Build Coastguard Worker
161*71db0c75SAndroid Build Coastguard Worker } // namespace internal
162*71db0c75SAndroid Build Coastguard Worker } // namespace LIBC_NAMESPACE_DECL
163*71db0c75SAndroid Build Coastguard Worker
164*71db0c75SAndroid Build Coastguard Worker #endif // LLVM_LIBC_SRC___SUPPORT_HASH_H
165