xref: /aosp_15_r20/external/google-fruit/include/fruit/impl/data_structures/semistatic_map.templates.h (revision a65addddcf69f38db5b288d787b6b7571a57bb8f)
1 /*
2  * Copyright 2014 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SEMISTATIC_MAP_TEMPLATES_H
18 #define SEMISTATIC_MAP_TEMPLATES_H
19 
20 #if !IN_FRUIT_CPP_FILE
21 #error "Fruit .template.h file included in non-cpp file."
22 #endif
23 
24 #include <algorithm>
25 #include <cassert>
26 #include <chrono>
27 #include <random>
28 #include <utility>
29 // This include is not necessary for GCC/Clang, but it's necessary for MSVC.
30 #include <numeric>
31 
32 #include <fruit/impl/data_structures/semistatic_map.h>
33 
34 #include <fruit/impl/data_structures/arena_allocator.h>
35 #include <fruit/impl/data_structures/fixed_size_vector.templates.h>
36 #include <fruit/impl/fruit_assert.h>
37 
38 namespace fruit {
39 namespace impl {
40 
41 template <typename Key, typename Value>
42 template <typename Iter>
SemistaticMap(Iter values_begin,Iter values_end,std::size_t num_values,MemoryPool & memory_pool)43 SemistaticMap<Key, Value>::SemistaticMap(
44     Iter values_begin, // NOLINT(bugprone-easily-swappable-parameters)
45     Iter values_end,
46     std::size_t num_values,
47     MemoryPool& memory_pool) {
48   NumBits num_bits = pickNumBits(num_values);
49   std::size_t num_buckets = size_t(1) << num_bits;
50 
51   FixedSizeVector<Unsigned, ArenaAllocator<Unsigned>> count(num_buckets, 0, ArenaAllocator<Unsigned>(memory_pool));
52 
53   hash_function.shift = (sizeof(Unsigned) * CHAR_BIT - num_bits);
54 
55   // The cast is a no-op in some systems (e.g. GCC and Clang under Linux 64bit) but it's needed in other systems (e.g.
56   // MSVC).
57   unsigned seed = (unsigned)std::chrono::system_clock::now().time_since_epoch().count();
58   std::default_random_engine random_generator(seed);
59   std::uniform_int_distribution<Unsigned> random_distribution;
60 
61   while (1) {
62     hash_function.a = random_distribution(random_generator);
63 
64     for (Iter itr = values_begin; !(itr == values_end); ++itr) {
65       Unsigned& this_count = count[hash((*itr).first)];
66       ++this_count;
67       if (this_count == beta) {
68         goto pick_another;
69       }
70     }
71     break;
72 
73   pick_another:
74     std::memset(count.data(), 0, num_buckets * sizeof(Unsigned));
75   }
76 
77   values = FixedSizeVector<value_type>(num_values, value_type());
78 
79   std::partial_sum(count.begin(), count.end(), count.begin());
80   lookup_table = FixedSizeVector<CandidateValuesRange>(count.size());
81   for (Unsigned n : count) {
82     lookup_table.push_back(CandidateValuesRange{values.data() + n, values.data() + n});
83   }
84 
85   // At this point lookup_table[h] is the number of keys in [first, last) that have a hash <=h.
86   // Note that even though we ensure this after construction, it is not maintained by insert() so it's not an invariant.
87 
88   Iter itr = values_begin;
89   for (std::size_t i = 0; i < num_values; ++i, ++itr) {
90     value_type*& first_value_ptr = lookup_table[hash((*itr).first)].begin;
91     --first_value_ptr;
92     FruitAssert(values.data() <= first_value_ptr);
93     FruitAssert(first_value_ptr < values.data() + values.size());
94     *first_value_ptr = *itr;
95   }
96 }
97 
98 template <typename Key, typename Value>
SemistaticMap(const SemistaticMap<Key,Value> & map,std::vector<value_type,ArenaAllocator<value_type>> && new_elements)99 SemistaticMap<Key, Value>::SemistaticMap(const SemistaticMap<Key, Value>& map,
100                                          std::vector<value_type, ArenaAllocator<value_type>>&& new_elements)
101     : hash_function(map.hash_function), lookup_table(map.lookup_table, map.lookup_table.size()) {
102 
103   // Sort by hash.
104   std::sort(new_elements.begin(), new_elements.end(),
105             [this](const value_type& x, const value_type& y) { return hash(x.first) < hash(y.first); });
106 
107   std::size_t num_additional_values = new_elements.size();
108   // Add the space needed to store copies of the old buckets.
109   for (auto itr = new_elements.begin(), itr_end = new_elements.end(); itr != itr_end; /* no increment */) {
110     Unsigned h = hash(itr->first);
111     auto p = map.lookup_table[h];
112     num_additional_values += (p.end - p.begin);
113     for (; itr != itr_end && hash(itr->first) == h; ++itr) {
114     }
115   }
116 
117   values = FixedSizeVector<value_type>(num_additional_values);
118 
119   // Now actually perform the insertions.
120 
121   if (new_elements.empty()) {
122     // This is to workaround a bug in the STL shipped with GCC <4.8.2, where calling data() on an
123     // empty vector causes undefined behavior (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829).
124     return;
125   }
126   for (value_type *itr = new_elements.data(), *itr_end = new_elements.data() + new_elements.size(); itr != itr_end;
127        /* no increment */) {
128     Unsigned h = hash(itr->first);
129     auto p = map.lookup_table[h];
130     num_additional_values += (p.end - p.begin);
131     value_type* first = itr;
132     for (; itr != itr_end && hash(itr->first) == h; ++itr) {
133     }
134     value_type* last = itr;
135     insert(h, first, last);
136   }
137 }
138 
139 template <typename Key, typename Value>
insert(std::size_t h,const value_type * elems_begin,const value_type * elems_end)140 void SemistaticMap<Key, Value>::insert(
141     std::size_t h,
142     const value_type* elems_begin, // NOLINT(bugprone-easily-swappable-parameters)
143     const value_type* elems_end) {
144 
145   value_type* old_bucket_begin = lookup_table[h].begin;
146   value_type* old_bucket_end = lookup_table[h].end;
147 
148   lookup_table[h].begin = values.data() + values.size();
149 
150   // Step 1: re-insert all keys with the same hash at the end (if any).
151   for (value_type* p = old_bucket_begin; p != old_bucket_end; ++p) {
152     values.push_back(*p);
153   }
154 
155   // Step 2: also insert the new keys and values
156   for (auto itr = elems_begin; itr != elems_end; ++itr) {
157     values.push_back(*itr);
158   }
159 
160   lookup_table[h].end = values.data() + values.size();
161 
162   // The old sequence is no longer pointed to by any index in the lookup table, but recompacting the vectors would be
163   // too slow.
164 }
165 
166 template <typename Key, typename Value>
at(Key key)167 const Value& SemistaticMap<Key, Value>::at(Key key) const {
168   Unsigned h = hash(key);
169   for (const value_type* p = lookup_table[h].begin; /* p!=lookup_table[h].end but no need to check */; ++p) {
170     FruitAssert(p != lookup_table[h].end);
171     if (p->first == key) {
172       return p->second;
173     }
174   }
175 }
176 
177 template <typename Key, typename Value>
find(Key key)178 const Value* SemistaticMap<Key, Value>::find(Key key) const {
179   Unsigned h = hash(key);
180   for (const value_type *p = lookup_table[h].begin, *p_end = lookup_table[h].end; p != p_end; ++p) {
181     if (p->first == key) {
182       return &(p->second);
183     }
184   }
185   return nullptr;
186 }
187 
188 template <typename Key, typename Value>
pickNumBits(std::size_t n)189 typename SemistaticMap<Key, Value>::NumBits SemistaticMap<Key, Value>::pickNumBits(std::size_t n) {
190   NumBits result = 1;
191   while ((std::size_t(1) << result) < n) {
192     ++result;
193   }
194   return result + 1;
195 }
196 
197 // This is here so that we don't have to include fixed_size_vector.templates.h in fruit.h.
198 template <typename Key, typename Value>
~SemistaticMap()199 SemistaticMap<Key, Value>::~SemistaticMap() {}
200 
201 } // namespace impl
202 } // namespace fruit
203 
204 #endif // SEMISTATIC_MAP_TEMPLATES_H
205