1 /*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef SEMISTATIC_MAP_TEMPLATES_H
18 #define SEMISTATIC_MAP_TEMPLATES_H
19
20 #if !IN_FRUIT_CPP_FILE
21 #error "Fruit .template.h file included in non-cpp file."
22 #endif
23
24 #include <algorithm>
25 #include <cassert>
26 #include <chrono>
27 #include <random>
28 #include <utility>
29 // This include is not necessary for GCC/Clang, but it's necessary for MSVC.
30 #include <numeric>
31
32 #include <fruit/impl/data_structures/semistatic_map.h>
33
34 #include <fruit/impl/data_structures/arena_allocator.h>
35 #include <fruit/impl/data_structures/fixed_size_vector.templates.h>
36 #include <fruit/impl/fruit_assert.h>
37
38 namespace fruit {
39 namespace impl {
40
41 template <typename Key, typename Value>
42 template <typename Iter>
SemistaticMap(Iter values_begin,Iter values_end,std::size_t num_values,MemoryPool & memory_pool)43 SemistaticMap<Key, Value>::SemistaticMap(
44 Iter values_begin, // NOLINT(bugprone-easily-swappable-parameters)
45 Iter values_end,
46 std::size_t num_values,
47 MemoryPool& memory_pool) {
48 NumBits num_bits = pickNumBits(num_values);
49 std::size_t num_buckets = size_t(1) << num_bits;
50
51 FixedSizeVector<Unsigned, ArenaAllocator<Unsigned>> count(num_buckets, 0, ArenaAllocator<Unsigned>(memory_pool));
52
53 hash_function.shift = (sizeof(Unsigned) * CHAR_BIT - num_bits);
54
55 // The cast is a no-op in some systems (e.g. GCC and Clang under Linux 64bit) but it's needed in other systems (e.g.
56 // MSVC).
57 unsigned seed = (unsigned)std::chrono::system_clock::now().time_since_epoch().count();
58 std::default_random_engine random_generator(seed);
59 std::uniform_int_distribution<Unsigned> random_distribution;
60
61 while (1) {
62 hash_function.a = random_distribution(random_generator);
63
64 for (Iter itr = values_begin; !(itr == values_end); ++itr) {
65 Unsigned& this_count = count[hash((*itr).first)];
66 ++this_count;
67 if (this_count == beta) {
68 goto pick_another;
69 }
70 }
71 break;
72
73 pick_another:
74 std::memset(count.data(), 0, num_buckets * sizeof(Unsigned));
75 }
76
77 values = FixedSizeVector<value_type>(num_values, value_type());
78
79 std::partial_sum(count.begin(), count.end(), count.begin());
80 lookup_table = FixedSizeVector<CandidateValuesRange>(count.size());
81 for (Unsigned n : count) {
82 lookup_table.push_back(CandidateValuesRange{values.data() + n, values.data() + n});
83 }
84
85 // At this point lookup_table[h] is the number of keys in [first, last) that have a hash <=h.
86 // Note that even though we ensure this after construction, it is not maintained by insert() so it's not an invariant.
87
88 Iter itr = values_begin;
89 for (std::size_t i = 0; i < num_values; ++i, ++itr) {
90 value_type*& first_value_ptr = lookup_table[hash((*itr).first)].begin;
91 --first_value_ptr;
92 FruitAssert(values.data() <= first_value_ptr);
93 FruitAssert(first_value_ptr < values.data() + values.size());
94 *first_value_ptr = *itr;
95 }
96 }
97
98 template <typename Key, typename Value>
SemistaticMap(const SemistaticMap<Key,Value> & map,std::vector<value_type,ArenaAllocator<value_type>> && new_elements)99 SemistaticMap<Key, Value>::SemistaticMap(const SemistaticMap<Key, Value>& map,
100 std::vector<value_type, ArenaAllocator<value_type>>&& new_elements)
101 : hash_function(map.hash_function), lookup_table(map.lookup_table, map.lookup_table.size()) {
102
103 // Sort by hash.
104 std::sort(new_elements.begin(), new_elements.end(),
105 [this](const value_type& x, const value_type& y) { return hash(x.first) < hash(y.first); });
106
107 std::size_t num_additional_values = new_elements.size();
108 // Add the space needed to store copies of the old buckets.
109 for (auto itr = new_elements.begin(), itr_end = new_elements.end(); itr != itr_end; /* no increment */) {
110 Unsigned h = hash(itr->first);
111 auto p = map.lookup_table[h];
112 num_additional_values += (p.end - p.begin);
113 for (; itr != itr_end && hash(itr->first) == h; ++itr) {
114 }
115 }
116
117 values = FixedSizeVector<value_type>(num_additional_values);
118
119 // Now actually perform the insertions.
120
121 if (new_elements.empty()) {
122 // This is to workaround a bug in the STL shipped with GCC <4.8.2, where calling data() on an
123 // empty vector causes undefined behavior (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829).
124 return;
125 }
126 for (value_type *itr = new_elements.data(), *itr_end = new_elements.data() + new_elements.size(); itr != itr_end;
127 /* no increment */) {
128 Unsigned h = hash(itr->first);
129 auto p = map.lookup_table[h];
130 num_additional_values += (p.end - p.begin);
131 value_type* first = itr;
132 for (; itr != itr_end && hash(itr->first) == h; ++itr) {
133 }
134 value_type* last = itr;
135 insert(h, first, last);
136 }
137 }
138
139 template <typename Key, typename Value>
insert(std::size_t h,const value_type * elems_begin,const value_type * elems_end)140 void SemistaticMap<Key, Value>::insert(
141 std::size_t h,
142 const value_type* elems_begin, // NOLINT(bugprone-easily-swappable-parameters)
143 const value_type* elems_end) {
144
145 value_type* old_bucket_begin = lookup_table[h].begin;
146 value_type* old_bucket_end = lookup_table[h].end;
147
148 lookup_table[h].begin = values.data() + values.size();
149
150 // Step 1: re-insert all keys with the same hash at the end (if any).
151 for (value_type* p = old_bucket_begin; p != old_bucket_end; ++p) {
152 values.push_back(*p);
153 }
154
155 // Step 2: also insert the new keys and values
156 for (auto itr = elems_begin; itr != elems_end; ++itr) {
157 values.push_back(*itr);
158 }
159
160 lookup_table[h].end = values.data() + values.size();
161
162 // The old sequence is no longer pointed to by any index in the lookup table, but recompacting the vectors would be
163 // too slow.
164 }
165
166 template <typename Key, typename Value>
at(Key key)167 const Value& SemistaticMap<Key, Value>::at(Key key) const {
168 Unsigned h = hash(key);
169 for (const value_type* p = lookup_table[h].begin; /* p!=lookup_table[h].end but no need to check */; ++p) {
170 FruitAssert(p != lookup_table[h].end);
171 if (p->first == key) {
172 return p->second;
173 }
174 }
175 }
176
177 template <typename Key, typename Value>
find(Key key)178 const Value* SemistaticMap<Key, Value>::find(Key key) const {
179 Unsigned h = hash(key);
180 for (const value_type *p = lookup_table[h].begin, *p_end = lookup_table[h].end; p != p_end; ++p) {
181 if (p->first == key) {
182 return &(p->second);
183 }
184 }
185 return nullptr;
186 }
187
188 template <typename Key, typename Value>
pickNumBits(std::size_t n)189 typename SemistaticMap<Key, Value>::NumBits SemistaticMap<Key, Value>::pickNumBits(std::size_t n) {
190 NumBits result = 1;
191 while ((std::size_t(1) << result) < n) {
192 ++result;
193 }
194 return result + 1;
195 }
196
197 // This is here so that we don't have to include fixed_size_vector.templates.h in fruit.h.
198 template <typename Key, typename Value>
~SemistaticMap()199 SemistaticMap<Key, Value>::~SemistaticMap() {}
200
201 } // namespace impl
202 } // namespace fruit
203
204 #endif // SEMISTATIC_MAP_TEMPLATES_H
205