// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "leveldb/cache.h"

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "port/port.h"
#include "port/thread_annotations.h"
#include "util/hash.h"
#include "util/mutexlock.h"

namespace leveldb {

Cache::~Cache() {}

namespace {

// LRU cache implementation
//
// Cache entries have an "in_cache" boolean indicating whether the cache has a
// reference on the entry. The only ways that this can become false without the
// entry being passed to its "deleter" are via Erase(), via Insert() when
// an element with a duplicate key is inserted, or on destruction of the cache.
//
// The cache keeps two linked lists of items in the cache. All items in the
// cache are in one list or the other, and never both. Items still referenced
// by clients but erased from the cache are in neither list. The lists are:
// - in-use: contains the items currently referenced by clients, in no
//   particular order. (This list is used for invariant checking. If we
//   removed the check, elements that would otherwise be on this list could be
//   left as disconnected singleton lists.)
// - LRU: contains the items not currently referenced by clients, in LRU order.
// Elements are moved between these lists by the Ref() and Unref() methods,
// when they detect an element in the cache acquiring or losing its only
// external reference.
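//
// For example, a typical entry moves through these states (assuming a
// nonzero capacity):
//   Insert()  -> refs == 2, in_cache == true,  on the in_use_ list
//   Release() -> refs == 1, in_cache == true,  moved to the lru_ list
//   Lookup()  -> refs == 2, in_cache == true,  moved back to in_use_
//   Erase()   -> refs == 1, in_cache == false, on neither list
//   Release() -> refs == 0, passed to its deleter and freed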

// An entry is a variable length heap-allocated structure. Entries
// are kept in a circular doubly linked list ordered by access time.
struct LRUHandle {
  void* value;
  void (*deleter)(const Slice&, void* value);
  LRUHandle* next_hash;
  LRUHandle* next;
  LRUHandle* prev;
  size_t charge;  // TODO(opt): Only allow uint32_t?
  size_t key_length;
  bool in_cache;     // Whether entry is in the cache.
  uint32_t refs;     // References, including cache reference, if present.
  uint32_t hash;     // Hash of key(); used for fast sharding and comparisons
  char key_data[1];  // Beginning of key

  Slice key() const {
    // next is only equal to this if the LRU handle is the list head of an
    // empty list. List heads never have meaningful keys.
    assert(next != this);

    return Slice(key_data, key_length);
  }
};
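
// key_data[1] is the first byte of a variable-length key stored inline at the
// end of the struct (the classic C "struct hack"). Insert() below therefore
// allocates sizeof(LRUHandle) - 1 + key.size() bytes with malloc(); e.g. for
// an 8-byte key it allocates the fixed fields plus 7 extra trailing bytes so
// that key_data[0..7] holds the key. This is also why entries are released
// with free() rather than delete.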

// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested. E.g., readrandom speeds up by ~5% over g++ 4.4.3's
// builtin hashtable.
class HandleTable {
 public:
  HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
  ~HandleTable() { delete[] list_; }

  LRUHandle* Lookup(const Slice& key, uint32_t hash) {
    return *FindPointer(key, hash);
  }

  LRUHandle* Insert(LRUHandle* h) {
    LRUHandle** ptr = FindPointer(h->key(), h->hash);
    LRUHandle* old = *ptr;
    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
    *ptr = h;
    if (old == nullptr) {
      ++elems_;
      if (elems_ > length_) {
        // Since each cache entry is fairly large, we aim for a small
        // average linked list length (<= 1).
        Resize();
      }
    }
    return old;
  }

  LRUHandle* Remove(const Slice& key, uint32_t hash) {
    LRUHandle** ptr = FindPointer(key, hash);
    LRUHandle* result = *ptr;
    if (result != nullptr) {
      *ptr = result->next_hash;
      --elems_;
    }
    return result;
  }

 private:
  // The table consists of an array of buckets where each bucket is
  // a linked list of cache entries that hash into the bucket.
  uint32_t length_;
  uint32_t elems_;
  LRUHandle** list_;

  // Return a pointer to slot that points to a cache entry that
  // matches key/hash. If there is no such cache entry, return a
  // pointer to the trailing slot in the corresponding linked list.
  LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
    LRUHandle** ptr = &list_[hash & (length_ - 1)];
    while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
      ptr = &(*ptr)->next_hash;
    }
    return ptr;
  }
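
  // FindPointer returns the address of the link that points at the match, not
  // the match itself, so callers can unlink or replace a node without keeping
  // a separate "previous" pointer; e.g. Remove() splices a node out in one
  // step with
  //   *ptr = result->next_hash;
  // Note also that length_ is always a power of two (see Resize() below), so
  // "hash & (length_ - 1)" is equivalent to "hash % length_" but cheaper.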

  void Resize() {
    uint32_t new_length = 4;
    while (new_length < elems_) {
      new_length *= 2;
    }
    LRUHandle** new_list = new LRUHandle*[new_length];
    memset(new_list, 0, sizeof(new_list[0]) * new_length);
    uint32_t count = 0;
    for (uint32_t i = 0; i < length_; i++) {
      LRUHandle* h = list_[i];
      while (h != nullptr) {
        LRUHandle* next = h->next_hash;
        uint32_t hash = h->hash;
        LRUHandle** ptr = &new_list[hash & (new_length - 1)];
        h->next_hash = *ptr;
        *ptr = h;
        h = next;
        count++;
      }
    }
    assert(elems_ == count);
    delete[] list_;
    list_ = new_list;
    length_ = new_length;
  }
};

// A single shard of a sharded cache.
class LRUCache {
 public:
  LRUCache();
  ~LRUCache();

  // Separate from constructor so caller can easily make an array of LRUCache
  void SetCapacity(size_t capacity) { capacity_ = capacity; }

  // Like Cache methods, but with an extra "hash" parameter.
  Cache::Handle* Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value));
  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
  void Release(Cache::Handle* handle);
  void Erase(const Slice& key, uint32_t hash);
  void Prune();
  size_t TotalCharge() const {
    MutexLock l(&mutex_);
    return usage_;
  }

 private:
  void LRU_Remove(LRUHandle* e);
  void LRU_Append(LRUHandle* list, LRUHandle* e);
  void Ref(LRUHandle* e);
  void Unref(LRUHandle* e);
  bool FinishErase(LRUHandle* e) EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Initialized before use.
  size_t capacity_;

  // mutex_ protects the following state.
  mutable port::Mutex mutex_;
  size_t usage_ GUARDED_BY(mutex_);

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  // Entries have refs==1 and in_cache==true.
  LRUHandle lru_ GUARDED_BY(mutex_);

  // Dummy head of in-use list.
  // Entries are in use by clients, and have refs >= 2 and in_cache==true.
  LRUHandle in_use_ GUARDED_BY(mutex_);

  HandleTable table_ GUARDED_BY(mutex_);
};

LRUCache::LRUCache() : capacity_(0), usage_(0) {
  // Make empty circular linked lists.
  lru_.next = &lru_;
  lru_.prev = &lru_;
  in_use_.next = &in_use_;
  in_use_.prev = &in_use_;
}

LRUCache::~LRUCache() {
  assert(in_use_.next == &in_use_);  // Error if caller has an unreleased handle
  for (LRUHandle* e = lru_.next; e != &lru_;) {
    LRUHandle* next = e->next;
    assert(e->in_cache);
    e->in_cache = false;
    assert(e->refs == 1);  // Invariant of lru_ list.
    Unref(e);
    e = next;
  }
}

void LRUCache::Ref(LRUHandle* e) {
  if (e->refs == 1 && e->in_cache) {  // If on lru_ list, move to in_use_ list.
    LRU_Remove(e);
    LRU_Append(&in_use_, e);
  }
  e->refs++;
}

void LRUCache::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  e->refs--;
  if (e->refs == 0) {  // Deallocate.
    assert(!e->in_cache);
    (*e->deleter)(e->key(), e->value);
    free(e);
  } else if (e->in_cache && e->refs == 1) {
    // No longer in use; move to lru_ list.
    LRU_Remove(e);
    LRU_Append(&lru_, e);
  }
}
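
// Together, Ref() and Unref() maintain the list invariants stated above: an
// entry with in_cache == true sits on in_use_ while refs >= 2 and on lru_
// while refs == 1, and an entry whose last reference is dropped (refs == 0,
// which implies in_cache == false) is handed to its deleter and freed.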

void LRUCache::LRU_Remove(LRUHandle* e) {
  e->next->prev = e->prev;
  e->prev->next = e->next;
}

void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
  // Make "e" newest entry by inserting just before *list
  e->next = list;
  e->prev = list->prev;
  e->prev->next = e;
  e->next->prev = e;
}
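
// For example, appending e to a list with dummy head H and newest entry B
// turns ... <-> B <-> H into ... <-> B <-> e <-> H, so H->prev (the newest
// slot) now points at e while H->next still points at the oldest entry.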

Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  LRUHandle* e = table_.Lookup(key, hash);
  if (e != nullptr) {
    Ref(e);
  }
  return reinterpret_cast<Cache::Handle*>(e);
}

void LRUCache::Release(Cache::Handle* handle) {
  MutexLock l(&mutex_);
  Unref(reinterpret_cast<LRUHandle*>(handle));
}

Cache::Handle* LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
                                size_t charge,
                                void (*deleter)(const Slice& key,
                                                void* value)) {
  MutexLock l(&mutex_);

  LRUHandle* e =
      reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 1 + key.size()));
  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->hash = hash;
  e->in_cache = false;
  e->refs = 1;  // for the returned handle.
  std::memcpy(e->key_data, key.data(), key.size());

  if (capacity_ > 0) {
    e->refs++;  // for the cache's reference.
    e->in_cache = true;
    LRU_Append(&in_use_, e);
    usage_ += charge;
    FinishErase(table_.Insert(e));
  } else {  // don't cache. (capacity_==0 is supported and turns off caching.)
    // next is read by key() in an assert, so it must be initialized
    e->next = nullptr;
  }
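  // Evict from the cold end of lru_ until usage fits within capacity. Entries
  // on in_use_ are never evicted here; e.g. if capacity_ is 100 and clients
  // still hold handles totalling 120 in charge, usage_ stays above capacity_
  // until those handles are released.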
  while (usage_ > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    assert(old->refs == 1);
    bool erased = FinishErase(table_.Remove(old->key(), old->hash));
    if (!erased) {  // to avoid unused variable when compiled NDEBUG
      assert(erased);
    }
  }

  return reinterpret_cast<Cache::Handle*>(e);
}

// If e != nullptr, finish removing *e from the cache; it has already been
// removed from the hash table. Return whether e != nullptr.
bool LRUCache::FinishErase(LRUHandle* e) {
  if (e != nullptr) {
    assert(e->in_cache);
    LRU_Remove(e);
    e->in_cache = false;
    usage_ -= e->charge;
    Unref(e);
  }
  return e != nullptr;
}

void LRUCache::Erase(const Slice& key, uint32_t hash) {
  MutexLock l(&mutex_);
  FinishErase(table_.Remove(key, hash));
}

void LRUCache::Prune() {
  MutexLock l(&mutex_);
  while (lru_.next != &lru_) {
    LRUHandle* e = lru_.next;
    assert(e->refs == 1);
    bool erased = FinishErase(table_.Remove(e->key(), e->hash));
    if (!erased) {  // to avoid unused variable when compiled NDEBUG
      assert(erased);
    }
  }
}

static const int kNumShardBits = 4;
static const int kNumShards = 1 << kNumShardBits;
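
// The cache is split into 1 << kNumShardBits == 16 independent shards, each
// with its own mutex, to reduce lock contention. Shard() below picks a shard
// from the top kNumShardBits bits of the hash; e.g. a hash of 0xC0FFEE11
// (top four bits 0xC) maps to shard 12.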

class ShardedLRUCache : public Cache {
 private:
  LRUCache shard_[kNumShards];
  port::Mutex id_mutex_;
  uint64_t last_id_;

  static inline uint32_t HashSlice(const Slice& s) {
    return Hash(s.data(), s.size(), 0);
  }

  static uint32_t Shard(uint32_t hash) { return hash >> (32 - kNumShardBits); }

 public:
  explicit ShardedLRUCache(size_t capacity) : last_id_(0) {
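    // Divide the budget across shards, rounding up so the shard capacities
    // sum to at least the requested total; e.g. a capacity of 1000 gives
    // (1000 + 15) / 16 = 63 per shard, 1008 in total.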
    const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
    for (int s = 0; s < kNumShards; s++) {
      shard_[s].SetCapacity(per_shard);
    }
  }
  ~ShardedLRUCache() override {}
  Handle* Insert(const Slice& key, void* value, size_t charge,
                 void (*deleter)(const Slice& key, void* value)) override {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
  }
  Handle* Lookup(const Slice& key) override {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Lookup(key, hash);
  }
  void Release(Handle* handle) override {
    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
    shard_[Shard(h->hash)].Release(handle);
  }
  void Erase(const Slice& key) override {
    const uint32_t hash = HashSlice(key);
    shard_[Shard(hash)].Erase(key, hash);
  }
  void* Value(Handle* handle) override {
    return reinterpret_cast<LRUHandle*>(handle)->value;
  }
  uint64_t NewId() override {
    MutexLock l(&id_mutex_);
    return ++(last_id_);
  }
  void Prune() override {
    for (int s = 0; s < kNumShards; s++) {
      shard_[s].Prune();
    }
  }
  size_t TotalCharge() const override {
    size_t total = 0;
    for (int s = 0; s < kNumShards; s++) {
      total += shard_[s].TotalCharge();
    }
    return total;
  }
};

}  // end anonymous namespace

Cache* NewLRUCache(size_t capacity) { return new ShardedLRUCache(capacity); }
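
// A minimal usage sketch (illustrative only; DeleteString is a hypothetical
// deleter, not part of this file):
//
//   static void DeleteString(const Slice& /*key*/, void* value) {
//     delete reinterpret_cast<std::string*>(value);
//   }
//
//   Cache* cache = NewLRUCache(8 << 20);  // 8 MB total charge budget
//   Cache::Handle* h = cache->Insert(Slice("k"), new std::string("v"),
//                                    /*charge=*/1, &DeleteString);
//   std::string* v = reinterpret_cast<std::string*>(cache->Value(h));
//   cache->Release(h);  // every handle from Insert()/Lookup() must be released
//   delete cache;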

}  // namespace leveldb