1*9507f98cSAndroid Build Coastguard Worker // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2*9507f98cSAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*9507f98cSAndroid Build Coastguard Worker // found in the LICENSE file. See the AUTHORS file for names of contributors.
4*9507f98cSAndroid Build Coastguard Worker
5*9507f98cSAndroid Build Coastguard Worker #include "leveldb/filter_policy.h"
6*9507f98cSAndroid Build Coastguard Worker
7*9507f98cSAndroid Build Coastguard Worker #include "leveldb/slice.h"
8*9507f98cSAndroid Build Coastguard Worker #include "util/hash.h"
9*9507f98cSAndroid Build Coastguard Worker
10*9507f98cSAndroid Build Coastguard Worker namespace leveldb {
11*9507f98cSAndroid Build Coastguard Worker
12*9507f98cSAndroid Build Coastguard Worker namespace {
BloomHash(const Slice & key)13*9507f98cSAndroid Build Coastguard Worker static uint32_t BloomHash(const Slice& key) {
14*9507f98cSAndroid Build Coastguard Worker return Hash(key.data(), key.size(), 0xbc9f1d34);
15*9507f98cSAndroid Build Coastguard Worker }
16*9507f98cSAndroid Build Coastguard Worker
17*9507f98cSAndroid Build Coastguard Worker class BloomFilterPolicy : public FilterPolicy {
18*9507f98cSAndroid Build Coastguard Worker public:
BloomFilterPolicy(int bits_per_key)19*9507f98cSAndroid Build Coastguard Worker explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
20*9507f98cSAndroid Build Coastguard Worker // We intentionally round down to reduce probing cost a little bit
21*9507f98cSAndroid Build Coastguard Worker k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
22*9507f98cSAndroid Build Coastguard Worker if (k_ < 1) k_ = 1;
23*9507f98cSAndroid Build Coastguard Worker if (k_ > 30) k_ = 30;
24*9507f98cSAndroid Build Coastguard Worker }
25*9507f98cSAndroid Build Coastguard Worker
Name() const26*9507f98cSAndroid Build Coastguard Worker const char* Name() const override { return "leveldb.BuiltinBloomFilter2"; }
27*9507f98cSAndroid Build Coastguard Worker
CreateFilter(const Slice * keys,int n,std::string * dst) const28*9507f98cSAndroid Build Coastguard Worker void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
29*9507f98cSAndroid Build Coastguard Worker // Compute bloom filter size (in both bits and bytes)
30*9507f98cSAndroid Build Coastguard Worker size_t bits = n * bits_per_key_;
31*9507f98cSAndroid Build Coastguard Worker
32*9507f98cSAndroid Build Coastguard Worker // For small n, we can see a very high false positive rate. Fix it
33*9507f98cSAndroid Build Coastguard Worker // by enforcing a minimum bloom filter length.
34*9507f98cSAndroid Build Coastguard Worker if (bits < 64) bits = 64;
35*9507f98cSAndroid Build Coastguard Worker
36*9507f98cSAndroid Build Coastguard Worker size_t bytes = (bits + 7) / 8;
37*9507f98cSAndroid Build Coastguard Worker bits = bytes * 8;
38*9507f98cSAndroid Build Coastguard Worker
39*9507f98cSAndroid Build Coastguard Worker const size_t init_size = dst->size();
40*9507f98cSAndroid Build Coastguard Worker dst->resize(init_size + bytes, 0);
41*9507f98cSAndroid Build Coastguard Worker dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
42*9507f98cSAndroid Build Coastguard Worker char* array = &(*dst)[init_size];
43*9507f98cSAndroid Build Coastguard Worker for (int i = 0; i < n; i++) {
44*9507f98cSAndroid Build Coastguard Worker // Use double-hashing to generate a sequence of hash values.
45*9507f98cSAndroid Build Coastguard Worker // See analysis in [Kirsch,Mitzenmacher 2006].
46*9507f98cSAndroid Build Coastguard Worker uint32_t h = BloomHash(keys[i]);
47*9507f98cSAndroid Build Coastguard Worker const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
48*9507f98cSAndroid Build Coastguard Worker for (size_t j = 0; j < k_; j++) {
49*9507f98cSAndroid Build Coastguard Worker const uint32_t bitpos = h % bits;
50*9507f98cSAndroid Build Coastguard Worker array[bitpos / 8] |= (1 << (bitpos % 8));
51*9507f98cSAndroid Build Coastguard Worker h += delta;
52*9507f98cSAndroid Build Coastguard Worker }
53*9507f98cSAndroid Build Coastguard Worker }
54*9507f98cSAndroid Build Coastguard Worker }
55*9507f98cSAndroid Build Coastguard Worker
KeyMayMatch(const Slice & key,const Slice & bloom_filter) const56*9507f98cSAndroid Build Coastguard Worker bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
57*9507f98cSAndroid Build Coastguard Worker const size_t len = bloom_filter.size();
58*9507f98cSAndroid Build Coastguard Worker if (len < 2) return false;
59*9507f98cSAndroid Build Coastguard Worker
60*9507f98cSAndroid Build Coastguard Worker const char* array = bloom_filter.data();
61*9507f98cSAndroid Build Coastguard Worker const size_t bits = (len - 1) * 8;
62*9507f98cSAndroid Build Coastguard Worker
63*9507f98cSAndroid Build Coastguard Worker // Use the encoded k so that we can read filters generated by
64*9507f98cSAndroid Build Coastguard Worker // bloom filters created using different parameters.
65*9507f98cSAndroid Build Coastguard Worker const size_t k = array[len - 1];
66*9507f98cSAndroid Build Coastguard Worker if (k > 30) {
67*9507f98cSAndroid Build Coastguard Worker // Reserved for potentially new encodings for short bloom filters.
68*9507f98cSAndroid Build Coastguard Worker // Consider it a match.
69*9507f98cSAndroid Build Coastguard Worker return true;
70*9507f98cSAndroid Build Coastguard Worker }
71*9507f98cSAndroid Build Coastguard Worker
72*9507f98cSAndroid Build Coastguard Worker uint32_t h = BloomHash(key);
73*9507f98cSAndroid Build Coastguard Worker const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
74*9507f98cSAndroid Build Coastguard Worker for (size_t j = 0; j < k; j++) {
75*9507f98cSAndroid Build Coastguard Worker const uint32_t bitpos = h % bits;
76*9507f98cSAndroid Build Coastguard Worker if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
77*9507f98cSAndroid Build Coastguard Worker h += delta;
78*9507f98cSAndroid Build Coastguard Worker }
79*9507f98cSAndroid Build Coastguard Worker return true;
80*9507f98cSAndroid Build Coastguard Worker }
81*9507f98cSAndroid Build Coastguard Worker
82*9507f98cSAndroid Build Coastguard Worker private:
83*9507f98cSAndroid Build Coastguard Worker size_t bits_per_key_;
84*9507f98cSAndroid Build Coastguard Worker size_t k_;
85*9507f98cSAndroid Build Coastguard Worker };
86*9507f98cSAndroid Build Coastguard Worker } // namespace
87*9507f98cSAndroid Build Coastguard Worker
NewBloomFilterPolicy(int bits_per_key)88*9507f98cSAndroid Build Coastguard Worker const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
89*9507f98cSAndroid Build Coastguard Worker return new BloomFilterPolicy(bits_per_key);
90*9507f98cSAndroid Build Coastguard Worker }
91*9507f98cSAndroid Build Coastguard Worker
92*9507f98cSAndroid Build Coastguard Worker } // namespace leveldb
93