// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
#define STORAGE_LEVELDB_DB_DB_IMPL_H_

#include <atomic>
#include <deque>
#include <set>
#include <string>

#include "db/dbformat.h"
#include "db/log_writer.h"
#include "db/snapshot.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "port/port.h"
#include "port/thread_annotations.h"

namespace leveldb {

class MemTable;
class TableCache;
class Version;
class VersionEdit;
class VersionSet;

class DBImpl : public DB {
 public:
  DBImpl(const Options& options, const std::string& dbname);

  DBImpl(const DBImpl&) = delete;
  DBImpl& operator=(const DBImpl&) = delete;

  ~DBImpl() override;

  // Implementations of the DB interface
  Status Put(const WriteOptions&, const Slice& key,
             const Slice& value) override;
  Status Delete(const WriteOptions&, const Slice& key) override;
  Status Write(const WriteOptions& options, WriteBatch* updates) override;
  Status Get(const ReadOptions& options, const Slice& key,
             std::string* value) override;
  Iterator* NewIterator(const ReadOptions&) override;
  const Snapshot* GetSnapshot() override;
  void ReleaseSnapshot(const Snapshot* snapshot) override;
  bool GetProperty(const Slice& property, std::string* value) override;
  void GetApproximateSizes(const Range* range, int n,
                           uint64_t* sizes) override;
  void CompactRange(const Slice* begin, const Slice* end) override;

  // Extra methods (for testing) that are not in the public DB interface

  // Compact any files in the named level that overlap [*begin,*end]
  void TEST_CompactRange(int level, const Slice* begin, const Slice* end);

  // Force current memtable contents to be compacted.
  Status TEST_CompactMemTable();

  // Return an internal iterator over the current state of the database.
  // The keys of this iterator are internal keys (see format.h).
  // The returned iterator should be deleted when no longer needed.
  Iterator* TEST_NewInternalIterator();

  // Return the maximum overlapping data (in bytes) at next level for any
  // file at a level >= 1.
  int64_t TEST_MaxNextLevelOverlappingBytes();

  // Record a sample of bytes read at the specified internal key.
  // Samples are taken approximately once every config::kReadBytesPeriod
  // bytes.
  void RecordReadSample(Slice key);

 private:
  friend class DB;
  struct CompactionState;
  struct Writer;

  // Information for a manual compaction
  struct ManualCompaction {
    int level;
    bool done;
    const InternalKey* begin;  // null means beginning of key range
    const InternalKey* end;    // null means end of key range
    InternalKey tmp_storage;   // Used to keep track of compaction progress
  };

  // Per level compaction stats. stats_[level] stores the stats for
  // compactions that produced data for the specified "level".
  struct CompactionStats {
    CompactionStats() : micros(0), bytes_read(0), bytes_written(0) {}

    void Add(const CompactionStats& c) {
      this->micros += c.micros;
      this->bytes_read += c.bytes_read;
      this->bytes_written += c.bytes_written;
    }

    int64_t micros;
    int64_t bytes_read;
    int64_t bytes_written;
  };

  Iterator* NewInternalIterator(const ReadOptions&,
                                SequenceNumber* latest_snapshot,
                                uint32_t* seed);

  Status NewDB();

  // Recover the descriptor from persistent storage. May do a significant
  // amount of work to recover recently logged updates. Any changes to
  // be made to the descriptor are added to *edit.
  Status Recover(VersionEdit* edit, bool* save_manifest)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  void MaybeIgnoreError(Status* s) const;

  // Delete any unneeded files and stale in-memory entries.
  void RemoveObsoleteFiles() EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Compact the in-memory write buffer to disk. Switches to a new
  // log-file/memtable and writes a new descriptor iff successful.
  // Errors are recorded in bg_error_.
  void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest,
                        VersionEdit* edit, SequenceNumber* max_sequence)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status MakeRoomForWrite(bool force /* compact even if there is room? */)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  WriteBatch* BuildBatchGroup(Writer** last_writer)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  void RecordBackgroundError(const Status& s);

  void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  static void BGWork(void* db);
  void BackgroundCall();
  void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  void CleanupCompaction(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  Status DoCompactionWork(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status OpenCompactionOutputFile(CompactionState* compact);
  Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
  Status InstallCompactionResults(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  const Comparator* user_comparator() const {
    return internal_comparator_.user_comparator();
  }

  // Constant after construction
  Env* const env_;
  const InternalKeyComparator internal_comparator_;
  const InternalFilterPolicy internal_filter_policy_;
  const Options options_;  // options_.comparator == &internal_comparator_
  const bool owns_info_log_;
  const bool owns_cache_;
  const std::string dbname_;

  // table_cache_ provides its own synchronization
  TableCache* const table_cache_;

  // Lock over the persistent DB state. Non-null iff successfully acquired.
  FileLock* db_lock_;

  // State below is protected by mutex_
  port::Mutex mutex_;
  std::atomic<bool> shutting_down_;
  port::CondVar background_work_finished_signal_ GUARDED_BY(mutex_);
  MemTable* mem_;
  MemTable* imm_ GUARDED_BY(mutex_);  // Memtable being compacted
  std::atomic<bool> has_imm_;         // So bg thread can detect non-null imm_
  WritableFile* logfile_;
  uint64_t logfile_number_ GUARDED_BY(mutex_);
  log::Writer* log_;
  uint32_t seed_ GUARDED_BY(mutex_);  // For sampling.

  // Queue of writers.
  std::deque<Writer*> writers_ GUARDED_BY(mutex_);
  WriteBatch* tmp_batch_ GUARDED_BY(mutex_);

  SnapshotList snapshots_ GUARDED_BY(mutex_);

  // Set of table files to protect from deletion because they are
  // part of ongoing compactions.
  std::set<uint64_t> pending_outputs_ GUARDED_BY(mutex_);

  // Has a background compaction been scheduled or is running?
  bool background_compaction_scheduled_ GUARDED_BY(mutex_);

  ManualCompaction* manual_compaction_ GUARDED_BY(mutex_);

  VersionSet* const versions_ GUARDED_BY(mutex_);

  // Have we encountered a background error in paranoid mode?
  Status bg_error_ GUARDED_BY(mutex_);

  CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_);
};

// Sanitize db options. The caller should delete result.info_log if
// it is not equal to src.info_log.
Options SanitizeOptions(const std::string& db,
                        const InternalKeyComparator* icmp,
                        const InternalFilterPolicy* ipolicy,
                        const Options& src);
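// A minimal caller sketch of the ownership contract above (dbname, icmp,
// ipolicy, and src stand for the caller's own objects; whether a new
// info_log gets allocated is up to SanitizeOptions):
//
//   Options result = SanitizeOptions(dbname, &icmp, &ipolicy, src);
//   ... use result ...
//   if (result.info_log != src.info_log) {
//     delete result.info_log;  // replaced by SanitizeOptions; caller owns it
//   }

}  // namespace leveldb

#endif  // STORAGE_LEVELDB_DB_DB_IMPL_H_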