1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_EXPIRING_LRU_CACHE_H_ 17 #define TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_EXPIRING_LRU_CACHE_H_ 18 19 #include <functional> 20 #include <list> 21 #include <map> 22 #include <memory> 23 #include <string> 24 25 #include "absl/base/thread_annotations.h" 26 #include "absl/synchronization/mutex.h" 27 #include "tensorflow/c/env.h" 28 #include "tensorflow/c/tf_status.h" 29 30 namespace tf_gcs_filesystem { 31 32 /// \brief An LRU cache of string keys and arbitrary values, with configurable 33 /// max item age (in seconds) and max entries. 34 /// 35 /// This class is thread safe. 36 template <typename T> 37 class ExpiringLRUCache { 38 public: 39 /// A `max_age` of 0 means that nothing is cached. A `max_entries` of 0 means 40 /// that there is no limit on the number of entries in the cache (however, if 41 /// `max_age` is also 0, the cache will not be populated). 42 ExpiringLRUCache(uint64_t max_age, size_t max_entries, 43 std::function<uint64_t()> timer_seconds = TF_NowSeconds) max_age_(max_age)44 : max_age_(max_age), 45 max_entries_(max_entries), 46 timer_seconds_(timer_seconds) {} 47 48 /// Insert `value` with key `key`. This will replace any previous entry with 49 /// the same key. Insert(const std::string & key,const T & value)50 void Insert(const std::string& key, const T& value) { 51 if (max_age_ == 0) { 52 return; 53 } 54 absl::MutexLock lock(&mu_); 55 InsertLocked(key, value); 56 } 57 58 // Delete the entry with key `key`. Return true if the entry was found for 59 // `key`, false if the entry was not found. In both cases, there is no entry 60 // with key `key` existed after the call. Delete(const std::string & key)61 bool Delete(const std::string& key) { 62 absl::MutexLock lock(&mu_); 63 return DeleteLocked(key); 64 } 65 66 /// Look up the entry with key `key` and copy it to `value` if found. Returns 67 /// true if an entry was found for `key`, and its timestamp is not more than 68 /// max_age_ seconds in the past. Lookup(const std::string & key,T * value)69 bool Lookup(const std::string& key, T* value) { 70 if (max_age_ == 0) { 71 return false; 72 } 73 absl::MutexLock lock(&mu_); 74 return LookupLocked(key, value); 75 } 76 77 typedef std::function<void(const std::string&, T*, TF_Status*)> ComputeFunc; 78 79 /// Look up the entry with key `key` and copy it to `value` if found. If not 80 /// found, call `compute_func`. If `compute_func` set `status` to `TF_OK`, 81 /// store a copy of the output parameter in the cache, and another copy in 82 /// `value`. LookupOrCompute(const std::string & key,T * value,const ComputeFunc & compute_func,TF_Status * status)83 void LookupOrCompute(const std::string& key, T* value, 84 const ComputeFunc& compute_func, TF_Status* status) { 85 if (max_age_ == 0) { 86 return compute_func(key, value, status); 87 } 88 89 // Note: we hold onto mu_ for the rest of this function. In practice, this 90 // is okay, as stat requests are typically fast, and concurrent requests are 91 // often for the same file. Future work can split this up into one lock per 92 // key if this proves to be a significant performance bottleneck. 93 absl::MutexLock lock(&mu_); 94 if (LookupLocked(key, value)) { 95 return TF_SetStatus(status, TF_OK, ""); 96 } 97 compute_func(key, value, status); 98 if (TF_GetCode(status) == TF_OK) { 99 InsertLocked(key, *value); 100 } 101 } 102 103 /// Clear the cache. Clear()104 void Clear() { 105 absl::MutexLock lock(&mu_); 106 cache_.clear(); 107 lru_list_.clear(); 108 } 109 110 /// Accessors for cache parameters. max_age()111 uint64_t max_age() const { return max_age_; } max_entries()112 size_t max_entries() const { return max_entries_; } 113 114 private: 115 struct Entry { 116 /// The timestamp (seconds) at which the entry was added to the cache. 117 uint64_t timestamp; 118 119 /// The entry's value. 120 T value; 121 122 /// A list iterator pointing to the entry's position in the LRU list. 123 std::list<std::string>::iterator lru_iterator; 124 }; 125 LookupLocked(const std::string & key,T * value)126 bool LookupLocked(const std::string& key, T* value) 127 ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) { 128 auto it = cache_.find(key); 129 if (it == cache_.end()) { 130 return false; 131 } 132 lru_list_.erase(it->second.lru_iterator); 133 if (timer_seconds_() - it->second.timestamp > max_age_) { 134 cache_.erase(it); 135 return false; 136 } 137 *value = it->second.value; 138 lru_list_.push_front(it->first); 139 it->second.lru_iterator = lru_list_.begin(); 140 return true; 141 } 142 InsertLocked(const std::string & key,const T & value)143 void InsertLocked(const std::string& key, const T& value) 144 ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) { 145 lru_list_.push_front(key); 146 Entry entry{timer_seconds_(), value, lru_list_.begin()}; 147 auto insert = cache_.insert(std::make_pair(key, entry)); 148 if (!insert.second) { 149 lru_list_.erase(insert.first->second.lru_iterator); 150 insert.first->second = entry; 151 } else if (max_entries_ > 0 && cache_.size() > max_entries_) { 152 cache_.erase(lru_list_.back()); 153 lru_list_.pop_back(); 154 } 155 } 156 DeleteLocked(const std::string & key)157 bool DeleteLocked(const std::string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) { 158 auto it = cache_.find(key); 159 if (it == cache_.end()) { 160 return false; 161 } 162 lru_list_.erase(it->second.lru_iterator); 163 cache_.erase(it); 164 return true; 165 } 166 167 /// The maximum age of entries in the cache, in seconds. A value of 0 means 168 /// that no entry is ever placed in the cache. 169 const uint64_t max_age_; 170 171 /// The maximum number of entries in the cache. A value of 0 means there is no 172 /// limit on entry count. 173 const size_t max_entries_; 174 175 /// The callback to read timestamps. 176 std::function<uint64_t()> timer_seconds_; 177 178 /// Guards access to the cache and the LRU list. 179 absl::Mutex mu_; 180 181 /// The cache (a map from string key to Entry). 182 std::map<std::string, Entry> cache_ ABSL_GUARDED_BY(mu_); 183 184 /// The LRU list of entries. The front of the list identifies the most 185 /// recently accessed entry. 186 std::list<std::string> lru_list_ ABSL_GUARDED_BY(mu_); 187 }; 188 189 } // namespace tf_gcs_filesystem 190 191 #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_EXPIRING_LRU_CACHE_H_ 192