1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PLATFORM_CLOUD_FILE_BLOCK_CACHE_H_ 17 #define TENSORFLOW_CORE_PLATFORM_CLOUD_FILE_BLOCK_CACHE_H_ 18 19 #include <functional> 20 #include <list> 21 #include <map> 22 #include <memory> 23 #include <string> 24 #include <vector> 25 26 #include "tensorflow/core/platform/env.h" 27 #include "tensorflow/core/platform/mutex.h" 28 #include "tensorflow/core/platform/notification.h" 29 #include "tensorflow/core/platform/status.h" 30 #include "tensorflow/core/platform/stringpiece.h" 31 #include "tensorflow/core/platform/thread_annotations.h" 32 #include "tensorflow/core/platform/types.h" 33 34 namespace tensorflow { 35 36 class FileBlockCache; 37 38 /// FileBlockCacheStatsInterface allows for instrumentation of the block cache. 39 /// 40 /// FileBlockCacheStatsInterface and its subclasses must be safe to use from 41 /// multiple threads concurrently. 42 /// 43 /// WARNING! This is an experimental interface that may change or go away at any 44 /// time. 45 class FileBlockCacheStatsInterface { 46 public: 47 /// Configure is called to provide instrumentation hooks. 48 /// 49 /// Note: Configure can be called multiple times (e.g. if the block cache is 50 /// re-initialized). 51 virtual void Configure(const FileBlockCache* block_cache) = 0; 52 53 /// RecordBlockLoadRequest is called to record the size of a hit block. 54 virtual void RecordCacheHitBlockSize(size_t bytes_transferred) = 0; 55 56 /// RecordBlockLoadRequest is called to record the size of a missed block. 57 virtual void RecordCacheMissBlockSize(size_t bytes_transferred) = 0; 58 59 virtual ~FileBlockCacheStatsInterface() = default; 60 }; 61 62 /// \brief A block cache of file contents, keyed by {filename, offset}. 63 /// 64 /// This class should be shared by read-only random access files on a remote 65 /// filesystem (e.g. GCS). 66 class FileBlockCache { 67 public: 68 /// The callback executed when a block is not found in the cache, and needs to 69 /// be fetched from the backing filesystem. This callback is provided when the 70 /// cache is constructed. The returned Status should be OK as long as the 71 /// read from the remote filesystem succeeded (similar to the semantics of the 72 /// read(2) system call). 73 typedef std::function<Status(const string& filename, size_t offset, 74 size_t buffer_size, char* buffer, 75 size_t* bytes_transferred)> 76 BlockFetcher; 77 ~FileBlockCache()78 virtual ~FileBlockCache() {} 79 80 /// Read `n` bytes from `filename` starting at `offset` into `out`. This 81 /// method will return: 82 /// 83 /// 1) The error from the remote filesystem, if the read from the remote 84 /// filesystem failed. 85 /// 2) PRECONDITION_FAILED if the read from the remote filesystem succeeded, 86 /// but the read returned a partial block, and the LRU cache contained a 87 /// block at a higher offset (indicating that the partial block should have 88 /// been a full block). 89 /// 3) OUT_OF_RANGE if the read from the remote filesystem succeeded, but 90 /// the file contents do not extend past `offset` and thus nothing was 91 /// placed in `out`. 92 /// 4) OK otherwise (i.e. the read succeeded, and at least one byte was placed 93 /// in `out`). 94 virtual Status Read(const string& filename, size_t offset, size_t n, 95 char* buffer, size_t* bytes_transferred) = 0; 96 97 // Validate the given file signature with the existing file signature in the 98 // cache. Returns true if the signature doesn't change or the file did not 99 // exist before. If the signature changes, update the existing signature with 100 // the new one and remove the file from cache. 101 virtual bool ValidateAndUpdateFileSignature(const string& filename, 102 int64_t file_signature) = 0; 103 104 /// Remove all cached blocks for `filename`. 105 virtual void RemoveFile(const string& filename) = 0; 106 107 /// Remove all cached data. 108 virtual void Flush() = 0; 109 110 /// Accessors for cache parameters. 111 virtual size_t block_size() const = 0; 112 virtual size_t max_bytes() const = 0; 113 virtual uint64 max_staleness() const = 0; 114 115 /// The current size (in bytes) of the cache. 116 virtual size_t CacheSize() const = 0; 117 118 // Returns true if the cache is enabled. If false, the BlockFetcher callback 119 // is always executed during Read. 120 virtual bool IsCacheEnabled() const = 0; 121 SetStats(FileBlockCacheStatsInterface * stats)122 void SetStats(FileBlockCacheStatsInterface* stats) { 123 if (stats == nullptr) { 124 LOG(ERROR) 125 << "Attempted to monitor a NULL stats object. This may prevent the " 126 "corresponding monitoring data from being exported"; 127 return; 128 } 129 cache_stats_ = stats; 130 cache_stats_->Configure(this); 131 } 132 133 protected: 134 FileBlockCacheStatsInterface* cache_stats_ = nullptr; // Not owned. 135 }; 136 137 } // namespace tensorflow 138 139 #endif // TENSORFLOW_CORE_PLATFORM_CLOUD_FILE_BLOCK_CACHE_H_ 140