xref: /aosp_15_r20/external/tensorflow/tensorflow/core/platform/cloud/file_block_cache.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PLATFORM_CLOUD_FILE_BLOCK_CACHE_H_
17 #define TENSORFLOW_CORE_PLATFORM_CLOUD_FILE_BLOCK_CACHE_H_
18 
19 #include <functional>
20 #include <list>
21 #include <map>
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include "tensorflow/core/platform/env.h"
27 #include "tensorflow/core/platform/mutex.h"
28 #include "tensorflow/core/platform/notification.h"
29 #include "tensorflow/core/platform/status.h"
30 #include "tensorflow/core/platform/stringpiece.h"
31 #include "tensorflow/core/platform/thread_annotations.h"
32 #include "tensorflow/core/platform/types.h"
33 
34 namespace tensorflow {
35 
class FileBlockCache;

/// FileBlockCacheStatsInterface allows for instrumentation of the block cache.
///
/// FileBlockCacheStatsInterface and its subclasses must be safe to use from
/// multiple threads concurrently.
///
/// WARNING! This is an experimental interface that may change or go away at any
/// time.
class FileBlockCacheStatsInterface {
 public:
  /// Configure is called to provide instrumentation hooks.
  ///
  /// `block_cache` is the cache to be instrumented; FileBlockCache::SetStats
  /// passes the cache the stats object was attached to.
  ///
  /// Note: Configure can be called multiple times (e.g. if the block cache is
  /// re-initialized).
  virtual void Configure(const FileBlockCache* block_cache) = 0;

  /// RecordCacheHitBlockSize is called to record the size of a hit block.
  virtual void RecordCacheHitBlockSize(size_t bytes_transferred) = 0;

  /// RecordCacheMissBlockSize is called to record the size of a missed block.
  virtual void RecordCacheMissBlockSize(size_t bytes_transferred) = 0;

  /// Virtual so that implementations can be destroyed through a pointer to
  /// this interface.
  virtual ~FileBlockCacheStatsInterface() = default;
};
61 
62 /// \brief A block cache of file contents, keyed by {filename, offset}.
63 ///
64 /// This class should be shared by read-only random access files on a remote
65 /// filesystem (e.g. GCS).
66 class FileBlockCache {
67  public:
68   /// The callback executed when a block is not found in the cache, and needs to
69   /// be fetched from the backing filesystem. This callback is provided when the
70   /// cache is constructed. The returned Status should be OK as long as the
71   /// read from the remote filesystem succeeded (similar to the semantics of the
72   /// read(2) system call).
73   typedef std::function<Status(const string& filename, size_t offset,
74                                size_t buffer_size, char* buffer,
75                                size_t* bytes_transferred)>
76       BlockFetcher;
77 
~FileBlockCache()78   virtual ~FileBlockCache() {}
79 
80   /// Read `n` bytes from `filename` starting at `offset` into `out`. This
81   /// method will return:
82   ///
83   /// 1) The error from the remote filesystem, if the read from the remote
84   ///    filesystem failed.
85   /// 2) PRECONDITION_FAILED if the read from the remote filesystem succeeded,
86   ///    but the read returned a partial block, and the LRU cache contained a
87   ///    block at a higher offset (indicating that the partial block should have
88   ///    been a full block).
89   /// 3) OUT_OF_RANGE if the read from the remote filesystem succeeded, but
90   ///    the file contents do not extend past `offset` and thus nothing was
91   ///    placed in `out`.
92   /// 4) OK otherwise (i.e. the read succeeded, and at least one byte was placed
93   ///    in `out`).
94   virtual Status Read(const string& filename, size_t offset, size_t n,
95                       char* buffer, size_t* bytes_transferred) = 0;
96 
97   // Validate the given file signature with the existing file signature in the
98   // cache. Returns true if the signature doesn't change or the file did not
99   // exist before. If the signature changes, update the existing signature with
100   // the new one and remove the file from cache.
101   virtual bool ValidateAndUpdateFileSignature(const string& filename,
102                                               int64_t file_signature) = 0;
103 
104   /// Remove all cached blocks for `filename`.
105   virtual void RemoveFile(const string& filename) = 0;
106 
107   /// Remove all cached data.
108   virtual void Flush() = 0;
109 
110   /// Accessors for cache parameters.
111   virtual size_t block_size() const = 0;
112   virtual size_t max_bytes() const = 0;
113   virtual uint64 max_staleness() const = 0;
114 
115   /// The current size (in bytes) of the cache.
116   virtual size_t CacheSize() const = 0;
117 
118   // Returns true if the cache is enabled. If false, the BlockFetcher callback
119   // is always executed during Read.
120   virtual bool IsCacheEnabled() const = 0;
121 
SetStats(FileBlockCacheStatsInterface * stats)122   void SetStats(FileBlockCacheStatsInterface* stats) {
123     if (stats == nullptr) {
124       LOG(ERROR)
125           << "Attempted to monitor a NULL stats object. This may prevent the "
126              "corresponding monitoring data from being exported";
127       return;
128     }
129     cache_stats_ = stats;
130     cache_stats_->Configure(this);
131   }
132 
133  protected:
134   FileBlockCacheStatsInterface* cache_stats_ = nullptr;  // Not owned.
135 };
136 
137 }  // namespace tensorflow
138 
139 #endif  // TENSORFLOW_CORE_PLATFORM_CLOUD_FILE_BLOCK_CACHE_H_
140