xref: /aosp_15_r20/external/tensorflow/tensorflow/core/platform/file_system.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
17 #define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
18 
19 #include <stdint.h>
20 
21 #include <functional>
22 #include <string>
23 #include <unordered_map>
24 #include <utility>
25 #include <vector>
26 
27 #include "tensorflow/core/platform/cord.h"
28 #include "tensorflow/core/platform/errors.h"
29 #include "tensorflow/core/platform/file_statistics.h"
30 #include "tensorflow/core/platform/macros.h"
31 #include "tensorflow/core/platform/platform.h"
32 #include "tensorflow/core/platform/stringpiece.h"
33 #include "tensorflow/core/platform/types.h"
34 
35 #ifdef PLATFORM_WINDOWS
36 #undef DeleteFile
37 #undef CopyFile
38 #undef TranslateName
39 #endif
40 
41 namespace tensorflow {
42 
43 class RandomAccessFile;
44 class ReadOnlyMemoryRegion;
45 class WritableFile;
46 
class FileSystem;

/// Token handed to the transaction-aware FileSystem overloads.
///
/// Both members default to nullptr so that a default-constructed token never
/// carries indeterminate pointers.
struct TransactionToken {
  /// File system this token is associated with (presumably the one that
  /// issued it -- confirm against the implementations).
  FileSystem* owner = nullptr;
  /// Implementation-defined payload; its meaning is not visible in this
  /// header.
  void* token = nullptr;
};
52 
53 /// A generic interface for accessing a file system.  Implementations
54 /// of custom filesystem adapters must implement this interface,
55 /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes.
56 class FileSystem {
57  public:
58   /// \brief Creates a brand new random access read-only file with the
59   /// specified name.
60   ///
61   /// On success, stores a pointer to the new file in
62   /// *result and returns OK.  On failure stores NULL in *result and
63   /// returns non-OK.  If the file does not exist, returns a non-OK
64   /// status.
65   ///
66   /// The returned file may be concurrently accessed by multiple threads.
67   ///
68   /// The ownership of the returned RandomAccessFile is passed to the caller
69   /// and the object should be deleted when is not used.
NewRandomAccessFile(const std::string & fname,std::unique_ptr<RandomAccessFile> * result)70   virtual tensorflow::Status NewRandomAccessFile(
71       const std::string& fname, std::unique_ptr<RandomAccessFile>* result) {
72     return NewRandomAccessFile(fname, nullptr, result);
73   }
74 
NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)75   virtual tensorflow::Status NewRandomAccessFile(
76       const std::string& fname, TransactionToken* token,
77       std::unique_ptr<RandomAccessFile>* result) {
78     // We duplicate these methods due to Google internal coding style prevents
79     // virtual functions with default arguments. See PR #41615.
80     return OkStatus();
81   }
82 
83   /// \brief Creates an object that writes to a new file with the specified
84   /// name.
85   ///
86   /// Deletes any existing file with the same name and creates a
87   /// new file.  On success, stores a pointer to the new file in
88   /// *result and returns OK.  On failure stores NULL in *result and
89   /// returns non-OK.
90   ///
91   /// The returned file will only be accessed by one thread at a time.
92   ///
93   /// The ownership of the returned WritableFile is passed to the caller
94   /// and the object should be deleted when is not used.
NewWritableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)95   virtual tensorflow::Status NewWritableFile(
96       const std::string& fname, std::unique_ptr<WritableFile>* result) {
97     return NewWritableFile(fname, nullptr, result);
98   }
99 
NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)100   virtual tensorflow::Status NewWritableFile(
101       const std::string& fname, TransactionToken* token,
102       std::unique_ptr<WritableFile>* result) {
103     return OkStatus();
104   }
105 
106   /// \brief Creates an object that either appends to an existing file, or
107   /// writes to a new file (if the file does not exist to begin with).
108   ///
109   /// On success, stores a pointer to the new file in *result and
110   /// returns OK.  On failure stores NULL in *result and returns
111   /// non-OK.
112   ///
113   /// The returned file will only be accessed by one thread at a time.
114   ///
115   /// The ownership of the returned WritableFile is passed to the caller
116   /// and the object should be deleted when is not used.
NewAppendableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)117   virtual tensorflow::Status NewAppendableFile(
118       const std::string& fname, std::unique_ptr<WritableFile>* result) {
119     return NewAppendableFile(fname, nullptr, result);
120   }
121 
NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)122   virtual tensorflow::Status NewAppendableFile(
123       const std::string& fname, TransactionToken* token,
124       std::unique_ptr<WritableFile>* result) {
125     return OkStatus();
126   }
127 
128   /// \brief Creates a readonly region of memory with the file context.
129   ///
130   /// On success, it returns a pointer to read-only memory region
131   /// from the content of file fname. The ownership of the region is passed to
132   /// the caller. On failure stores nullptr in *result and returns non-OK.
133   ///
134   /// The returned memory region can be accessed from many threads in parallel.
135   ///
136   /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller
137   /// and the object should be deleted when is not used.
NewReadOnlyMemoryRegionFromFile(const std::string & fname,std::unique_ptr<ReadOnlyMemoryRegion> * result)138   virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile(
139       const std::string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) {
140     return NewReadOnlyMemoryRegionFromFile(fname, nullptr, result);
141   }
142 
NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)143   virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile(
144       const std::string& fname, TransactionToken* token,
145       std::unique_ptr<ReadOnlyMemoryRegion>* result) {
146     return OkStatus();
147   }
148 
149   /// Returns OK if the named path exists and NOT_FOUND otherwise.
FileExists(const std::string & fname)150   virtual tensorflow::Status FileExists(const std::string& fname) {
151     return FileExists(fname, nullptr);
152   }
153 
FileExists(const std::string & fname,TransactionToken * token)154   virtual tensorflow::Status FileExists(const std::string& fname,
155                                         TransactionToken* token) {
156     return OkStatus();
157   }
158 
159   /// Returns true if all the listed files exist, false otherwise.
160   /// if status is not null, populate the vector with a detailed status
161   /// for each file.
FilesExist(const std::vector<string> & files,std::vector<Status> * status)162   virtual bool FilesExist(const std::vector<string>& files,
163                           std::vector<Status>* status) {
164     return FilesExist(files, nullptr, status);
165   }
166 
167   virtual bool FilesExist(const std::vector<string>& files,
168                           TransactionToken* token, std::vector<Status>* status);
169 
170   /// \brief Returns the immediate children in the given directory.
171   ///
172   /// The returned paths are relative to 'dir'.
GetChildren(const std::string & dir,std::vector<string> * result)173   virtual tensorflow::Status GetChildren(const std::string& dir,
174                                          std::vector<string>* result) {
175     return GetChildren(dir, nullptr, result);
176   }
177 
GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)178   virtual tensorflow::Status GetChildren(const std::string& dir,
179                                          TransactionToken* token,
180                                          std::vector<string>* result) {
181     return OkStatus();
182   }
183 
184   /// \brief Given a pattern, stores in *results the set of paths that matches
185   /// that pattern. *results is cleared.
186   ///
187   /// pattern must match all of a name, not just a substring.
188   ///
189   /// pattern: { term }
190   /// term:
191   ///   '*': matches any sequence of non-'/' characters
192   ///   '?': matches a single non-'/' character
193   ///   '[' [ '^' ] { match-list } ']':
194   ///        matches any single character (not) on the list
195   ///   c: matches character c (c != '*', '?', '\\', '[')
196   ///   '\\' c: matches character c
197   /// character-range:
198   ///   c: matches character c (c != '\\', '-', ']')
199   ///   '\\' c: matches character c
200   ///   lo '-' hi: matches character c for lo <= c <= hi
201   ///
202   /// Typical return codes:
203   ///  * OK - no errors
204   ///  * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not
205   ///                    implemented
GetMatchingPaths(const std::string & pattern,std::vector<string> * results)206   virtual tensorflow::Status GetMatchingPaths(const std::string& pattern,
207                                               std::vector<string>* results) {
208     return GetMatchingPaths(pattern, nullptr, results);
209   }
210 
GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)211   virtual tensorflow::Status GetMatchingPaths(const std::string& pattern,
212                                               TransactionToken* token,
213                                               std::vector<string>* results) {
214     return OkStatus();
215   }
216 
217   /// \brief Checks if the given filename matches the pattern.
218   ///
219   /// This function provides the equivalent of posix fnmatch, however it is
220   /// implemented without fnmatch to ensure that this can be used for cloud
221   /// filesystems on windows. For windows filesystems, it uses PathMatchSpec.
222   virtual bool Match(const std::string& filename, const std::string& pattern);
223 
224   /// \brief Obtains statistics for the given path.
Stat(const std::string & fname,FileStatistics * stat)225   virtual tensorflow::Status Stat(const std::string& fname,
226                                   FileStatistics* stat) {
227     return Stat(fname, nullptr, stat);
228   }
229 
Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)230   virtual tensorflow::Status Stat(const std::string& fname,
231                                   TransactionToken* token,
232                                   FileStatistics* stat) {
233     return OkStatus();
234   }
235 
236   /// \brief Deletes the named file.
DeleteFile(const std::string & fname)237   virtual tensorflow::Status DeleteFile(const std::string& fname) {
238     return DeleteFile(fname, nullptr);
239   }
240 
DeleteFile(const std::string & fname,TransactionToken * token)241   virtual tensorflow::Status DeleteFile(const std::string& fname,
242                                         TransactionToken* token) {
243     return OkStatus();
244   }
245 
246   /// \brief Creates the specified directory.
247   /// Typical return codes:
248   ///  * OK - successfully created the directory.
249   ///  * ALREADY_EXISTS - directory with name dirname already exists.
250   ///  * PERMISSION_DENIED - dirname is not writable.
CreateDir(const std::string & dirname)251   virtual tensorflow::Status CreateDir(const std::string& dirname) {
252     return CreateDir(dirname, nullptr);
253   }
254 
CreateDir(const std::string & dirname,TransactionToken * token)255   virtual tensorflow::Status CreateDir(const std::string& dirname,
256                                        TransactionToken* token) {
257     return OkStatus();
258   }
259 
260   /// \brief Creates the specified directory and all the necessary
261   /// subdirectories.
262   /// Typical return codes:
263   ///  * OK - successfully created the directory and sub directories, even if
264   ///         they were already created.
265   ///  * PERMISSION_DENIED - dirname or some subdirectory is not writable.
RecursivelyCreateDir(const std::string & dirname)266   virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname) {
267     return RecursivelyCreateDir(dirname, nullptr);
268   }
269 
270   virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname,
271                                                   TransactionToken* token);
272 
273   /// \brief Deletes the specified directory.
DeleteDir(const std::string & dirname)274   virtual tensorflow::Status DeleteDir(const std::string& dirname) {
275     return DeleteDir(dirname, nullptr);
276   }
277 
DeleteDir(const std::string & dirname,TransactionToken * token)278   virtual tensorflow::Status DeleteDir(const std::string& dirname,
279                                        TransactionToken* token) {
280     return OkStatus();
281   }
282 
283   /// \brief Deletes the specified directory and all subdirectories and files
284   /// underneath it. This is accomplished by traversing the directory tree
285   /// rooted at dirname and deleting entries as they are encountered.
286   ///
287   /// If dirname itself is not readable or does not exist, *undeleted_dir_count
288   /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status
289   /// (e.g. NOT_FOUND) is returned.
290   ///
291   /// If dirname and all its descendants were successfully deleted, TF_OK is
292   /// returned and both error counters are set to zero.
293   ///
294   /// Otherwise, while traversing the tree, undeleted_file_count and
295   /// undeleted_dir_count are updated if an entry of the corresponding type
296   /// could not be deleted. The returned error status represents the reason that
297   /// any one of these entries could not be deleted.
298   ///
299   /// REQUIRES: undeleted_files, undeleted_dirs to be not null.
300   ///
301   /// Typical return codes:
302   ///  * OK - dirname exists and we were able to delete everything underneath.
303   ///  * NOT_FOUND - dirname doesn't exist
304   ///  * PERMISSION_DENIED - dirname or some descendant is not writable
305   ///  * UNIMPLEMENTED - Some underlying functions (like Delete) are not
306   ///                    implemented
DeleteRecursively(const std::string & dirname,int64_t * undeleted_files,int64_t * undeleted_dirs)307   virtual tensorflow::Status DeleteRecursively(const std::string& dirname,
308                                                int64_t* undeleted_files,
309                                                int64_t* undeleted_dirs) {
310     return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs);
311   }
312 
313   virtual tensorflow::Status DeleteRecursively(const std::string& dirname,
314                                                TransactionToken* token,
315                                                int64_t* undeleted_files,
316                                                int64_t* undeleted_dirs);
317 
318   /// \brief Stores the size of `fname` in `*file_size`.
GetFileSize(const std::string & fname,uint64 * file_size)319   virtual tensorflow::Status GetFileSize(const std::string& fname,
320                                          uint64* file_size) {
321     return GetFileSize(fname, nullptr, file_size);
322   }
323 
GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)324   virtual tensorflow::Status GetFileSize(const std::string& fname,
325                                          TransactionToken* token,
326                                          uint64* file_size) {
327     return OkStatus();
328   }
329 
330   /// \brief Overwrites the target if it exists.
RenameFile(const std::string & src,const std::string & target)331   virtual tensorflow::Status RenameFile(const std::string& src,
332                                         const std::string& target) {
333     return RenameFile(src, target, nullptr);
334   }
335 
RenameFile(const std::string & src,const std::string & target,TransactionToken * token)336   virtual tensorflow::Status RenameFile(const std::string& src,
337                                         const std::string& target,
338                                         TransactionToken* token) {
339     return OkStatus();
340   }
341 
342   /// \brief Copy the src to target.
CopyFile(const std::string & src,const std::string & target)343   virtual tensorflow::Status CopyFile(const std::string& src,
344                                       const std::string& target) {
345     return CopyFile(src, target, nullptr);
346   }
347 
348   virtual tensorflow::Status CopyFile(const std::string& src,
349                                       const std::string& target,
350                                       TransactionToken* token);
351 
352   /// \brief Translate an URI to a filename for the FileSystem implementation.
353   ///
354   /// The implementation in this class cleans up the path, removing
355   /// duplicate /'s, resolving .. and removing trailing '/'.
356   /// This respects relative vs. absolute paths, but does not
357   /// invoke any system calls (getcwd(2)) in order to resolve relative
358   /// paths with respect to the actual working directory.  That is, this is
359   /// purely string manipulation, completely independent of process state.
360   virtual std::string TranslateName(const std::string& name) const;
361 
362   /// \brief Returns whether the given path is a directory or not.
363   ///
364   /// Typical return codes (not guaranteed exhaustive):
365   ///  * OK - The path exists and is a directory.
366   ///  * FAILED_PRECONDITION - The path exists and is not a directory.
367   ///  * NOT_FOUND - The path entry does not exist.
368   ///  * PERMISSION_DENIED - Insufficient permissions.
369   ///  * UNIMPLEMENTED - The file factory doesn't support directories.
IsDirectory(const std::string & fname)370   virtual tensorflow::Status IsDirectory(const std::string& fname) {
371     return IsDirectory(fname, nullptr);
372   }
373 
374   virtual tensorflow::Status IsDirectory(const std::string& fname,
375                                          TransactionToken* token);
376 
377   /// \brief Returns whether the given path is on a file system
378   /// that has atomic move capabilities. This can be used
379   /// to determine if there needs to be a temp location to safely write objects.
380   /// The second boolean argument has_atomic_move contains this information.
381   ///
382   /// Returns one of the following status codes (not guaranteed exhaustive):
383   ///  * OK - The path is on a recognized file system,
384   ///         so has_atomic_move holds the above information.
385   ///  * UNIMPLEMENTED - The file system of the path hasn't been implemented in
386   ///  TF
387   virtual Status HasAtomicMove(const std::string& path, bool* has_atomic_move);
388 
389   /// Returns whether the give path is on a file system
390   /// that has ability to create a new temp file. This can be used
391   /// to determine if there needs to be a temp location to safely write objects.
392   /// If this returns false, TensorFlow will write directly to output files
393   /// instead of creating a temporary file and swapping it in. This may mean
394   /// that incomplete writes are visible to consumers.
395   virtual Status CanCreateTempFile(const std::string& fname,
396                                    bool* can_create_temp_file);
397 
398   /// \brief Flushes any cached filesystem objects from memory.
FlushCaches()399   virtual void FlushCaches() { FlushCaches(nullptr); }
400 
401   virtual void FlushCaches(TransactionToken* token);
402 
403   /// \brief The separator this filesystem uses.
404   ///
405   /// This is implemented as a part of the filesystem, because even on windows,
406   /// a user may need access to filesystems with '/' separators, such as cloud
407   /// filesystems.
408   virtual char Separator() const;
409 
410   /// \brief Split a path to its basename and dirname.
411   ///
412   /// Helper function for Basename and Dirname.
413   std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) const;
414 
415   /// \brief returns the final file name in the given path.
416   ///
417   /// Returns the part of the path after the final "/".  If there is no
418   /// "/" in the path, the result is the same as the input.
419   virtual StringPiece Basename(StringPiece path) const;
420 
421   /// \brief Returns the part of the path before the final "/".
422   ///
423   /// If there is a single leading "/" in the path, the result will be the
424   /// leading "/".  If there is no "/" in the path, the result is the empty
425   /// prefix of the input.
426   StringPiece Dirname(StringPiece path) const;
427 
428   /// \brief Returns the part of the basename of path after the final ".".
429   ///
430   /// If there is no "." in the basename, the result is empty.
431   StringPiece Extension(StringPiece path) const;
432 
433   /// \brief Clean duplicate and trailing, "/"s, and resolve ".." and ".".
434   ///
435   /// NOTE: This respects relative vs. absolute paths, but does not
436   /// invoke any system calls (getcwd(2)) in order to resolve relative
437   /// paths with respect to the actual working directory.  That is, this is
438   /// purely string manipulation, completely independent of process state.
439   std::string CleanPath(StringPiece path) const;
440 
441   /// \brief Creates a URI from a scheme, host, and path.
442   ///
443   /// If the scheme is empty, we just return the path.
444   std::string CreateURI(StringPiece scheme, StringPiece host,
445                         StringPiece path) const;
446 
447   ///  \brief Creates a temporary file name with an extension.
448   std::string GetTempFilename(const std::string& extension) const;
449 
450   /// \brief Return true if path is absolute.
451   bool IsAbsolutePath(tensorflow::StringPiece path) const;
452 
453 #ifndef SWIG  // variadic templates
454   /// \brief Join multiple paths together.
455   ///
456   /// This function also removes the unnecessary path separators.
457   /// For example:
458   ///
459   ///  Arguments                  | JoinPath
460   ///  ---------------------------+----------
461   ///  '/foo', 'bar'              | /foo/bar
462   ///  '/foo/', 'bar'             | /foo/bar
463   ///  '/foo', '/bar'             | /foo/bar
464   ///
465   /// Usage:
466   /// string path = io::JoinPath("/mydir", filename);
467   /// string path = io::JoinPath(FLAGS_test_srcdir, filename);
468   /// string path = io::JoinPath("/full", "path", "to", "filename");
469   template <typename... T>
JoinPath(const T &...args)470   std::string JoinPath(const T&... args) {
471     return JoinPathImpl({args...});
472   }
473 #endif /* SWIG */
474 
475   std::string JoinPathImpl(
476       std::initializer_list<tensorflow::StringPiece> paths);
477 
478   /// \brief Populates the scheme, host, and path from a URI.
479   ///
480   /// scheme, host, and path are guaranteed by this function to point into the
481   /// contents of uri, even if empty.
482   ///
483   /// Corner cases:
484   /// - If the URI is invalid, scheme and host are set to empty strings and the
485   ///  passed string is assumed to be a path
486   /// - If the URI omits the path (e.g. file://host), then the path is left
487   /// empty.
488   void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
489                 StringPiece* path) const;
490 
491   // Transaction related API
492 
493   /// \brief Starts a new transaction
StartTransaction(TransactionToken ** token)494   virtual tensorflow::Status StartTransaction(TransactionToken** token) {
495     *token = nullptr;
496     return OkStatus();
497   }
498 
499   /// \brief Adds `path` to transaction in `token`
AddToTransaction(const std::string & path,TransactionToken * token)500   virtual tensorflow::Status AddToTransaction(const std::string& path,
501                                               TransactionToken* token) {
502     return OkStatus();
503   }
504 
505   /// \brief Ends transaction
EndTransaction(TransactionToken * token)506   virtual tensorflow::Status EndTransaction(TransactionToken* token) {
507     return OkStatus();
508   }
509 
510   /// \brief Get token for `path` or start a new transaction and add `path` to
511   /// it.
GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)512   virtual tensorflow::Status GetTokenOrStartTransaction(
513       const std::string& path, TransactionToken** token) {
514     *token = nullptr;
515     return OkStatus();
516   }
517 
518   /// \brief Return transaction for `path` or nullptr in `token`
GetTransactionForPath(const std::string & path,TransactionToken ** token)519   virtual tensorflow::Status GetTransactionForPath(const std::string& path,
520                                                    TransactionToken** token) {
521     *token = nullptr;
522     return OkStatus();
523   }
524 
525   /// \brief Decode transaction to human readable string.
526   virtual std::string DecodeTransaction(const TransactionToken* token);
527 
528   /// \brief Set File System Configuration Options
SetOption(const string & key,const string & value)529   virtual Status SetOption(const string& key, const string& value) {
530     return errors::Unimplemented("SetOption");
531   }
532 
533   /// \brief Set File System Configuration Option
SetOption(const std::string & name,const std::vector<string> & values)534   virtual tensorflow::Status SetOption(const std::string& name,
535                                        const std::vector<string>& values) {
536     return errors::Unimplemented("SetOption");
537   }
538 
539   /// \brief Set File System Configuration Option
SetOption(const std::string & name,const std::vector<int64_t> & values)540   virtual tensorflow::Status SetOption(const std::string& name,
541                                        const std::vector<int64_t>& values) {
542     return errors::Unimplemented("SetOption");
543   }
544 
545   /// \brief Set File System Configuration Option
SetOption(const std::string & name,const std::vector<double> & values)546   virtual tensorflow::Status SetOption(const std::string& name,
547                                        const std::vector<double>& values) {
548     return errors::Unimplemented("SetOption");
549   }
550 
FileSystem()551   FileSystem() {}
552 
553   virtual ~FileSystem() = default;
554 };
/// This macro adds using-declarations for the overloaded FileSystem methods to
/// the class in which it is expanded. Without them, a derived class that
/// overrides only one overload of a method (for example the token-taking one)
/// hides the remaining overloads of that name, and every call site would have
/// to migrate to the Transactional API. This is an interim solution until
/// ModularFileSystem class becomes a singleton.
///
/// Every FileSystem method that has both a token-less and a token-taking
/// overload is listed, including FilesExist and CreateDir.
// TODO(sami): Remove this macro when filesystem plugins migration is complete.
#define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \
  using FileSystem::NewRandomAccessFile;                      \
  using FileSystem::NewWritableFile;                          \
  using FileSystem::NewAppendableFile;                        \
  using FileSystem::NewReadOnlyMemoryRegionFromFile;          \
  using FileSystem::FileExists;                               \
  using FileSystem::FilesExist;                               \
  using FileSystem::GetChildren;                              \
  using FileSystem::GetMatchingPaths;                         \
  using FileSystem::Stat;                                     \
  using FileSystem::DeleteFile;                               \
  using FileSystem::CreateDir;                                \
  using FileSystem::RecursivelyCreateDir;                     \
  using FileSystem::DeleteDir;                                \
  using FileSystem::DeleteRecursively;                        \
  using FileSystem::GetFileSize;                              \
  using FileSystem::RenameFile;                               \
  using FileSystem::CopyFile;                                 \
  using FileSystem::IsDirectory;                              \
  using FileSystem::FlushCaches
579 
580 /// A Wrapper class for Transactional FileSystem support.
581 /// This provides means to make use of the transactions with minimal code change
582 /// Any operations that are done through this interface will be through the
583 /// transaction created at the time of construction of this instance.
584 /// See FileSystem documentation for method descriptions.
585 /// This class simply forwards all calls to wrapped filesystem either with given
586 /// transaction token or with token used in its construction. This allows doing
587 /// transactional filesystem access with minimal code change.
588 class WrappedFileSystem : public FileSystem {
589  public:
590   TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT;
591 
NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)592   tensorflow::Status NewRandomAccessFile(
593       const std::string& fname, TransactionToken* token,
594       std::unique_ptr<RandomAccessFile>* result) override {
595     return fs_->NewRandomAccessFile(fname, (token ? token : token_), result);
596   }
597 
NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)598   tensorflow::Status NewWritableFile(
599       const std::string& fname, TransactionToken* token,
600       std::unique_ptr<WritableFile>* result) override {
601     return fs_->NewWritableFile(fname, (token ? token : token_), result);
602   }
603 
NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)604   tensorflow::Status NewAppendableFile(
605       const std::string& fname, TransactionToken* token,
606       std::unique_ptr<WritableFile>* result) override {
607     return fs_->NewAppendableFile(fname, (token ? token : token_), result);
608   }
609 
NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)610   tensorflow::Status NewReadOnlyMemoryRegionFromFile(
611       const std::string& fname, TransactionToken* token,
612       std::unique_ptr<ReadOnlyMemoryRegion>* result) override {
613     return fs_->NewReadOnlyMemoryRegionFromFile(fname, (token ? token : token_),
614                                                 result);
615   }
616 
FileExists(const std::string & fname,TransactionToken * token)617   tensorflow::Status FileExists(const std::string& fname,
618                                 TransactionToken* token) override {
619     return fs_->FileExists(fname, (token ? token : token_));
620   }
621 
FilesExist(const std::vector<string> & files,TransactionToken * token,std::vector<Status> * status)622   bool FilesExist(const std::vector<string>& files, TransactionToken* token,
623                   std::vector<Status>* status) override {
624     return fs_->FilesExist(files, (token ? token : token_), status);
625   }
626 
GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)627   tensorflow::Status GetChildren(const std::string& dir,
628                                  TransactionToken* token,
629                                  std::vector<string>* result) override {
630     return fs_->GetChildren(dir, (token ? token : token_), result);
631   }
632 
GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)633   tensorflow::Status GetMatchingPaths(const std::string& pattern,
634                                       TransactionToken* token,
635                                       std::vector<string>* results) override {
636     return fs_->GetMatchingPaths(pattern, (token ? token : token_), results);
637   }
638 
Match(const std::string & filename,const std::string & pattern)639   bool Match(const std::string& filename, const std::string& pattern) override {
640     return fs_->Match(filename, pattern);
641   }
642 
Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)643   tensorflow::Status Stat(const std::string& fname, TransactionToken* token,
644                           FileStatistics* stat) override {
645     return fs_->Stat(fname, (token ? token : token_), stat);
646   }
647 
DeleteFile(const std::string & fname,TransactionToken * token)648   tensorflow::Status DeleteFile(const std::string& fname,
649                                 TransactionToken* token) override {
650     return fs_->DeleteFile(fname, (token ? token : token_));
651   }
652 
CreateDir(const std::string & dirname,TransactionToken * token)653   tensorflow::Status CreateDir(const std::string& dirname,
654                                TransactionToken* token) override {
655     return fs_->CreateDir(dirname, (token ? token : token_));
656   }
657 
RecursivelyCreateDir(const std::string & dirname,TransactionToken * token)658   tensorflow::Status RecursivelyCreateDir(const std::string& dirname,
659                                           TransactionToken* token) override {
660     return fs_->RecursivelyCreateDir(dirname, (token ? token : token_));
661   }
662 
DeleteDir(const std::string & dirname,TransactionToken * token)663   tensorflow::Status DeleteDir(const std::string& dirname,
664                                TransactionToken* token) override {
665     return fs_->DeleteDir(dirname, (token ? token : token_));
666   }
667 
DeleteRecursively(const std::string & dirname,TransactionToken * token,int64_t * undeleted_files,int64_t * undeleted_dirs)668   tensorflow::Status DeleteRecursively(const std::string& dirname,
669                                        TransactionToken* token,
670                                        int64_t* undeleted_files,
671                                        int64_t* undeleted_dirs) override {
672     return fs_->DeleteRecursively(dirname, (token ? token : token_),
673                                   undeleted_files, undeleted_dirs);
674   }
675 
GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)676   tensorflow::Status GetFileSize(const std::string& fname,
677                                  TransactionToken* token,
678                                  uint64* file_size) override {
679     return fs_->GetFileSize(fname, (token ? token : token_), file_size);
680   }
681 
RenameFile(const std::string & src,const std::string & target,TransactionToken * token)682   tensorflow::Status RenameFile(const std::string& src,
683                                 const std::string& target,
684                                 TransactionToken* token) override {
685     return fs_->RenameFile(src, target, (token ? token : token_));
686   }
687 
CopyFile(const std::string & src,const std::string & target,TransactionToken * token)688   tensorflow::Status CopyFile(const std::string& src, const std::string& target,
689                               TransactionToken* token) override {
690     return fs_->CopyFile(src, target, (token ? token : token_));
691   }
692 
TranslateName(const std::string & name)693   std::string TranslateName(const std::string& name) const override {
694     return fs_->TranslateName(name);
695   }
696 
IsDirectory(const std::string & fname,TransactionToken * token)697   tensorflow::Status IsDirectory(const std::string& fname,
698                                  TransactionToken* token) override {
699     return fs_->IsDirectory(fname, (token ? token : token_));
700   }
701 
HasAtomicMove(const std::string & path,bool * has_atomic_move)702   Status HasAtomicMove(const std::string& path,
703                        bool* has_atomic_move) override {
704     return fs_->HasAtomicMove(path, has_atomic_move);
705   }
706 
FlushCaches(TransactionToken * token)707   void FlushCaches(TransactionToken* token) override {
708     return fs_->FlushCaches((token ? token : token_));
709   }
710 
Separator()711   char Separator() const override { return fs_->Separator(); }
712 
Basename(StringPiece path)713   StringPiece Basename(StringPiece path) const override {
714     return fs_->Basename(path);
715   }
716 
StartTransaction(TransactionToken ** token)717   tensorflow::Status StartTransaction(TransactionToken** token) override {
718     return fs_->StartTransaction(token);
719   }
720 
AddToTransaction(const std::string & path,TransactionToken * token)721   tensorflow::Status AddToTransaction(const std::string& path,
722                                       TransactionToken* token) override {
723     return fs_->AddToTransaction(path, (token ? token : token_));
724   }
725 
EndTransaction(TransactionToken * token)726   tensorflow::Status EndTransaction(TransactionToken* token) override {
727     return fs_->EndTransaction(token);
728   }
729 
GetTransactionForPath(const std::string & path,TransactionToken ** token)730   tensorflow::Status GetTransactionForPath(const std::string& path,
731                                            TransactionToken** token) override {
732     return fs_->GetTransactionForPath(path, token);
733   }
734 
GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)735   tensorflow::Status GetTokenOrStartTransaction(
736       const std::string& path, TransactionToken** token) override {
737     return fs_->GetTokenOrStartTransaction(path, token);
738   }
739 
DecodeTransaction(const TransactionToken * token)740   std::string DecodeTransaction(const TransactionToken* token) override {
741     return fs_->DecodeTransaction((token ? token : token_));
742   }
743 
WrappedFileSystem(FileSystem * file_system,TransactionToken * token)744   WrappedFileSystem(FileSystem* file_system, TransactionToken* token)
745       : fs_(file_system), token_(token) {}
746 
747   ~WrappedFileSystem() override = default;
748 
749  private:
750   FileSystem* fs_;
751   TransactionToken* token_;
752 };
753 
754 /// A file abstraction for randomly reading the contents of a file.
755 class RandomAccessFile {
756  public:
RandomAccessFile()757   RandomAccessFile() {}
758   virtual ~RandomAccessFile() = default;
759 
760   /// \brief Returns the name of the file.
761   ///
762   /// This is an optional operation that may not be implemented by every
763   /// filesystem.
Name(StringPiece * result)764   virtual tensorflow::Status Name(StringPiece* result) const {
765     return errors::Unimplemented("This filesystem does not support Name()");
766   }
767 
768   /// \brief Reads up to `n` bytes from the file starting at `offset`.
769   ///
770   /// `scratch[0..n-1]` may be written by this routine.  Sets `*result`
771   /// to the data that was read (including if fewer than `n` bytes were
772   /// successfully read).  May set `*result` to point at data in
773   /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when
774   /// `*result` is used.
775   ///
776   /// On OK returned status: `n` bytes have been stored in `*result`.
777   /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`.
778   ///
779   /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result`
780   /// because of EOF.
781   ///
782   /// Safe for concurrent use by multiple threads.
783   virtual tensorflow::Status Read(uint64 offset, size_t n, StringPiece* result,
784                                   char* scratch) const = 0;
785 
786 #if defined(TF_CORD_SUPPORT)
787   /// \brief Read up to `n` bytes from the file starting at `offset`.
Read(uint64 offset,size_t n,absl::Cord * cord)788   virtual tensorflow::Status Read(uint64 offset, size_t n,
789                                   absl::Cord* cord) const {
790     return errors::Unimplemented(
791         "Read(uint64, size_t, absl::Cord*) is not "
792         "implemented");
793   }
794 #endif
795 
796  private:
797   TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile);
798 };
799 
800 /// \brief A file abstraction for sequential writing.
801 ///
802 /// The implementation must provide buffering since callers may append
803 /// small fragments at a time to the file.
804 class WritableFile {
805  public:
WritableFile()806   WritableFile() {}
807   virtual ~WritableFile() = default;
808 
809   /// \brief Append 'data' to the file.
810   virtual tensorflow::Status Append(StringPiece data) = 0;
811 
812 #if defined(TF_CORD_SUPPORT)
813   // \brief Append 'data' to the file.
Append(const absl::Cord & cord)814   virtual tensorflow::Status Append(const absl::Cord& cord) {
815     for (StringPiece chunk : cord.Chunks()) {
816       TF_RETURN_IF_ERROR(Append(chunk));
817     }
818     return OkStatus();
819   }
820 #endif
821 
822   /// \brief Close the file.
823   ///
824   /// Flush() and de-allocate resources associated with this file
825   ///
826   /// Typical return codes (not guaranteed to be exhaustive):
827   ///  * OK
828   ///  * Other codes, as returned from Flush()
829   virtual tensorflow::Status Close() = 0;
830 
831   /// \brief Flushes the file and optionally syncs contents to filesystem.
832   ///
833   /// This should flush any local buffers whose contents have not been
834   /// delivered to the filesystem.
835   ///
836   /// If the process terminates after a successful flush, the contents
837   /// may still be persisted, since the underlying filesystem may
838   /// eventually flush the contents.  If the OS or machine crashes
839   /// after a successful flush, the contents may or may not be
840   /// persisted, depending on the implementation.
841   virtual tensorflow::Status Flush() = 0;
842 
843   // \brief Returns the name of the file.
844   ///
845   /// This is an optional operation that may not be implemented by every
846   /// filesystem.
Name(StringPiece * result)847   virtual tensorflow::Status Name(StringPiece* result) const {
848     return errors::Unimplemented("This filesystem does not support Name()");
849   }
850 
851   /// \brief Syncs contents of file to filesystem.
852   ///
853   /// This waits for confirmation from the filesystem that the contents
854   /// of the file have been persisted to the filesystem; if the OS
855   /// or machine crashes after a successful Sync, the contents should
856   /// be properly saved.
857   virtual tensorflow::Status Sync() = 0;
858 
859   /// \brief Retrieves the current write position in the file, or -1 on
860   /// error.
861   ///
862   /// This is an optional operation, subclasses may choose to return
863   /// errors::Unimplemented.
Tell(int64_t * position)864   virtual tensorflow::Status Tell(int64_t* position) {
865     *position = -1;
866     return errors::Unimplemented("This filesystem does not support Tell()");
867   }
868 
869  private:
870   TF_DISALLOW_COPY_AND_ASSIGN(WritableFile);
871 };
872 
873 /// \brief A readonly memmapped file abstraction.
874 ///
875 /// The implementation must guarantee that all memory is accessible when the
876 /// object exists, independently from the Env that created it.
877 class ReadOnlyMemoryRegion {
878  public:
ReadOnlyMemoryRegion()879   ReadOnlyMemoryRegion() {}
880   virtual ~ReadOnlyMemoryRegion() = default;
881 
882   /// \brief Returns a pointer to the memory region.
883   virtual const void* data() = 0;
884 
885   /// \brief Returns the length of the memory region in bytes.
886   virtual uint64 length() = 0;
887 };
888 
889 /// \brief A registry for file system implementations.
890 ///
891 /// Filenames are specified as an URI, which is of the form
892 /// [scheme://]<filename>.
893 /// File system implementations are registered using the REGISTER_FILE_SYSTEM
894 /// macro, providing the 'scheme' as the key.
895 ///
896 /// There are two `Register` methods: one using `Factory` for legacy filesystems
897 /// (deprecated mechanism of subclassing `FileSystem` and using
898 /// `REGISTER_FILE_SYSTEM` macro), and one using `std::unique_ptr<FileSystem>`
899 /// for the new modular approach.
900 ///
901 /// Note that the new API expects a pointer to `ModularFileSystem` but this is
902 /// not checked as there should be exactly one caller to the API and doing the
903 /// check results in a circular dependency between `BUILD` targets.
904 ///
905 /// Plan is to completely remove the filesystem registration from `Env` and
906 /// incorporate it into `ModularFileSystem` class (which will be renamed to be
907 /// the only `FileSystem` class and marked as `final`). But this will happen at
908 /// a later time, after we convert all filesystems to the new API.
909 ///
910 /// TODO(b/139060984): After all filesystems are converted, remove old
911 /// registration and update comment.
912 class FileSystemRegistry {
913  public:
914   typedef std::function<FileSystem*()> Factory;
915 
916   virtual ~FileSystemRegistry() = default;
917   virtual tensorflow::Status Register(const std::string& scheme,
918                                       Factory factory) = 0;
919   virtual tensorflow::Status Register(
920       const std::string& scheme, std::unique_ptr<FileSystem> filesystem) = 0;
921   virtual FileSystem* Lookup(const std::string& scheme) = 0;
922   virtual tensorflow::Status GetRegisteredFileSystemSchemes(
923       std::vector<std::string>* schemes) = 0;
924 };
925 
926 }  // namespace tensorflow
927 
928 #endif  // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
929