1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 17 #define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 18 19 #include <stdint.h> 20 21 #include <functional> 22 #include <string> 23 #include <unordered_map> 24 #include <utility> 25 #include <vector> 26 27 #include "tensorflow/core/platform/cord.h" 28 #include "tensorflow/core/platform/errors.h" 29 #include "tensorflow/core/platform/file_statistics.h" 30 #include "tensorflow/core/platform/macros.h" 31 #include "tensorflow/core/platform/platform.h" 32 #include "tensorflow/core/platform/stringpiece.h" 33 #include "tensorflow/core/platform/types.h" 34 35 #ifdef PLATFORM_WINDOWS 36 #undef DeleteFile 37 #undef CopyFile 38 #undef TranslateName 39 #endif 40 41 namespace tensorflow { 42 43 class RandomAccessFile; 44 class ReadOnlyMemoryRegion; 45 class WritableFile; 46 47 class FileSystem; 48 struct TransactionToken { 49 FileSystem* owner; 50 void* token; 51 }; 52 53 /// A generic interface for accessing a file system. Implementations 54 /// of custom filesystem adapters must implement this interface, 55 /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes. 56 class FileSystem { 57 public: 58 /// \brief Creates a brand new random access read-only file with the 59 /// specified name. 60 /// 61 /// On success, stores a pointer to the new file in 62 /// *result and returns OK. On failure stores NULL in *result and 63 /// returns non-OK. If the file does not exist, returns a non-OK 64 /// status. 65 /// 66 /// The returned file may be concurrently accessed by multiple threads. 67 /// 68 /// The ownership of the returned RandomAccessFile is passed to the caller 69 /// and the object should be deleted when is not used. NewRandomAccessFile(const std::string & fname,std::unique_ptr<RandomAccessFile> * result)70 virtual tensorflow::Status NewRandomAccessFile( 71 const std::string& fname, std::unique_ptr<RandomAccessFile>* result) { 72 return NewRandomAccessFile(fname, nullptr, result); 73 } 74 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)75 virtual tensorflow::Status NewRandomAccessFile( 76 const std::string& fname, TransactionToken* token, 77 std::unique_ptr<RandomAccessFile>* result) { 78 // We duplicate these methods due to Google internal coding style prevents 79 // virtual functions with default arguments. See PR #41615. 80 return OkStatus(); 81 } 82 83 /// \brief Creates an object that writes to a new file with the specified 84 /// name. 85 /// 86 /// Deletes any existing file with the same name and creates a 87 /// new file. On success, stores a pointer to the new file in 88 /// *result and returns OK. On failure stores NULL in *result and 89 /// returns non-OK. 90 /// 91 /// The returned file will only be accessed by one thread at a time. 92 /// 93 /// The ownership of the returned WritableFile is passed to the caller 94 /// and the object should be deleted when is not used. NewWritableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)95 virtual tensorflow::Status NewWritableFile( 96 const std::string& fname, std::unique_ptr<WritableFile>* result) { 97 return NewWritableFile(fname, nullptr, result); 98 } 99 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)100 virtual tensorflow::Status NewWritableFile( 101 const std::string& fname, TransactionToken* token, 102 std::unique_ptr<WritableFile>* result) { 103 return OkStatus(); 104 } 105 106 /// \brief Creates an object that either appends to an existing file, or 107 /// writes to a new file (if the file does not exist to begin with). 108 /// 109 /// On success, stores a pointer to the new file in *result and 110 /// returns OK. On failure stores NULL in *result and returns 111 /// non-OK. 112 /// 113 /// The returned file will only be accessed by one thread at a time. 114 /// 115 /// The ownership of the returned WritableFile is passed to the caller 116 /// and the object should be deleted when is not used. NewAppendableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)117 virtual tensorflow::Status NewAppendableFile( 118 const std::string& fname, std::unique_ptr<WritableFile>* result) { 119 return NewAppendableFile(fname, nullptr, result); 120 } 121 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)122 virtual tensorflow::Status NewAppendableFile( 123 const std::string& fname, TransactionToken* token, 124 std::unique_ptr<WritableFile>* result) { 125 return OkStatus(); 126 } 127 128 /// \brief Creates a readonly region of memory with the file context. 129 /// 130 /// On success, it returns a pointer to read-only memory region 131 /// from the content of file fname. The ownership of the region is passed to 132 /// the caller. On failure stores nullptr in *result and returns non-OK. 133 /// 134 /// The returned memory region can be accessed from many threads in parallel. 135 /// 136 /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller 137 /// and the object should be deleted when is not used. NewReadOnlyMemoryRegionFromFile(const std::string & fname,std::unique_ptr<ReadOnlyMemoryRegion> * result)138 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 139 const std::string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) { 140 return NewReadOnlyMemoryRegionFromFile(fname, nullptr, result); 141 } 142 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)143 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 144 const std::string& fname, TransactionToken* token, 145 std::unique_ptr<ReadOnlyMemoryRegion>* result) { 146 return OkStatus(); 147 } 148 149 /// Returns OK if the named path exists and NOT_FOUND otherwise. FileExists(const std::string & fname)150 virtual tensorflow::Status FileExists(const std::string& fname) { 151 return FileExists(fname, nullptr); 152 } 153 FileExists(const std::string & fname,TransactionToken * token)154 virtual tensorflow::Status FileExists(const std::string& fname, 155 TransactionToken* token) { 156 return OkStatus(); 157 } 158 159 /// Returns true if all the listed files exist, false otherwise. 160 /// if status is not null, populate the vector with a detailed status 161 /// for each file. FilesExist(const std::vector<string> & files,std::vector<Status> * status)162 virtual bool FilesExist(const std::vector<string>& files, 163 std::vector<Status>* status) { 164 return FilesExist(files, nullptr, status); 165 } 166 167 virtual bool FilesExist(const std::vector<string>& files, 168 TransactionToken* token, std::vector<Status>* status); 169 170 /// \brief Returns the immediate children in the given directory. 171 /// 172 /// The returned paths are relative to 'dir'. GetChildren(const std::string & dir,std::vector<string> * result)173 virtual tensorflow::Status GetChildren(const std::string& dir, 174 std::vector<string>* result) { 175 return GetChildren(dir, nullptr, result); 176 } 177 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)178 virtual tensorflow::Status GetChildren(const std::string& dir, 179 TransactionToken* token, 180 std::vector<string>* result) { 181 return OkStatus(); 182 } 183 184 /// \brief Given a pattern, stores in *results the set of paths that matches 185 /// that pattern. *results is cleared. 186 /// 187 /// pattern must match all of a name, not just a substring. 188 /// 189 /// pattern: { term } 190 /// term: 191 /// '*': matches any sequence of non-'/' characters 192 /// '?': matches a single non-'/' character 193 /// '[' [ '^' ] { match-list } ']': 194 /// matches any single character (not) on the list 195 /// c: matches character c (c != '*', '?', '\\', '[') 196 /// '\\' c: matches character c 197 /// character-range: 198 /// c: matches character c (c != '\\', '-', ']') 199 /// '\\' c: matches character c 200 /// lo '-' hi: matches character c for lo <= c <= hi 201 /// 202 /// Typical return codes: 203 /// * OK - no errors 204 /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not 205 /// implemented GetMatchingPaths(const std::string & pattern,std::vector<string> * results)206 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 207 std::vector<string>* results) { 208 return GetMatchingPaths(pattern, nullptr, results); 209 } 210 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)211 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 212 TransactionToken* token, 213 std::vector<string>* results) { 214 return OkStatus(); 215 } 216 217 /// \brief Checks if the given filename matches the pattern. 218 /// 219 /// This function provides the equivalent of posix fnmatch, however it is 220 /// implemented without fnmatch to ensure that this can be used for cloud 221 /// filesystems on windows. For windows filesystems, it uses PathMatchSpec. 222 virtual bool Match(const std::string& filename, const std::string& pattern); 223 224 /// \brief Obtains statistics for the given path. Stat(const std::string & fname,FileStatistics * stat)225 virtual tensorflow::Status Stat(const std::string& fname, 226 FileStatistics* stat) { 227 return Stat(fname, nullptr, stat); 228 } 229 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)230 virtual tensorflow::Status Stat(const std::string& fname, 231 TransactionToken* token, 232 FileStatistics* stat) { 233 return OkStatus(); 234 } 235 236 /// \brief Deletes the named file. DeleteFile(const std::string & fname)237 virtual tensorflow::Status DeleteFile(const std::string& fname) { 238 return DeleteFile(fname, nullptr); 239 } 240 DeleteFile(const std::string & fname,TransactionToken * token)241 virtual tensorflow::Status DeleteFile(const std::string& fname, 242 TransactionToken* token) { 243 return OkStatus(); 244 } 245 246 /// \brief Creates the specified directory. 247 /// Typical return codes: 248 /// * OK - successfully created the directory. 249 /// * ALREADY_EXISTS - directory with name dirname already exists. 250 /// * PERMISSION_DENIED - dirname is not writable. CreateDir(const std::string & dirname)251 virtual tensorflow::Status CreateDir(const std::string& dirname) { 252 return CreateDir(dirname, nullptr); 253 } 254 CreateDir(const std::string & dirname,TransactionToken * token)255 virtual tensorflow::Status CreateDir(const std::string& dirname, 256 TransactionToken* token) { 257 return OkStatus(); 258 } 259 260 /// \brief Creates the specified directory and all the necessary 261 /// subdirectories. 262 /// Typical return codes: 263 /// * OK - successfully created the directory and sub directories, even if 264 /// they were already created. 265 /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. RecursivelyCreateDir(const std::string & dirname)266 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname) { 267 return RecursivelyCreateDir(dirname, nullptr); 268 } 269 270 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 271 TransactionToken* token); 272 273 /// \brief Deletes the specified directory. DeleteDir(const std::string & dirname)274 virtual tensorflow::Status DeleteDir(const std::string& dirname) { 275 return DeleteDir(dirname, nullptr); 276 } 277 DeleteDir(const std::string & dirname,TransactionToken * token)278 virtual tensorflow::Status DeleteDir(const std::string& dirname, 279 TransactionToken* token) { 280 return OkStatus(); 281 } 282 283 /// \brief Deletes the specified directory and all subdirectories and files 284 /// underneath it. This is accomplished by traversing the directory tree 285 /// rooted at dirname and deleting entries as they are encountered. 286 /// 287 /// If dirname itself is not readable or does not exist, *undeleted_dir_count 288 /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status 289 /// (e.g. NOT_FOUND) is returned. 290 /// 291 /// If dirname and all its descendants were successfully deleted, TF_OK is 292 /// returned and both error counters are set to zero. 293 /// 294 /// Otherwise, while traversing the tree, undeleted_file_count and 295 /// undeleted_dir_count are updated if an entry of the corresponding type 296 /// could not be deleted. The returned error status represents the reason that 297 /// any one of these entries could not be deleted. 298 /// 299 /// REQUIRES: undeleted_files, undeleted_dirs to be not null. 300 /// 301 /// Typical return codes: 302 /// * OK - dirname exists and we were able to delete everything underneath. 303 /// * NOT_FOUND - dirname doesn't exist 304 /// * PERMISSION_DENIED - dirname or some descendant is not writable 305 /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not 306 /// implemented DeleteRecursively(const std::string & dirname,int64_t * undeleted_files,int64_t * undeleted_dirs)307 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 308 int64_t* undeleted_files, 309 int64_t* undeleted_dirs) { 310 return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs); 311 } 312 313 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 314 TransactionToken* token, 315 int64_t* undeleted_files, 316 int64_t* undeleted_dirs); 317 318 /// \brief Stores the size of `fname` in `*file_size`. GetFileSize(const std::string & fname,uint64 * file_size)319 virtual tensorflow::Status GetFileSize(const std::string& fname, 320 uint64* file_size) { 321 return GetFileSize(fname, nullptr, file_size); 322 } 323 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)324 virtual tensorflow::Status GetFileSize(const std::string& fname, 325 TransactionToken* token, 326 uint64* file_size) { 327 return OkStatus(); 328 } 329 330 /// \brief Overwrites the target if it exists. RenameFile(const std::string & src,const std::string & target)331 virtual tensorflow::Status RenameFile(const std::string& src, 332 const std::string& target) { 333 return RenameFile(src, target, nullptr); 334 } 335 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)336 virtual tensorflow::Status RenameFile(const std::string& src, 337 const std::string& target, 338 TransactionToken* token) { 339 return OkStatus(); 340 } 341 342 /// \brief Copy the src to target. CopyFile(const std::string & src,const std::string & target)343 virtual tensorflow::Status CopyFile(const std::string& src, 344 const std::string& target) { 345 return CopyFile(src, target, nullptr); 346 } 347 348 virtual tensorflow::Status CopyFile(const std::string& src, 349 const std::string& target, 350 TransactionToken* token); 351 352 /// \brief Translate an URI to a filename for the FileSystem implementation. 353 /// 354 /// The implementation in this class cleans up the path, removing 355 /// duplicate /'s, resolving .. and removing trailing '/'. 356 /// This respects relative vs. absolute paths, but does not 357 /// invoke any system calls (getcwd(2)) in order to resolve relative 358 /// paths with respect to the actual working directory. That is, this is 359 /// purely string manipulation, completely independent of process state. 360 virtual std::string TranslateName(const std::string& name) const; 361 362 /// \brief Returns whether the given path is a directory or not. 363 /// 364 /// Typical return codes (not guaranteed exhaustive): 365 /// * OK - The path exists and is a directory. 366 /// * FAILED_PRECONDITION - The path exists and is not a directory. 367 /// * NOT_FOUND - The path entry does not exist. 368 /// * PERMISSION_DENIED - Insufficient permissions. 369 /// * UNIMPLEMENTED - The file factory doesn't support directories. IsDirectory(const std::string & fname)370 virtual tensorflow::Status IsDirectory(const std::string& fname) { 371 return IsDirectory(fname, nullptr); 372 } 373 374 virtual tensorflow::Status IsDirectory(const std::string& fname, 375 TransactionToken* token); 376 377 /// \brief Returns whether the given path is on a file system 378 /// that has atomic move capabilities. This can be used 379 /// to determine if there needs to be a temp location to safely write objects. 380 /// The second boolean argument has_atomic_move contains this information. 381 /// 382 /// Returns one of the following status codes (not guaranteed exhaustive): 383 /// * OK - The path is on a recognized file system, 384 /// so has_atomic_move holds the above information. 385 /// * UNIMPLEMENTED - The file system of the path hasn't been implemented in 386 /// TF 387 virtual Status HasAtomicMove(const std::string& path, bool* has_atomic_move); 388 389 /// Returns whether the give path is on a file system 390 /// that has ability to create a new temp file. This can be used 391 /// to determine if there needs to be a temp location to safely write objects. 392 /// If this returns false, TensorFlow will write directly to output files 393 /// instead of creating a temporary file and swapping it in. This may mean 394 /// that incomplete writes are visible to consumers. 395 virtual Status CanCreateTempFile(const std::string& fname, 396 bool* can_create_temp_file); 397 398 /// \brief Flushes any cached filesystem objects from memory. FlushCaches()399 virtual void FlushCaches() { FlushCaches(nullptr); } 400 401 virtual void FlushCaches(TransactionToken* token); 402 403 /// \brief The separator this filesystem uses. 404 /// 405 /// This is implemented as a part of the filesystem, because even on windows, 406 /// a user may need access to filesystems with '/' separators, such as cloud 407 /// filesystems. 408 virtual char Separator() const; 409 410 /// \brief Split a path to its basename and dirname. 411 /// 412 /// Helper function for Basename and Dirname. 413 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) const; 414 415 /// \brief returns the final file name in the given path. 416 /// 417 /// Returns the part of the path after the final "/". If there is no 418 /// "/" in the path, the result is the same as the input. 419 virtual StringPiece Basename(StringPiece path) const; 420 421 /// \brief Returns the part of the path before the final "/". 422 /// 423 /// If there is a single leading "/" in the path, the result will be the 424 /// leading "/". If there is no "/" in the path, the result is the empty 425 /// prefix of the input. 426 StringPiece Dirname(StringPiece path) const; 427 428 /// \brief Returns the part of the basename of path after the final ".". 429 /// 430 /// If there is no "." in the basename, the result is empty. 431 StringPiece Extension(StringPiece path) const; 432 433 /// \brief Clean duplicate and trailing, "/"s, and resolve ".." and ".". 434 /// 435 /// NOTE: This respects relative vs. absolute paths, but does not 436 /// invoke any system calls (getcwd(2)) in order to resolve relative 437 /// paths with respect to the actual working directory. That is, this is 438 /// purely string manipulation, completely independent of process state. 439 std::string CleanPath(StringPiece path) const; 440 441 /// \brief Creates a URI from a scheme, host, and path. 442 /// 443 /// If the scheme is empty, we just return the path. 444 std::string CreateURI(StringPiece scheme, StringPiece host, 445 StringPiece path) const; 446 447 /// \brief Creates a temporary file name with an extension. 448 std::string GetTempFilename(const std::string& extension) const; 449 450 /// \brief Return true if path is absolute. 451 bool IsAbsolutePath(tensorflow::StringPiece path) const; 452 453 #ifndef SWIG // variadic templates 454 /// \brief Join multiple paths together. 455 /// 456 /// This function also removes the unnecessary path separators. 457 /// For example: 458 /// 459 /// Arguments | JoinPath 460 /// ---------------------------+---------- 461 /// '/foo', 'bar' | /foo/bar 462 /// '/foo/', 'bar' | /foo/bar 463 /// '/foo', '/bar' | /foo/bar 464 /// 465 /// Usage: 466 /// string path = io::JoinPath("/mydir", filename); 467 /// string path = io::JoinPath(FLAGS_test_srcdir, filename); 468 /// string path = io::JoinPath("/full", "path", "to", "filename"); 469 template <typename... T> JoinPath(const T &...args)470 std::string JoinPath(const T&... args) { 471 return JoinPathImpl({args...}); 472 } 473 #endif /* SWIG */ 474 475 std::string JoinPathImpl( 476 std::initializer_list<tensorflow::StringPiece> paths); 477 478 /// \brief Populates the scheme, host, and path from a URI. 479 /// 480 /// scheme, host, and path are guaranteed by this function to point into the 481 /// contents of uri, even if empty. 482 /// 483 /// Corner cases: 484 /// - If the URI is invalid, scheme and host are set to empty strings and the 485 /// passed string is assumed to be a path 486 /// - If the URI omits the path (e.g. file://host), then the path is left 487 /// empty. 488 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, 489 StringPiece* path) const; 490 491 // Transaction related API 492 493 /// \brief Starts a new transaction StartTransaction(TransactionToken ** token)494 virtual tensorflow::Status StartTransaction(TransactionToken** token) { 495 *token = nullptr; 496 return OkStatus(); 497 } 498 499 /// \brief Adds `path` to transaction in `token` AddToTransaction(const std::string & path,TransactionToken * token)500 virtual tensorflow::Status AddToTransaction(const std::string& path, 501 TransactionToken* token) { 502 return OkStatus(); 503 } 504 505 /// \brief Ends transaction EndTransaction(TransactionToken * token)506 virtual tensorflow::Status EndTransaction(TransactionToken* token) { 507 return OkStatus(); 508 } 509 510 /// \brief Get token for `path` or start a new transaction and add `path` to 511 /// it. GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)512 virtual tensorflow::Status GetTokenOrStartTransaction( 513 const std::string& path, TransactionToken** token) { 514 *token = nullptr; 515 return OkStatus(); 516 } 517 518 /// \brief Return transaction for `path` or nullptr in `token` GetTransactionForPath(const std::string & path,TransactionToken ** token)519 virtual tensorflow::Status GetTransactionForPath(const std::string& path, 520 TransactionToken** token) { 521 *token = nullptr; 522 return OkStatus(); 523 } 524 525 /// \brief Decode transaction to human readable string. 526 virtual std::string DecodeTransaction(const TransactionToken* token); 527 528 /// \brief Set File System Configuration Options SetOption(const string & key,const string & value)529 virtual Status SetOption(const string& key, const string& value) { 530 return errors::Unimplemented("SetOption"); 531 } 532 533 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<string> & values)534 virtual tensorflow::Status SetOption(const std::string& name, 535 const std::vector<string>& values) { 536 return errors::Unimplemented("SetOption"); 537 } 538 539 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<int64_t> & values)540 virtual tensorflow::Status SetOption(const std::string& name, 541 const std::vector<int64_t>& values) { 542 return errors::Unimplemented("SetOption"); 543 } 544 545 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<double> & values)546 virtual tensorflow::Status SetOption(const std::string& name, 547 const std::vector<double>& values) { 548 return errors::Unimplemented("SetOption"); 549 } 550 FileSystem()551 FileSystem() {} 552 553 virtual ~FileSystem() = default; 554 }; 555 /// This macro adds forwarding methods from FileSystem class to 556 /// used class since name hiding will prevent these to be accessed from 557 /// derived classes and would require all use locations to migrate to 558 /// Transactional API. This is an interim solution until ModularFileSystem class 559 /// becomes a singleton. 560 // TODO(sami): Remove this macro when filesystem plugins migration is complete. 561 #define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ 562 using FileSystem::NewRandomAccessFile; \ 563 using FileSystem::NewWritableFile; \ 564 using FileSystem::NewAppendableFile; \ 565 using FileSystem::NewReadOnlyMemoryRegionFromFile; \ 566 using FileSystem::FileExists; \ 567 using FileSystem::GetChildren; \ 568 using FileSystem::GetMatchingPaths; \ 569 using FileSystem::Stat; \ 570 using FileSystem::DeleteFile; \ 571 using FileSystem::RecursivelyCreateDir; \ 572 using FileSystem::DeleteDir; \ 573 using FileSystem::DeleteRecursively; \ 574 using FileSystem::GetFileSize; \ 575 using FileSystem::RenameFile; \ 576 using FileSystem::CopyFile; \ 577 using FileSystem::IsDirectory; \ 578 using FileSystem::FlushCaches 579 580 /// A Wrapper class for Transactional FileSystem support. 581 /// This provides means to make use of the transactions with minimal code change 582 /// Any operations that are done through this interface will be through the 583 /// transaction created at the time of construction of this instance. 584 /// See FileSystem documentation for method descriptions. 585 /// This class simply forwards all calls to wrapped filesystem either with given 586 /// transaction token or with token used in its construction. This allows doing 587 /// transactional filesystem access with minimal code change. 588 class WrappedFileSystem : public FileSystem { 589 public: 590 TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; 591 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)592 tensorflow::Status NewRandomAccessFile( 593 const std::string& fname, TransactionToken* token, 594 std::unique_ptr<RandomAccessFile>* result) override { 595 return fs_->NewRandomAccessFile(fname, (token ? token : token_), result); 596 } 597 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)598 tensorflow::Status NewWritableFile( 599 const std::string& fname, TransactionToken* token, 600 std::unique_ptr<WritableFile>* result) override { 601 return fs_->NewWritableFile(fname, (token ? token : token_), result); 602 } 603 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)604 tensorflow::Status NewAppendableFile( 605 const std::string& fname, TransactionToken* token, 606 std::unique_ptr<WritableFile>* result) override { 607 return fs_->NewAppendableFile(fname, (token ? token : token_), result); 608 } 609 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)610 tensorflow::Status NewReadOnlyMemoryRegionFromFile( 611 const std::string& fname, TransactionToken* token, 612 std::unique_ptr<ReadOnlyMemoryRegion>* result) override { 613 return fs_->NewReadOnlyMemoryRegionFromFile(fname, (token ? token : token_), 614 result); 615 } 616 FileExists(const std::string & fname,TransactionToken * token)617 tensorflow::Status FileExists(const std::string& fname, 618 TransactionToken* token) override { 619 return fs_->FileExists(fname, (token ? token : token_)); 620 } 621 FilesExist(const std::vector<string> & files,TransactionToken * token,std::vector<Status> * status)622 bool FilesExist(const std::vector<string>& files, TransactionToken* token, 623 std::vector<Status>* status) override { 624 return fs_->FilesExist(files, (token ? token : token_), status); 625 } 626 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)627 tensorflow::Status GetChildren(const std::string& dir, 628 TransactionToken* token, 629 std::vector<string>* result) override { 630 return fs_->GetChildren(dir, (token ? token : token_), result); 631 } 632 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)633 tensorflow::Status GetMatchingPaths(const std::string& pattern, 634 TransactionToken* token, 635 std::vector<string>* results) override { 636 return fs_->GetMatchingPaths(pattern, (token ? token : token_), results); 637 } 638 Match(const std::string & filename,const std::string & pattern)639 bool Match(const std::string& filename, const std::string& pattern) override { 640 return fs_->Match(filename, pattern); 641 } 642 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)643 tensorflow::Status Stat(const std::string& fname, TransactionToken* token, 644 FileStatistics* stat) override { 645 return fs_->Stat(fname, (token ? token : token_), stat); 646 } 647 DeleteFile(const std::string & fname,TransactionToken * token)648 tensorflow::Status DeleteFile(const std::string& fname, 649 TransactionToken* token) override { 650 return fs_->DeleteFile(fname, (token ? token : token_)); 651 } 652 CreateDir(const std::string & dirname,TransactionToken * token)653 tensorflow::Status CreateDir(const std::string& dirname, 654 TransactionToken* token) override { 655 return fs_->CreateDir(dirname, (token ? token : token_)); 656 } 657 RecursivelyCreateDir(const std::string & dirname,TransactionToken * token)658 tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 659 TransactionToken* token) override { 660 return fs_->RecursivelyCreateDir(dirname, (token ? token : token_)); 661 } 662 DeleteDir(const std::string & dirname,TransactionToken * token)663 tensorflow::Status DeleteDir(const std::string& dirname, 664 TransactionToken* token) override { 665 return fs_->DeleteDir(dirname, (token ? token : token_)); 666 } 667 DeleteRecursively(const std::string & dirname,TransactionToken * token,int64_t * undeleted_files,int64_t * undeleted_dirs)668 tensorflow::Status DeleteRecursively(const std::string& dirname, 669 TransactionToken* token, 670 int64_t* undeleted_files, 671 int64_t* undeleted_dirs) override { 672 return fs_->DeleteRecursively(dirname, (token ? token : token_), 673 undeleted_files, undeleted_dirs); 674 } 675 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)676 tensorflow::Status GetFileSize(const std::string& fname, 677 TransactionToken* token, 678 uint64* file_size) override { 679 return fs_->GetFileSize(fname, (token ? token : token_), file_size); 680 } 681 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)682 tensorflow::Status RenameFile(const std::string& src, 683 const std::string& target, 684 TransactionToken* token) override { 685 return fs_->RenameFile(src, target, (token ? token : token_)); 686 } 687 CopyFile(const std::string & src,const std::string & target,TransactionToken * token)688 tensorflow::Status CopyFile(const std::string& src, const std::string& target, 689 TransactionToken* token) override { 690 return fs_->CopyFile(src, target, (token ? token : token_)); 691 } 692 TranslateName(const std::string & name)693 std::string TranslateName(const std::string& name) const override { 694 return fs_->TranslateName(name); 695 } 696 IsDirectory(const std::string & fname,TransactionToken * token)697 tensorflow::Status IsDirectory(const std::string& fname, 698 TransactionToken* token) override { 699 return fs_->IsDirectory(fname, (token ? token : token_)); 700 } 701 HasAtomicMove(const std::string & path,bool * has_atomic_move)702 Status HasAtomicMove(const std::string& path, 703 bool* has_atomic_move) override { 704 return fs_->HasAtomicMove(path, has_atomic_move); 705 } 706 FlushCaches(TransactionToken * token)707 void FlushCaches(TransactionToken* token) override { 708 return fs_->FlushCaches((token ? token : token_)); 709 } 710 Separator()711 char Separator() const override { return fs_->Separator(); } 712 Basename(StringPiece path)713 StringPiece Basename(StringPiece path) const override { 714 return fs_->Basename(path); 715 } 716 StartTransaction(TransactionToken ** token)717 tensorflow::Status StartTransaction(TransactionToken** token) override { 718 return fs_->StartTransaction(token); 719 } 720 AddToTransaction(const std::string & path,TransactionToken * token)721 tensorflow::Status AddToTransaction(const std::string& path, 722 TransactionToken* token) override { 723 return fs_->AddToTransaction(path, (token ? token : token_)); 724 } 725 EndTransaction(TransactionToken * token)726 tensorflow::Status EndTransaction(TransactionToken* token) override { 727 return fs_->EndTransaction(token); 728 } 729 GetTransactionForPath(const std::string & path,TransactionToken ** token)730 tensorflow::Status GetTransactionForPath(const std::string& path, 731 TransactionToken** token) override { 732 return fs_->GetTransactionForPath(path, token); 733 } 734 GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)735 tensorflow::Status GetTokenOrStartTransaction( 736 const std::string& path, TransactionToken** token) override { 737 return fs_->GetTokenOrStartTransaction(path, token); 738 } 739 DecodeTransaction(const TransactionToken * token)740 std::string DecodeTransaction(const TransactionToken* token) override { 741 return fs_->DecodeTransaction((token ? token : token_)); 742 } 743 WrappedFileSystem(FileSystem * file_system,TransactionToken * token)744 WrappedFileSystem(FileSystem* file_system, TransactionToken* token) 745 : fs_(file_system), token_(token) {} 746 747 ~WrappedFileSystem() override = default; 748 749 private: 750 FileSystem* fs_; 751 TransactionToken* token_; 752 }; 753 754 /// A file abstraction for randomly reading the contents of a file. 755 class RandomAccessFile { 756 public: RandomAccessFile()757 RandomAccessFile() {} 758 virtual ~RandomAccessFile() = default; 759 760 /// \brief Returns the name of the file. 761 /// 762 /// This is an optional operation that may not be implemented by every 763 /// filesystem. Name(StringPiece * result)764 virtual tensorflow::Status Name(StringPiece* result) const { 765 return errors::Unimplemented("This filesystem does not support Name()"); 766 } 767 768 /// \brief Reads up to `n` bytes from the file starting at `offset`. 769 /// 770 /// `scratch[0..n-1]` may be written by this routine. Sets `*result` 771 /// to the data that was read (including if fewer than `n` bytes were 772 /// successfully read). May set `*result` to point at data in 773 /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when 774 /// `*result` is used. 775 /// 776 /// On OK returned status: `n` bytes have been stored in `*result`. 777 /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`. 778 /// 779 /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result` 780 /// because of EOF. 781 /// 782 /// Safe for concurrent use by multiple threads. 783 virtual tensorflow::Status Read(uint64 offset, size_t n, StringPiece* result, 784 char* scratch) const = 0; 785 786 #if defined(TF_CORD_SUPPORT) 787 /// \brief Read up to `n` bytes from the file starting at `offset`. Read(uint64 offset,size_t n,absl::Cord * cord)788 virtual tensorflow::Status Read(uint64 offset, size_t n, 789 absl::Cord* cord) const { 790 return errors::Unimplemented( 791 "Read(uint64, size_t, absl::Cord*) is not " 792 "implemented"); 793 } 794 #endif 795 796 private: 797 TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile); 798 }; 799 800 /// \brief A file abstraction for sequential writing. 801 /// 802 /// The implementation must provide buffering since callers may append 803 /// small fragments at a time to the file. 804 class WritableFile { 805 public: WritableFile()806 WritableFile() {} 807 virtual ~WritableFile() = default; 808 809 /// \brief Append 'data' to the file. 810 virtual tensorflow::Status Append(StringPiece data) = 0; 811 812 #if defined(TF_CORD_SUPPORT) 813 // \brief Append 'data' to the file. Append(const absl::Cord & cord)814 virtual tensorflow::Status Append(const absl::Cord& cord) { 815 for (StringPiece chunk : cord.Chunks()) { 816 TF_RETURN_IF_ERROR(Append(chunk)); 817 } 818 return OkStatus(); 819 } 820 #endif 821 822 /// \brief Close the file. 823 /// 824 /// Flush() and de-allocate resources associated with this file 825 /// 826 /// Typical return codes (not guaranteed to be exhaustive): 827 /// * OK 828 /// * Other codes, as returned from Flush() 829 virtual tensorflow::Status Close() = 0; 830 831 /// \brief Flushes the file and optionally syncs contents to filesystem. 832 /// 833 /// This should flush any local buffers whose contents have not been 834 /// delivered to the filesystem. 835 /// 836 /// If the process terminates after a successful flush, the contents 837 /// may still be persisted, since the underlying filesystem may 838 /// eventually flush the contents. If the OS or machine crashes 839 /// after a successful flush, the contents may or may not be 840 /// persisted, depending on the implementation. 841 virtual tensorflow::Status Flush() = 0; 842 843 // \brief Returns the name of the file. 844 /// 845 /// This is an optional operation that may not be implemented by every 846 /// filesystem. Name(StringPiece * result)847 virtual tensorflow::Status Name(StringPiece* result) const { 848 return errors::Unimplemented("This filesystem does not support Name()"); 849 } 850 851 /// \brief Syncs contents of file to filesystem. 852 /// 853 /// This waits for confirmation from the filesystem that the contents 854 /// of the file have been persisted to the filesystem; if the OS 855 /// or machine crashes after a successful Sync, the contents should 856 /// be properly saved. 857 virtual tensorflow::Status Sync() = 0; 858 859 /// \brief Retrieves the current write position in the file, or -1 on 860 /// error. 861 /// 862 /// This is an optional operation, subclasses may choose to return 863 /// errors::Unimplemented. Tell(int64_t * position)864 virtual tensorflow::Status Tell(int64_t* position) { 865 *position = -1; 866 return errors::Unimplemented("This filesystem does not support Tell()"); 867 } 868 869 private: 870 TF_DISALLOW_COPY_AND_ASSIGN(WritableFile); 871 }; 872 873 /// \brief A readonly memmapped file abstraction. 874 /// 875 /// The implementation must guarantee that all memory is accessible when the 876 /// object exists, independently from the Env that created it. 877 class ReadOnlyMemoryRegion { 878 public: ReadOnlyMemoryRegion()879 ReadOnlyMemoryRegion() {} 880 virtual ~ReadOnlyMemoryRegion() = default; 881 882 /// \brief Returns a pointer to the memory region. 883 virtual const void* data() = 0; 884 885 /// \brief Returns the length of the memory region in bytes. 886 virtual uint64 length() = 0; 887 }; 888 889 /// \brief A registry for file system implementations. 890 /// 891 /// Filenames are specified as an URI, which is of the form 892 /// [scheme://]<filename>. 893 /// File system implementations are registered using the REGISTER_FILE_SYSTEM 894 /// macro, providing the 'scheme' as the key. 895 /// 896 /// There are two `Register` methods: one using `Factory` for legacy filesystems 897 /// (deprecated mechanism of subclassing `FileSystem` and using 898 /// `REGISTER_FILE_SYSTEM` macro), and one using `std::unique_ptr<FileSystem>` 899 /// for the new modular approach. 900 /// 901 /// Note that the new API expects a pointer to `ModularFileSystem` but this is 902 /// not checked as there should be exactly one caller to the API and doing the 903 /// check results in a circular dependency between `BUILD` targets. 904 /// 905 /// Plan is to completely remove the filesystem registration from `Env` and 906 /// incorporate it into `ModularFileSystem` class (which will be renamed to be 907 /// the only `FileSystem` class and marked as `final`). But this will happen at 908 /// a later time, after we convert all filesystems to the new API. 909 /// 910 /// TODO(b/139060984): After all filesystems are converted, remove old 911 /// registration and update comment. 912 class FileSystemRegistry { 913 public: 914 typedef std::function<FileSystem*()> Factory; 915 916 virtual ~FileSystemRegistry() = default; 917 virtual tensorflow::Status Register(const std::string& scheme, 918 Factory factory) = 0; 919 virtual tensorflow::Status Register( 920 const std::string& scheme, std::unique_ptr<FileSystem> filesystem) = 0; 921 virtual FileSystem* Lookup(const std::string& scheme) = 0; 922 virtual tensorflow::Status GetRegisteredFileSystemSchemes( 923 std::vector<std::string>* schemes) = 0; 924 }; 925 926 } // namespace tensorflow 927 928 #endif // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 929