1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""File IO methods that wrap the C++ FileSystem API.""" 16import binascii 17import os 18from posixpath import join as urljoin 19import uuid 20 21import six 22 23from tensorflow.python.framework import errors 24from tensorflow.python.lib.io import _pywrap_file_io 25from tensorflow.python.util import compat 26from tensorflow.python.util import deprecation 27from tensorflow.python.util.tf_export import tf_export 28 29# A good default block size depends on the system in question. 30# A somewhat conservative default chosen here. 31_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024 32 33 34class FileIO(object): 35 """FileIO class that exposes methods to read / write to / from files. 36 37 The constructor takes the following arguments: 38 name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object) 39 giving the pathname of the file to be opened. 40 mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode. 41 42 Can be used as an iterator to iterate over lines in the file. 43 44 The default buffer size used for the BufferedInputStream used for reading 45 the file line by line is 1024 * 512 bytes. 46 """ 47 48 def __init__(self, name, mode, encoding="utf-8"): 49 self.__name = name 50 self.__mode = mode 51 self.__encoding = encoding 52 self._read_buf = None 53 self._writable_file = None 54 self._binary_mode = "b" in mode 55 mode = mode.replace("b", "") 56 if mode not in ("r", "w", "a", "r+", "w+", "a+"): 57 raise errors.InvalidArgumentError( 58 None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'") 59 self._read_check_passed = mode in ("r", "r+", "a+", "w+") 60 self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+") 61 62 @property 63 def name(self): 64 """Returns the file name.""" 65 return self.__name 66 67 @property 68 def mode(self): 69 """Returns the mode in which the file was opened.""" 70 return self.__mode 71 72 def _preread_check(self): 73 if not self._read_buf: 74 if not self._read_check_passed: 75 raise errors.PermissionDeniedError(None, None, 76 "File isn't open for reading") 77 self._read_buf = _pywrap_file_io.BufferedInputStream( 78 compat.path_to_str(self.__name), 1024 * 512) 79 80 def _prewrite_check(self): 81 if not self._writable_file: 82 if not self._write_check_passed: 83 raise errors.PermissionDeniedError(None, None, 84 "File isn't open for writing") 85 self._writable_file = _pywrap_file_io.WritableFile( 86 compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode)) 87 88 def _prepare_value(self, val): 89 if self._binary_mode: 90 return compat.as_bytes(val, encoding=self.__encoding) 91 else: 92 return compat.as_str_any(val, encoding=self.__encoding) 93 94 def size(self): 95 """Returns the size of the file.""" 96 return stat(self.__name).length 97 98 def write(self, file_content): 99 """Writes file_content to the file. Appends to the end of the file.""" 100 self._prewrite_check() 101 self._writable_file.append( 102 compat.as_bytes(file_content, encoding=self.__encoding)) 103 104 def read(self, n=-1): 105 """Returns the contents of a file as a string. 106 107 Starts reading from current position in file. 108 109 Args: 110 n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file. 111 112 Returns: 113 `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the 114 string if in string (regular) mode. 115 """ 116 self._preread_check() 117 if n == -1: 118 length = self.size() - self.tell() 119 else: 120 length = n 121 return self._prepare_value(self._read_buf.read(length)) 122 123 @deprecation.deprecated_args( 124 None, "position is deprecated in favor of the offset argument.", 125 "position") 126 def seek(self, offset=None, whence=0, position=None): 127 # TODO(jhseu): Delete later. Used to omit `position` from docs. 128 # pylint: disable=g-doc-args 129 """Seeks to the offset in the file. 130 131 Args: 132 offset: The byte count relative to the whence argument. 133 whence: Valid values for whence are: 134 0: start of the file (default) 135 1: relative to the current position of the file 136 2: relative to the end of file. `offset` is usually negative. 137 """ 138 # pylint: enable=g-doc-args 139 self._preread_check() 140 # We needed to make offset a keyword argument for backwards-compatibility. 141 # This check exists so that we can convert back to having offset be a 142 # positional argument. 143 # TODO(jhseu): Make `offset` a positional argument after `position` is 144 # deleted. 145 if offset is None and position is None: 146 raise TypeError("seek(): offset argument required") 147 if offset is not None and position is not None: 148 raise TypeError("seek(): offset and position may not be set " 149 "simultaneously.") 150 151 if position is not None: 152 offset = position 153 154 if whence == 0: 155 pass 156 elif whence == 1: 157 offset += self.tell() 158 elif whence == 2: 159 offset += self.size() 160 else: 161 raise errors.InvalidArgumentError( 162 None, None, 163 "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format( 164 whence)) 165 self._read_buf.seek(offset) 166 167 def readline(self): 168 r"""Reads the next line, keeping \n. At EOF, returns ''.""" 169 self._preread_check() 170 return self._prepare_value(self._read_buf.readline()) 171 172 def readlines(self): 173 """Returns all lines from the file in a list.""" 174 self._preread_check() 175 lines = [] 176 while True: 177 s = self.readline() 178 if not s: 179 break 180 lines.append(s) 181 return lines 182 183 def tell(self): 184 """Returns the current position in the file.""" 185 if self._read_check_passed: 186 self._preread_check() 187 return self._read_buf.tell() 188 else: 189 self._prewrite_check() 190 191 return self._writable_file.tell() 192 193 def __enter__(self): 194 """Make usable with "with" statement.""" 195 return self 196 197 def __exit__(self, unused_type, unused_value, unused_traceback): 198 """Make usable with "with" statement.""" 199 self.close() 200 201 def __iter__(self): 202 return self 203 204 def __next__(self): 205 retval = self.readline() 206 if not retval: 207 raise StopIteration() 208 return retval 209 210 def next(self): 211 return self.__next__() 212 213 def flush(self): 214 """Flushes the Writable file. 215 216 This only ensures that the data has made its way out of the process without 217 any guarantees on whether it's written to disk. This means that the 218 data would survive an application crash but not necessarily an OS crash. 219 """ 220 if self._writable_file: 221 self._writable_file.flush() 222 223 def close(self): 224 r"""Closes the file. 225 226 Should be called for the WritableFile to be flushed. 227 228 In general, if you use the context manager pattern, you don't need to call 229 this directly. 230 231 >>> with tf.io.gfile.GFile("/tmp/x", "w") as f: 232 ... f.write("asdf\n") 233 ... f.write("qwer\n") 234 >>> # implicit f.close() at the end of the block 235 236 For cloud filesystems, forgetting to call `close()` might result in data 237 loss as last write might not have been replicated. 238 """ 239 self._read_buf = None 240 if self._writable_file: 241 self._writable_file.close() 242 self._writable_file = None 243 244 def seekable(self): 245 """Returns True as FileIO supports random access ops of seek()/tell()""" 246 return True 247 248 249@tf_export("io.gfile.exists") 250def file_exists_v2(path): 251 """Determines whether a path exists or not. 252 253 >>> with open("/tmp/x", "w") as f: 254 ... f.write("asdf") 255 ... 256 4 257 >>> tf.io.gfile.exists("/tmp/x") 258 True 259 260 You can also specify the URI scheme for selecting a different filesystem: 261 262 >>> # for a GCS filesystem path: 263 >>> # tf.io.gfile.exists("gs://bucket/file") 264 >>> # for a local filesystem: 265 >>> with open("/tmp/x", "w") as f: 266 ... f.write("asdf") 267 ... 268 4 269 >>> tf.io.gfile.exists("file:///tmp/x") 270 True 271 272 This currently returns `True` for existing directories but don't rely on this 273 behavior, especially if you are using cloud filesystems (e.g., GCS, S3, 274 Hadoop): 275 276 >>> tf.io.gfile.exists("/tmp") 277 True 278 279 Args: 280 path: string, a path 281 282 Returns: 283 True if the path exists, whether it's a file or a directory. 284 False if the path does not exist and there are no filesystem errors. 285 286 Raises: 287 errors.OpError: Propagates any errors reported by the FileSystem API. 288 """ 289 try: 290 _pywrap_file_io.FileExists(compat.path_to_bytes(path)) 291 except errors.NotFoundError: 292 return False 293 return True 294 295 296@tf_export(v1=["gfile.Exists"]) 297def file_exists(filename): 298 return file_exists_v2(filename) 299 300 301file_exists.__doc__ = file_exists_v2.__doc__ 302 303 304@tf_export(v1=["gfile.Remove"]) 305def delete_file(filename): 306 """Deletes the file located at 'filename'. 307 308 Args: 309 filename: string, a filename 310 311 Raises: 312 errors.OpError: Propagates any errors reported by the FileSystem API. E.g., 313 `NotFoundError` if the file does not exist. 314 """ 315 delete_file_v2(filename) 316 317 318@tf_export("io.gfile.remove") 319def delete_file_v2(path): 320 """Deletes the path located at 'path'. 321 322 Args: 323 path: string, a path 324 325 Raises: 326 errors.OpError: Propagates any errors reported by the FileSystem API. E.g., 327 `NotFoundError` if the path does not exist. 328 """ 329 _pywrap_file_io.DeleteFile(compat.path_to_bytes(path)) 330 331 332def read_file_to_string(filename, binary_mode=False): 333 """Reads the entire contents of a file to a string. 334 335 Args: 336 filename: string, path to a file 337 binary_mode: whether to open the file in binary mode or not. This changes 338 the type of the object returned. 339 340 Returns: 341 contents of the file as a string or bytes. 342 343 Raises: 344 errors.OpError: Raises variety of errors that are subtypes e.g. 345 `NotFoundError` etc. 346 """ 347 if binary_mode: 348 f = FileIO(filename, mode="rb") 349 else: 350 f = FileIO(filename, mode="r") 351 return f.read() 352 353 354def write_string_to_file(filename, file_content): 355 """Writes a string to a given file. 356 357 Args: 358 filename: string, path to a file 359 file_content: string, contents that need to be written to the file 360 361 Raises: 362 errors.OpError: If there are errors during the operation. 363 """ 364 with FileIO(filename, mode="w") as f: 365 f.write(file_content) 366 367 368@tf_export(v1=["gfile.Glob"]) 369def get_matching_files(filename): 370 """Returns a list of files that match the given pattern(s). 371 372 Args: 373 filename: string or iterable of strings. The glob pattern(s). 374 375 Returns: 376 A list of strings containing filenames that match the given pattern(s). 377 378 Raises: 379 * errors.OpError: If there are filesystem / directory listing errors. 380 * errors.NotFoundError: If pattern to be matched is an invalid directory. 381 """ 382 return get_matching_files_v2(filename) 383 384 385@tf_export("io.gfile.glob") 386def get_matching_files_v2(pattern): 387 r"""Returns a list of files that match the given pattern(s). 388 389 The patterns are defined as strings. Supported patterns are defined 390 here. Note that the pattern can be a Python iteratable of string patterns. 391 392 The format definition of the pattern is: 393 394 **pattern**: `{ term }` 395 396 **term**: 397 * `'*'`: matches any sequence of non-'/' characters 398 * `'?'`: matches a single non-'/' character 399 * `'[' [ '^' ] { match-list } ']'`: matches any single 400 character (not) on the list 401 * `c`: matches character `c` where `c != '*', '?', '\\', '['` 402 * `'\\' c`: matches character `c` 403 404 **character range**: 405 * `c`: matches character `c` while `c != '\\', '-', ']'` 406 * `'\\' c`: matches character `c` 407 * `lo '-' hi`: matches character `c` for `lo <= c <= hi` 408 409 Examples: 410 411 >>> tf.io.gfile.glob("*.py") 412 ... # For example, ['__init__.py'] 413 414 >>> tf.io.gfile.glob("__init__.??") 415 ... # As above 416 417 >>> files = {"*.py"} 418 >>> the_iterator = iter(files) 419 >>> tf.io.gfile.glob(the_iterator) 420 ... # As above 421 422 See the C++ function `GetMatchingPaths` in 423 [`core/platform/file_system.h`] 424 (../../../core/platform/file_system.h) 425 for implementation details. 426 427 Args: 428 pattern: string or iterable of strings. The glob pattern(s). 429 430 Returns: 431 A list of strings containing filenames that match the given pattern(s). 432 433 Raises: 434 errors.OpError: If there are filesystem / directory listing errors. 435 errors.NotFoundError: If pattern to be matched is an invalid directory. 436 """ 437 if isinstance(pattern, six.string_types): 438 return [ 439 # Convert the filenames to string from bytes. 440 compat.as_str_any(matching_filename) 441 for matching_filename in _pywrap_file_io.GetMatchingFiles( 442 compat.as_bytes(pattern)) 443 ] 444 else: 445 return [ 446 # Convert the filenames to string from bytes. 447 compat.as_str_any(matching_filename) # pylint: disable=g-complex-comprehension 448 for single_filename in pattern 449 for matching_filename in _pywrap_file_io.GetMatchingFiles( 450 compat.as_bytes(single_filename)) 451 ] 452 453 454@tf_export(v1=["gfile.MkDir"]) 455def create_dir(dirname): 456 """Creates a directory with the name `dirname`. 457 458 Args: 459 dirname: string, name of the directory to be created 460 461 Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs` 462 instead if there is the possibility that the parent dirs don't exist. 463 464 Raises: 465 errors.OpError: If the operation fails. 466 """ 467 create_dir_v2(dirname) 468 469 470@tf_export("io.gfile.mkdir") 471def create_dir_v2(path): 472 """Creates a directory with the name given by `path`. 473 474 Args: 475 path: string, name of the directory to be created 476 477 Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs` 478 instead if there is the possibility that the parent dirs don't exist. 479 480 Raises: 481 errors.OpError: If the operation fails. 482 """ 483 _pywrap_file_io.CreateDir(compat.path_to_bytes(path)) 484 485 486@tf_export(v1=["gfile.MakeDirs"]) 487def recursive_create_dir(dirname): 488 """Creates a directory and all parent/intermediate directories. 489 490 It succeeds if dirname already exists and is writable. 491 492 Args: 493 dirname: string, name of the directory to be created 494 495 Raises: 496 errors.OpError: If the operation fails. 497 """ 498 recursive_create_dir_v2(dirname) 499 500 501@tf_export("io.gfile.makedirs") 502def recursive_create_dir_v2(path): 503 """Creates a directory and all parent/intermediate directories. 504 505 It succeeds if path already exists and is writable. 506 507 Args: 508 path: string, name of the directory to be created 509 510 Raises: 511 errors.OpError: If the operation fails. 512 """ 513 _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path)) 514 515 516@tf_export("io.gfile.copy") 517def copy_v2(src, dst, overwrite=False): 518 """Copies data from `src` to `dst`. 519 520 >>> with open("/tmp/x", "w") as f: 521 ... f.write("asdf") 522 ... 523 4 524 >>> tf.io.gfile.exists("/tmp/x") 525 True 526 >>> tf.io.gfile.copy("/tmp/x", "/tmp/y") 527 >>> tf.io.gfile.exists("/tmp/y") 528 True 529 >>> tf.io.gfile.remove("/tmp/y") 530 531 You can also specify the URI scheme for selecting a different filesystem: 532 533 >>> with open("/tmp/x", "w") as f: 534 ... f.write("asdf") 535 ... 536 4 537 >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y") 538 >>> tf.io.gfile.exists("/tmp/y") 539 True 540 >>> tf.io.gfile.remove("/tmp/y") 541 542 Note that you need to always specify a file name, even if moving into a new 543 directory. This is because some cloud filesystems don't have the concept of a 544 directory. 545 546 >>> with open("/tmp/x", "w") as f: 547 ... f.write("asdf") 548 ... 549 4 550 >>> tf.io.gfile.mkdir("/tmp/new_dir") 551 >>> tf.io.gfile.copy("/tmp/x", "/tmp/new_dir/y") 552 >>> tf.io.gfile.exists("/tmp/new_dir/y") 553 True 554 >>> tf.io.gfile.rmtree("/tmp/new_dir") 555 556 If you want to prevent errors if the path already exists, you can use 557 `overwrite` argument: 558 559 >>> with open("/tmp/x", "w") as f: 560 ... f.write("asdf") 561 ... 562 4 563 >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y") 564 >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y", overwrite=True) 565 >>> tf.io.gfile.remove("/tmp/y") 566 567 Note that the above will still result in an error if you try to overwrite a 568 directory with a file. 569 570 Note that you cannot copy a directory, only file arguments are supported. 571 572 Args: 573 src: string, name of the file whose contents need to be copied 574 dst: string, name of the file to which to copy to 575 overwrite: boolean, if false it's an error for `dst` to be occupied by an 576 existing file. 577 578 Raises: 579 errors.OpError: If the operation fails. 580 """ 581 _pywrap_file_io.CopyFile( 582 compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite) 583 584 585@tf_export(v1=["gfile.Copy"]) 586def copy(oldpath, newpath, overwrite=False): 587 copy_v2(oldpath, newpath, overwrite) 588 589 590copy.__doc__ = copy_v2.__doc__ 591 592 593@tf_export(v1=["gfile.Rename"]) 594def rename(oldname, newname, overwrite=False): 595 """Rename or move a file / directory. 596 597 Args: 598 oldname: string, pathname for a file 599 newname: string, pathname to which the file needs to be moved 600 overwrite: boolean, if false it's an error for `newname` to be occupied by 601 an existing file. 602 603 Raises: 604 errors.OpError: If the operation fails. 605 """ 606 rename_v2(oldname, newname, overwrite) 607 608 609@tf_export("io.gfile.rename") 610def rename_v2(src, dst, overwrite=False): 611 """Rename or move a file / directory. 612 613 Args: 614 src: string, pathname for a file 615 dst: string, pathname to which the file needs to be moved 616 overwrite: boolean, if false it's an error for `dst` to be occupied by an 617 existing file. 618 619 Raises: 620 errors.OpError: If the operation fails. 621 """ 622 _pywrap_file_io.RenameFile( 623 compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite) 624 625 626def atomic_write_string_to_file(filename, contents, overwrite=True): 627 """Writes to `filename` atomically. 628 629 This means that when `filename` appears in the filesystem, it will contain 630 all of `contents`. With write_string_to_file, it is possible for the file 631 to appear in the filesystem with `contents` only partially written. 632 633 Accomplished by writing to a temp file and then renaming it. 634 635 Args: 636 filename: string, pathname for a file 637 contents: string, contents that need to be written to the file 638 overwrite: boolean, if false it's an error for `filename` to be occupied by 639 an existing file. 640 """ 641 if not has_atomic_move(filename): 642 write_string_to_file(filename, contents) 643 else: 644 temp_pathname = filename + ".tmp" + uuid.uuid4().hex 645 write_string_to_file(temp_pathname, contents) 646 try: 647 rename(temp_pathname, filename, overwrite) 648 except errors.OpError: 649 delete_file(temp_pathname) 650 raise 651 652 653@tf_export(v1=["gfile.DeleteRecursively"]) 654def delete_recursively(dirname): 655 """Deletes everything under dirname recursively. 656 657 Args: 658 dirname: string, a path to a directory 659 660 Raises: 661 errors.OpError: If the operation fails. 662 """ 663 delete_recursively_v2(dirname) 664 665 666@tf_export("io.gfile.rmtree") 667def delete_recursively_v2(path): 668 """Deletes everything under path recursively. 669 670 Args: 671 path: string, a path 672 673 Raises: 674 errors.OpError: If the operation fails. 675 """ 676 _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path)) 677 678 679@tf_export(v1=["gfile.IsDirectory"]) 680def is_directory(dirname): 681 """Returns whether the path is a directory or not. 682 683 Args: 684 dirname: string, path to a potential directory 685 686 Returns: 687 True, if the path is a directory; False otherwise 688 """ 689 return is_directory_v2(dirname) 690 691 692@tf_export("io.gfile.isdir") 693def is_directory_v2(path): 694 """Returns whether the path is a directory or not. 695 696 Args: 697 path: string, path to a potential directory 698 699 Returns: 700 True, if the path is a directory; False otherwise 701 """ 702 try: 703 return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path)) 704 except errors.OpError: 705 return False 706 707 708def has_atomic_move(path): 709 """Checks whether the file system supports atomic moves. 710 711 Returns whether or not the file system of the given path supports the atomic 712 move operation for a file or folder. If atomic move is supported, it is 713 recommended to use a temp location for writing and then move to the final 714 location. 715 716 Args: 717 path: string, path to a file 718 719 Returns: 720 True, if the path is on a file system that supports atomic move 721 False, if the file system does not support atomic move. In such cases 722 we need to be careful about using moves. In some cases it is safer 723 not to use temporary locations in this case. 724 """ 725 try: 726 return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path)) 727 except errors.OpError: 728 # defaults to True 729 return True 730 731 732@tf_export(v1=["gfile.ListDirectory"]) 733def list_directory(dirname): 734 """Returns a list of entries contained within a directory. 735 736 The list is in arbitrary order. It does not contain the special entries "." 737 and "..". 738 739 Args: 740 dirname: string, path to a directory 741 742 Returns: 743 [filename1, filename2, ... filenameN] as strings 744 745 Raises: 746 errors.NotFoundError if directory doesn't exist 747 """ 748 return list_directory_v2(dirname) 749 750 751@tf_export("io.gfile.listdir") 752def list_directory_v2(path): 753 """Returns a list of entries contained within a directory. 754 755 The list is in arbitrary order. It does not contain the special entries "." 756 and "..". 757 758 Args: 759 path: string, path to a directory 760 761 Returns: 762 [filename1, filename2, ... filenameN] as strings 763 764 Raises: 765 errors.NotFoundError if directory doesn't exist 766 """ 767 if not is_directory(path): 768 raise errors.NotFoundError( 769 node_def=None, 770 op=None, 771 message="Could not find directory {}".format(path)) 772 773 # Convert each element to string, since the return values of the 774 # vector of string should be interpreted as strings, not bytes. 775 return [ 776 compat.as_str_any(filename) 777 for filename in _pywrap_file_io.GetChildren(compat.path_to_bytes(path)) 778 ] 779 780 781@tf_export("io.gfile.join") 782def join(path, *paths): 783 r"""Join one or more path components intelligently. 784 785 TensorFlow specific filesystems will be joined 786 like a url (using "/" as the path seperator) on all platforms: 787 788 On Windows or Linux/Unix-like: 789 >>> tf.io.gfile.join("gcs://folder", "file.py") 790 'gcs://folder/file.py' 791 792 >>> tf.io.gfile.join("ram://folder", "file.py") 793 'ram://folder/file.py' 794 795 But the native filesystem is handled just like os.path.join: 796 797 >>> path = tf.io.gfile.join("folder", "file.py") 798 >>> if os.name == "nt": 799 ... expected = "folder\\file.py" # Windows 800 ... else: 801 ... expected = "folder/file.py" # Linux/Unix-like 802 >>> path == expected 803 True 804 805 Args: 806 path: string, path to a directory 807 paths: string, additional paths to concatenate 808 809 Returns: 810 path: the joined path. 811 """ 812 # os.path.join won't take mixed bytes/str, so don't overwrite the incoming `path` var 813 path_ = compat.as_str_any(compat.path_to_str(path)) 814 if "://" in path_[1:]: 815 return urljoin(path, *paths) 816 return os.path.join(path, *paths) 817 818 819@tf_export(v1=["gfile.Walk"]) 820def walk(top, in_order=True): 821 """Recursive directory tree generator for directories. 822 823 Args: 824 top: string, a Directory name 825 in_order: bool, Traverse in order if True, post order if False. Errors that 826 happen while listing directories are ignored. 827 828 Yields: 829 Each yield is a 3-tuple: the pathname of a directory, followed by lists of 830 all its subdirectories and leaf files. That is, each yield looks like: 831 `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`. 832 Each item is a string. 833 """ 834 return walk_v2(top, in_order) 835 836 837@tf_export("io.gfile.walk") 838def walk_v2(top, topdown=True, onerror=None): 839 """Recursive directory tree generator for directories. 840 841 Args: 842 top: string, a Directory name 843 topdown: bool, Traverse pre order if True, post order if False. 844 onerror: optional handler for errors. Should be a function, it will be 845 called with the error as argument. Rethrowing the error aborts the walk. 846 Errors that happen while listing directories are ignored. 847 848 Yields: 849 Each yield is a 3-tuple: the pathname of a directory, followed by lists of 850 all its subdirectories and leaf files. That is, each yield looks like: 851 `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`. 852 Each item is a string. 853 """ 854 855 def _make_full_path(parent, item): 856 # Since `join` discards paths before one that starts with the path 857 # separator (https://docs.python.org/3/library/os.path.html#join), 858 # we have to manually handle that case as `/` is a valid character on GCS. 859 if item[0] == os.sep: 860 return "".join([join(parent, ""), item]) 861 return join(parent, item) 862 863 top = compat.as_str_any(compat.path_to_str(top)) 864 try: 865 listing = list_directory(top) 866 except errors.NotFoundError as err: 867 if onerror: 868 onerror(err) 869 else: 870 return 871 872 files = [] 873 subdirs = [] 874 for item in listing: 875 full_path = _make_full_path(top, item) 876 if is_directory(full_path): 877 subdirs.append(item) 878 else: 879 files.append(item) 880 881 here = (top, subdirs, files) 882 883 if topdown: 884 yield here 885 886 for subdir in subdirs: 887 for subitem in walk_v2( 888 _make_full_path(top, subdir), topdown, onerror=onerror): 889 yield subitem 890 891 if not topdown: 892 yield here 893 894 895@tf_export(v1=["gfile.Stat"]) 896def stat(filename): 897 """Returns file statistics for a given path. 898 899 Args: 900 filename: string, path to a file 901 902 Returns: 903 FileStatistics struct that contains information about the path 904 905 Raises: 906 errors.OpError: If the operation fails. 907 """ 908 return stat_v2(filename) 909 910 911@tf_export("io.gfile.stat") 912def stat_v2(path): 913 """Returns file statistics for a given path. 914 915 Args: 916 path: string, path to a file 917 918 Returns: 919 FileStatistics struct that contains information about the path 920 921 Raises: 922 errors.OpError: If the operation fails. 923 """ 924 return _pywrap_file_io.Stat(compat.path_to_str(path)) 925 926 927def filecmp(filename_a, filename_b): 928 """Compare two files, returning True if they are the same, False otherwise. 929 930 We check size first and return False quickly if the files are different sizes. 931 If they are the same size, we continue to generating a crc for the whole file. 932 933 You might wonder: why not use Python's `filecmp.cmp()` instead? The answer is 934 that the builtin library is not robust to the many different filesystems 935 TensorFlow runs on, and so we here perform a similar comparison with 936 the more robust FileIO. 937 938 Args: 939 filename_a: string path to the first file. 940 filename_b: string path to the second file. 941 942 Returns: 943 True if the files are the same, False otherwise. 944 """ 945 size_a = FileIO(filename_a, "rb").size() 946 size_b = FileIO(filename_b, "rb").size() 947 if size_a != size_b: 948 return False 949 950 # Size is the same. Do a full check. 951 crc_a = file_crc32(filename_a) 952 crc_b = file_crc32(filename_b) 953 return crc_a == crc_b 954 955 956def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE): 957 """Get the crc32 of the passed file. 958 959 The crc32 of a file can be used for error checking; two files with the same 960 crc32 are considered equivalent. Note that the entire file must be read 961 to produce the crc32. 962 963 Args: 964 filename: string, path to a file 965 block_size: Integer, process the files by reading blocks of `block_size` 966 bytes. Use -1 to read the file as once. 967 968 Returns: 969 hexadecimal as string, the crc32 of the passed file. 970 """ 971 crc = 0 972 with FileIO(filename, mode="rb") as f: 973 chunk = f.read(n=block_size) 974 while chunk: 975 crc = binascii.crc32(chunk, crc) 976 chunk = f.read(n=block_size) 977 return hex(crc & 0xFFFFFFFF) 978 979 980@tf_export("io.gfile.get_registered_schemes") 981def get_registered_schemes(): 982 """Returns the currently registered filesystem schemes. 983 984 The `tf.io.gfile` APIs, in addition to accepting traditional filesystem paths, 985 also accept file URIs that begin with a scheme. For example, the local 986 filesystem path `/tmp/tf` can also be addressed as `file:///tmp/tf`. In this 987 case, the scheme is `file`, followed by `://` and then the path, according to 988 [URI syntax](https://datatracker.ietf.org/doc/html/rfc3986#section-3). 989 990 This function returns the currently registered schemes that will be recognized 991 by `tf.io.gfile` APIs. This includes both built-in schemes and those 992 registered by other TensorFlow filesystem implementations, for example those 993 provided by [TensorFlow I/O](https://github.com/tensorflow/io). 994 995 The empty string is always included, and represents the "scheme" for regular 996 local filesystem paths. 997 998 Returns: 999 List of string schemes, e.g. `['', 'file', 'ram']`, in arbitrary order. 1000 1001 Raises: 1002 errors.OpError: If the operation fails. 1003 """ 1004 return _pywrap_file_io.GetRegisteredSchemes() 1005