# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""File IO methods that wrap the C++ FileSystem API."""
import binascii
import os
from posixpath import join as urljoin
import uuid

import six

from tensorflow.python.framework import errors
from tensorflow.python.lib.io import _pywrap_file_io
from tensorflow.python.util import compat
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export

# A good default block size depends on the system in question.
# A somewhat conservative default chosen here.
_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024


class FileIO(object):
  """FileIO class that exposes methods to read / write to / from files.

  The constructor takes the following arguments:
  name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object)
    giving the pathname of the file to be opened.
  mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode.

  Can be used as an iterator to iterate over lines in the file.
  The default buffer size of the `BufferedInputStream` used for reading
  the file line by line is 1024 * 512 bytes.
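
  Example (a minimal sketch; assumes a writable local `/tmp` and uses the
  hypothetical path `/tmp/fileio_demo.txt`):

  >>> with FileIO("/tmp/fileio_demo.txt", "w") as f:
  ...   f.write("hello ")
  ...   f.write("world")
  >>> with FileIO("/tmp/fileio_demo.txt", "r") as f:
  ...   f.read()
  'hello world'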
  """

  def __init__(self, name, mode, encoding="utf-8"):
    self.__name = name
    self.__mode = mode
    self.__encoding = encoding
    self._read_buf = None
    self._writable_file = None
    self._binary_mode = "b" in mode
    mode = mode.replace("b", "")
    if mode not in ("r", "w", "a", "r+", "w+", "a+"):
      raise errors.InvalidArgumentError(
          None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'")
    self._read_check_passed = mode in ("r", "r+", "a+", "w+")
    self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+")

  @property
  def name(self):
    """Returns the file name."""
    return self.__name

  @property
  def mode(self):
    """Returns the mode in which the file was opened."""
    return self.__mode

  def _preread_check(self):
    if not self._read_buf:
      if not self._read_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for reading")
      self._read_buf = _pywrap_file_io.BufferedInputStream(
          compat.path_to_str(self.__name), 1024 * 512)

  def _prewrite_check(self):
    if not self._writable_file:
      if not self._write_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for writing")
      self._writable_file = _pywrap_file_io.WritableFile(
          compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode))

  def _prepare_value(self, val):
    if self._binary_mode:
      return compat.as_bytes(val, encoding=self.__encoding)
    else:
      return compat.as_str_any(val, encoding=self.__encoding)

  def size(self):
    """Returns the size of the file."""
    return stat(self.__name).length

  def write(self, file_content):
    """Writes file_content to the file. Appends to the end of the file."""
    self._prewrite_check()
    self._writable_file.append(
        compat.as_bytes(file_content, encoding=self.__encoding))

  def read(self, n=-1):
    """Returns the contents of a file as a string.

    Starts reading from current position in file.

    Args:
      n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file.

    Returns:
      `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the
      string if in string (regular) mode.
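
    Example (a sketch; assumes a writable local `/tmp` and the hypothetical
    path `/tmp/read_demo.txt`):

    >>> with FileIO("/tmp/read_demo.txt", "w") as f:
    ...   f.write("0123456789")
    >>> f = FileIO("/tmp/read_demo.txt", "r")
    >>> f.read(4)
    '0123'
    >>> f.read()  # reads from the current position to the end of file
    '456789'
    >>> f.close()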
    """
    self._preread_check()
    if n == -1:
      length = self.size() - self.tell()
    else:
      length = n
    return self._prepare_value(self._read_buf.read(length))

  @deprecation.deprecated_args(
      None, "position is deprecated in favor of the offset argument.",
      "position")
  def seek(self, offset=None, whence=0, position=None):
    # TODO(jhseu): Delete later. Used to omit `position` from docs.
    # pylint: disable=g-doc-args
    """Seeks to the offset in the file.

    Args:
      offset: The byte count relative to the whence argument.
      whence: Valid values for whence are:
        0: start of the file (default)
        1: relative to the current position of the file
        2: relative to the end of file. `offset` is usually negative.
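
    Example (a sketch; assumes a writable local `/tmp` and the hypothetical
    path `/tmp/seek_demo.txt`):

    >>> with FileIO("/tmp/seek_demo.txt", "w") as f:
    ...   f.write("0123456789")
    >>> f = FileIO("/tmp/seek_demo.txt", "r")
    >>> f.seek(7)             # 7 bytes past the start of the file
    >>> f.read()
    '789'
    >>> f.seek(-2, whence=2)  # 2 bytes before the end of the file
    >>> f.read()
    '89'
    >>> f.close()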
    """
    # pylint: enable=g-doc-args
    self._preread_check()
    # We needed to make offset a keyword argument for backwards-compatibility.
    # This check exists so that we can convert back to having offset be a
    # positional argument.
    # TODO(jhseu): Make `offset` a positional argument after `position` is
    # deleted.
    if offset is None and position is None:
      raise TypeError("seek(): offset argument required")
    if offset is not None and position is not None:
      raise TypeError("seek(): offset and position may not be set "
                      "simultaneously.")

    if position is not None:
      offset = position

    if whence == 0:
      pass
    elif whence == 1:
      offset += self.tell()
    elif whence == 2:
      offset += self.size()
    else:
      raise errors.InvalidArgumentError(
          None, None,
          "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format(
              whence))
    self._read_buf.seek(offset)

  def readline(self):
    r"""Reads the next line, keeping \n. At EOF, returns ''."""
    self._preread_check()
    return self._prepare_value(self._read_buf.readline())

  def readlines(self):
    """Returns all lines from the file in a list."""
    self._preread_check()
    lines = []
    while True:
      s = self.readline()
      if not s:
        break
      lines.append(s)
    return lines

  def tell(self):
    """Returns the current position in the file."""
    if self._read_check_passed:
      self._preread_check()
      return self._read_buf.tell()
    else:
      self._prewrite_check()
      return self._writable_file.tell()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.close()

  def __iter__(self):
    return self

  def __next__(self):
    retval = self.readline()
    if not retval:
      raise StopIteration()
    return retval

  def next(self):
    return self.__next__()

  def flush(self):
    """Flushes the Writable file.

    This only ensures that the data has made its way out of the process without
    any guarantees on whether it's written to disk. This means that the
    data would survive an application crash but not necessarily an OS crash.
    """
    if self._writable_file:
      self._writable_file.flush()

  def close(self):
    r"""Closes the file.

    Should be called for the WritableFile to be flushed.

    In general, if you use the context manager pattern, you don't need to call
    this directly.

    >>> with tf.io.gfile.GFile("/tmp/x", "w") as f:
    ...   f.write("asdf\n")
    ...   f.write("qwer\n")
    >>> # implicit f.close() at the end of the block

    For cloud filesystems, forgetting to call `close()` might result in data
    loss as last write might not have been replicated.
    """
    self._read_buf = None
    if self._writable_file:
      self._writable_file.close()
      self._writable_file = None

  def seekable(self):
    """Returns True as FileIO supports random access ops of seek()/tell()"""
    return True


@tf_export("io.gfile.exists")
def file_exists_v2(path):
  """Determines whether a path exists or not.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True

  You can also specify the URI scheme for selecting a different filesystem:

  >>> # for a GCS filesystem path:
  >>> # tf.io.gfile.exists("gs://bucket/file")
  >>> # for a local filesystem:
  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("file:///tmp/x")
  True

  This currently returns `True` for existing directories but don't rely on this
  behavior, especially if you are using cloud filesystems (e.g., GCS, S3,
  Hadoop):

  >>> tf.io.gfile.exists("/tmp")
  True

  Args:
    path: string, a path

  Returns:
    True if the path exists, whether it's a file or a directory.
    False if the path does not exist and there are no filesystem errors.

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
  """
  try:
    _pywrap_file_io.FileExists(compat.path_to_bytes(path))
  except errors.NotFoundError:
    return False
  return True


@tf_export(v1=["gfile.Exists"])
def file_exists(filename):
  return file_exists_v2(filename)


file_exists.__doc__ = file_exists_v2.__doc__


@tf_export(v1=["gfile.Remove"])
def delete_file(filename):
  """Deletes the file located at 'filename'.

  Args:
    filename: string, a filename

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.  E.g.,
    `NotFoundError` if the file does not exist.
  """
  delete_file_v2(filename)


@tf_export("io.gfile.remove")
def delete_file_v2(path):
  """Deletes the path located at 'path'.

  Args:
    path: string, a path

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.  E.g.,
    `NotFoundError` if the path does not exist.
  """
  _pywrap_file_io.DeleteFile(compat.path_to_bytes(path))


def read_file_to_string(filename, binary_mode=False):
  """Reads the entire contents of a file to a string.

  Args:
    filename: string, path to a file
    binary_mode: whether to open the file in binary mode or not. This changes
      the type of the object returned.

  Returns:
    contents of the file as a string or bytes.

  Raises:
    errors.OpError: Raises a variety of `OpError` subtypes, e.g.
    `NotFoundError`.
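
  Example (a sketch; assumes a writable local `/tmp` and the hypothetical
  path `/tmp/demo.txt`):

  >>> write_string_to_file("/tmp/demo.txt", "hello")
  >>> read_file_to_string("/tmp/demo.txt")
  'hello'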
  """
  if binary_mode:
    f = FileIO(filename, mode="rb")
  else:
    f = FileIO(filename, mode="r")
  return f.read()


def write_string_to_file(filename, file_content):
  """Writes a string to a given file.

  Args:
    filename: string, path to a file
    file_content: string, contents that need to be written to the file

  Raises:
    errors.OpError: If there are errors during the operation.
  """
  with FileIO(filename, mode="w") as f:
    f.write(file_content)


@tf_export(v1=["gfile.Glob"])
def get_matching_files(filename):
  """Returns a list of files that match the given pattern(s).

  Args:
    filename: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    errors.OpError: If there are filesystem / directory listing errors.
    errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  return get_matching_files_v2(filename)


@tf_export("io.gfile.glob")
def get_matching_files_v2(pattern):
  r"""Returns a list of files that match the given pattern(s).

  The patterns are defined as strings. Supported patterns are defined
  below. Note that the pattern can be a Python iterable of string patterns.

  The format definition of the pattern is:

  **pattern**: `{ term }`

  **term**:
    * `'*'`: matches any sequence of non-'/' characters
    * `'?'`: matches a single non-'/' character
    * `'[' [ '^' ] { match-list } ']'`: matches any single
      character (not) on the list
    * `c`: matches character `c`  where `c != '*', '?', '\\', '['`
    * `'\\' c`: matches character `c`

  **character range**:
    * `c`: matches character `c` while `c != '\\', '-', ']'`
    * `'\\' c`: matches character `c`
    * `lo '-' hi`: matches character `c` for `lo <= c <= hi`

  Examples:

  >>> tf.io.gfile.glob("*.py")
  ... # For example, ['__init__.py']

  >>> tf.io.gfile.glob("__init__.??")
  ... # As above

  >>> files = {"*.py"}
  >>> the_iterator = iter(files)
  >>> tf.io.gfile.glob(the_iterator)
  ... # As above

  See the C++ function `GetMatchingPaths` in
  [`core/platform/file_system.h`]
  (../../../core/platform/file_system.h)
  for implementation details.

  Args:
    pattern: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    errors.OpError: If there are filesystem / directory listing errors.
    errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  if isinstance(pattern, six.string_types):
    return [
        # Convert the filenames to string from bytes.
        compat.as_str_any(matching_filename)
        for matching_filename in _pywrap_file_io.GetMatchingFiles(
            compat.as_bytes(pattern))
    ]
  else:
    return [
        # Convert the filenames to string from bytes.
        compat.as_str_any(matching_filename)  # pylint: disable=g-complex-comprehension
        for single_filename in pattern
        for matching_filename in _pywrap_file_io.GetMatchingFiles(
            compat.as_bytes(single_filename))
    ]


@tf_export(v1=["gfile.MkDir"])
def create_dir(dirname):
  """Creates a directory with the name `dirname`.

  Args:
    dirname: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  create_dir_v2(dirname)


@tf_export("io.gfile.mkdir")
def create_dir_v2(path):
  """Creates a directory with the name given by `path`.

  Args:
    path: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CreateDir(compat.path_to_bytes(path))


@tf_export(v1=["gfile.MakeDirs"])
def recursive_create_dir(dirname):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if dirname already exists and is writable.

  Args:
    dirname: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  recursive_create_dir_v2(dirname)


@tf_export("io.gfile.makedirs")
def recursive_create_dir_v2(path):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if path already exists and is writable.

  Args:
    path: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
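
  Example (a sketch; assumes a writable local `/tmp` and hypothetical
  directory names):

  >>> tf.io.gfile.makedirs("/tmp/parent/child/grandchild")
  >>> tf.io.gfile.isdir("/tmp/parent/child/grandchild")
  True
  >>> tf.io.gfile.rmtree("/tmp/parent")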
  """
  _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path))


@tf_export("io.gfile.copy")
def copy_v2(src, dst, overwrite=False):
  """Copies data from `src` to `dst`.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  You can also specify the URI scheme for selecting a different filesystem:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  Note that you need to always specify a file name, even if moving into a new
  directory. This is because some cloud filesystems don't have the concept of a
  directory.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.mkdir("/tmp/new_dir")
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/new_dir/y")
  >>> tf.io.gfile.exists("/tmp/new_dir/y")
  True
  >>> tf.io.gfile.rmtree("/tmp/new_dir")

  To prevent errors when the destination path already exists, pass the
  `overwrite` argument:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y", overwrite=True)
  >>> tf.io.gfile.remove("/tmp/y")

  Note that the above will still result in an error if you try to overwrite a
  directory with a file.

  Note that you cannot copy a directory; only file arguments are supported.

  Args:
    src: string, name of the file whose contents need to be copied
    dst: string, name of the file to copy to
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CopyFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)


@tf_export(v1=["gfile.Copy"])
def copy(oldpath, newpath, overwrite=False):
  copy_v2(oldpath, newpath, overwrite)


copy.__doc__ = copy_v2.__doc__


@tf_export(v1=["gfile.Rename"])
def rename(oldname, newname, overwrite=False):
  """Rename or move a file / directory.

  Args:
    oldname: string, pathname for a file
    newname: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `newname` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  rename_v2(oldname, newname, overwrite)


@tf_export("io.gfile.rename")
def rename_v2(src, dst, overwrite=False):
  """Rename or move a file / directory.

  Args:
    src: string, pathname for a file
    dst: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
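
  Example (a sketch; assumes a writable local `/tmp` and hypothetical file
  names):

  >>> with open("/tmp/rename_src", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.rename("/tmp/rename_src", "/tmp/rename_dst")
  >>> tf.io.gfile.exists("/tmp/rename_src")
  False
  >>> tf.io.gfile.exists("/tmp/rename_dst")
  True
  >>> tf.io.gfile.remove("/tmp/rename_dst")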
  """
  _pywrap_file_io.RenameFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)


def atomic_write_string_to_file(filename, contents, overwrite=True):
  """Writes to `filename` atomically.

  This means that when `filename` appears in the filesystem, it will contain
  all of `contents`. With write_string_to_file, it is possible for the file
  to appear in the filesystem with `contents` only partially written.

  Accomplished by writing to a temp file and then renaming it.

  Args:
    filename: string, pathname for a file
    contents: string, contents that need to be written to the file
    overwrite: boolean, if false it's an error for `filename` to be occupied by
      an existing file.
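
  Example (a sketch; assumes a writable local `/tmp` and the hypothetical
  path `/tmp/atomic_demo.txt`):

  >>> atomic_write_string_to_file("/tmp/atomic_demo.txt", "all-or-nothing")
  >>> read_file_to_string("/tmp/atomic_demo.txt")
  'all-or-nothing'
  >>> delete_file("/tmp/atomic_demo.txt")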
  """
  if not has_atomic_move(filename):
    write_string_to_file(filename, contents)
  else:
    temp_pathname = filename + ".tmp" + uuid.uuid4().hex
    write_string_to_file(temp_pathname, contents)
    try:
      rename(temp_pathname, filename, overwrite)
    except errors.OpError:
      delete_file(temp_pathname)
      raise


@tf_export(v1=["gfile.DeleteRecursively"])
def delete_recursively(dirname):
  """Deletes everything under dirname recursively.

  Args:
    dirname: string, a path to a directory

  Raises:
    errors.OpError: If the operation fails.
  """
  delete_recursively_v2(dirname)


@tf_export("io.gfile.rmtree")
def delete_recursively_v2(path):
  """Deletes everything under path recursively.

  Args:
    path: string, a path

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path))


@tf_export(v1=["gfile.IsDirectory"])
def is_directory(dirname):
  """Returns whether the path is a directory or not.

  Args:
    dirname: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  return is_directory_v2(dirname)


@tf_export("io.gfile.isdir")
def is_directory_v2(path):
  """Returns whether the path is a directory or not.

  Args:
    path: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  try:
    return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
  except errors.OpError:
    return False


def has_atomic_move(path):
  """Checks whether the file system supports atomic moves.

  Returns whether or not the file system of the given path supports the atomic
  move operation for a file or folder.  If atomic move is supported, it is
  recommended to use a temp location for writing and then move to the final
  location.

  Args:
    path: string, path to a file

  Returns:
    True, if the path is on a file system that supports atomic move.
    False, if the file system does not support atomic move. In such cases
           we need to be careful about using moves, and in some cases it is
           safer not to use temporary locations at all.
  """
  try:
    return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path))
  except errors.OpError:
    # defaults to True
    return True


@tf_export(v1=["gfile.ListDirectory"])
def list_directory(dirname):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..".

  Args:
    dirname: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError: If the directory doesn't exist.
  """
  return list_directory_v2(dirname)


@tf_export("io.gfile.listdir")
def list_directory_v2(path):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..".

  Args:
    path: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError: If the directory doesn't exist.
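
  Example (a sketch; assumes a writable local `/tmp` and hypothetical file
  names):

  >>> tf.io.gfile.mkdir("/tmp/listdir_demo")
  >>> with open("/tmp/listdir_demo/a.txt", "w") as f:
  ...   f.write("a")
  ...
  1
  >>> tf.io.gfile.listdir("/tmp/listdir_demo")
  ['a.txt']
  >>> tf.io.gfile.rmtree("/tmp/listdir_demo")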
  """
  if not is_directory(path):
    raise errors.NotFoundError(
        node_def=None,
        op=None,
        message="Could not find directory {}".format(path))

  # Convert each element to string, since the return values of the
  # vector of string should be interpreted as strings, not bytes.
  return [
      compat.as_str_any(filename)
      for filename in _pywrap_file_io.GetChildren(compat.path_to_bytes(path))
  ]


@tf_export("io.gfile.join")
def join(path, *paths):
  r"""Join one or more path components intelligently.

  Paths on TensorFlow-specific filesystems are joined
  like a URL (using "/" as the path separator) on all platforms:

  On Windows or Linux/Unix-like:
  >>> tf.io.gfile.join("gcs://folder", "file.py")
  'gcs://folder/file.py'

  >>> tf.io.gfile.join("ram://folder", "file.py")
  'ram://folder/file.py'

  But the native filesystem is handled just like os.path.join:

  >>> path = tf.io.gfile.join("folder", "file.py")
  >>> if os.name == "nt":
  ...   expected = "folder\\file.py"  # Windows
  ... else:
  ...   expected = "folder/file.py"  # Linux/Unix-like
  >>> path == expected
  True

  Args:
    path: string, path to a directory
    paths: string, additional paths to concatenate

  Returns:
    path: the joined path.
  """
  # os.path.join won't take mixed bytes/str, so don't overwrite the incoming `path` var
  path_ = compat.as_str_any(compat.path_to_str(path))
  if "://" in path_[1:]:
    return urljoin(path, *paths)
  return os.path.join(path, *paths)


@tf_export(v1=["gfile.Walk"])
def walk(top, in_order=True):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    in_order: bool, Traverse in order if True, post order if False.  Errors that
      happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple:  the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """
  return walk_v2(top, in_order)


@tf_export("io.gfile.walk")
def walk_v2(top, topdown=True, onerror=None):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    topdown: bool, Traverse pre order if True, post order if False.
    onerror: optional handler for errors. Should be a function; it will be
      called with the error as its argument. Rethrowing the error aborts the
      walk. Errors that happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple:  the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
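
  Example (a sketch; assumes a writable local `/tmp`, hypothetical
  directory/file names, and a filesystem that lists these single entries in
  the order shown):

  >>> tf.io.gfile.makedirs("/tmp/walk_demo/sub")
  >>> with open("/tmp/walk_demo/f.txt", "w") as f:
  ...   f.write("x")
  ...
  1
  >>> for dirname, subdirs, files in tf.io.gfile.walk("/tmp/walk_demo"):
  ...   print(dirname, subdirs, files)
  /tmp/walk_demo ['sub'] ['f.txt']
  /tmp/walk_demo/sub [] []
  >>> tf.io.gfile.rmtree("/tmp/walk_demo")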
  """

  def _make_full_path(parent, item):
    # Since `join` discards paths before one that starts with the path
    # separator (https://docs.python.org/3/library/os.path.html#join),
    # we have to manually handle that case as `/` is a valid character on GCS.
    if item[0] == os.sep:
      return "".join([join(parent, ""), item])
    return join(parent, item)

  top = compat.as_str_any(compat.path_to_str(top))
  try:
    listing = list_directory(top)
  except errors.NotFoundError as err:
    if onerror:
      onerror(err)
    else:
      return

  files = []
  subdirs = []
  for item in listing:
    full_path = _make_full_path(top, item)
    if is_directory(full_path):
      subdirs.append(item)
    else:
      files.append(item)

  here = (top, subdirs, files)

  if topdown:
    yield here

  for subdir in subdirs:
    for subitem in walk_v2(
        _make_full_path(top, subdir), topdown, onerror=onerror):
      yield subitem

  if not topdown:
    yield here


@tf_export(v1=["gfile.Stat"])
def stat(filename):
  """Returns file statistics for a given path.

  Args:
    filename: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  return stat_v2(filename)


@tf_export("io.gfile.stat")
def stat_v2(path):
  """Returns file statistics for a given path.

  Args:
    path: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  return _pywrap_file_io.Stat(compat.path_to_str(path))


def filecmp(filename_a, filename_b):
  """Compare two files, returning True if they are the same, False otherwise.
  We check size first, returning False quickly if the files are different
  sizes. If they are the same size, we continue by generating a CRC for the
  whole file.

  You might wonder: why not use Python's `filecmp.cmp()` instead? The answer is
  that the builtin library is not robust to the many different filesystems
  TensorFlow runs on, and so here we perform a similar comparison with
  the more robust FileIO.

  Args:
    filename_a: string path to the first file.
    filename_b: string path to the second file.

  Returns:
    True if the files are the same, False otherwise.
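
  Example (a sketch; assumes a writable local `/tmp` and hypothetical file
  names):

  >>> write_string_to_file("/tmp/cmp_a", "contents")
  >>> write_string_to_file("/tmp/cmp_b", "contents")
  >>> filecmp("/tmp/cmp_a", "/tmp/cmp_b")
  True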
  """
  size_a = FileIO(filename_a, "rb").size()
  size_b = FileIO(filename_b, "rb").size()
  if size_a != size_b:
    return False

  # Size is the same. Do a full check.
  crc_a = file_crc32(filename_a)
  crc_b = file_crc32(filename_b)
  return crc_a == crc_b


def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE):
  """Get the crc32 of the passed file.

  The crc32 of a file can be used for error checking; two files with the same
  crc32 are considered equivalent. Note that the entire file must be read
  to produce the crc32.

  Args:
    filename: string, path to a file
    block_size: Integer, process the files by reading blocks of `block_size`
      bytes. Use -1 to read the whole file at once.

  Returns:
    hexadecimal as string, the crc32 of the passed file.
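
  Example (a sketch; assumes a writable local `/tmp`; the crc32 of the bytes
  of "hello" is 0x3610a686):

  >>> write_string_to_file("/tmp/crc_demo", "hello")
  >>> file_crc32("/tmp/crc_demo")
  '0x3610a686'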
  """
  crc = 0
  with FileIO(filename, mode="rb") as f:
    chunk = f.read(n=block_size)
    while chunk:
      crc = binascii.crc32(chunk, crc)
      chunk = f.read(n=block_size)
  return hex(crc & 0xFFFFFFFF)


@tf_export("io.gfile.get_registered_schemes")
def get_registered_schemes():
  """Returns the currently registered filesystem schemes.

  The `tf.io.gfile` APIs, in addition to accepting traditional filesystem paths,
  also accept file URIs that begin with a scheme. For example, the local
  filesystem path `/tmp/tf` can also be addressed as `file:///tmp/tf`. In this
  case, the scheme is `file`, followed by `://` and then the path, according to
  [URI syntax](https://datatracker.ietf.org/doc/html/rfc3986#section-3).

  This function returns the currently registered schemes that will be recognized
  by `tf.io.gfile` APIs. This includes both built-in schemes and those
  registered by other TensorFlow filesystem implementations, for example those
  provided by [TensorFlow I/O](https://github.com/tensorflow/io).

  The empty string is always included, and represents the "scheme" for regular
  local filesystem paths.

  Returns:
    List of string schemes, e.g. `['', 'file', 'ram']`, in arbitrary order.

  Raises:
    errors.OpError: If the operation fails.
  """
  return _pywrap_file_io.GetRegisteredSchemes()