xref: /aosp_15_r20/external/tensorflow/tensorflow/python/checkpoint/checkpoint_options.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Options for saving Checkpoints."""
16
17from tensorflow.python.util.tf_export import tf_export
18
19
@tf_export("train.CheckpointOptions")
class CheckpointOptions(object):
  """Settings that adjust how a Checkpoint saves or restores variables.

  Pass an instance as the `options` argument of either
  `tf.train.Checkpoint.save()` or `tf.train.Checkpoint.restore()`.

  Example: Run IO ops on "localhost" while saving a checkpoint:

  ```
  step = tf.Variable(0, name="step")
  checkpoint = tf.train.Checkpoint(step=step)
  options = tf.train.CheckpointOptions(experimental_io_device="/job:localhost")
  checkpoint.save("/tmp/ckpt", options=options)
  ```
  """

  # Attributes are declared through __slots__ (rather than a per-instance
  # __dict__) for improved memory use and performance.
  __slots__ = (
      "experimental_io_device",
      "experimental_enable_async_checkpoint",
  )

  def __init__(
      self,
      experimental_io_device=None,
      experimental_enable_async_checkpoint=False):
    """Records the option values on the new instance.

    Args:
      experimental_io_device: string. Only relevant in a distributed setting.
        The TensorFlow device used to access the filesystem. With `None` (the
        default), the filesystem is accessed, for each variable, from the
        CPU:0 device of the host that variable is assigned to. When a device
        is given, the filesystem is accessed from that device for all
        variables instead.

        This helps, for example, when saving to a local directory such as
        "/tmp" while running in a distributed setting: pass a device on the
        host where the "/tmp" directory is accessible.

      experimental_enable_async_checkpoint: bool. Whether async checkpoint is
        enabled. Defaults to False, i.e., no async checkpoint.

        Async checkpoint moves the checkpoint file writing off the main
        thread, so the model can continue to train while the checkpoint file
        writing runs in the background. It reduces TPU device idle cycles and
        speeds up the model training process, while memory consumption may
        increase.
    """
    # The two assignments are independent; each simply stores its argument.
    self.experimental_enable_async_checkpoint = (
        experimental_enable_async_checkpoint)
    self.experimental_io_device = experimental_io_device
67