xref: /aosp_15_r20/external/tensorflow/tensorflow/python/distribute/shared_variable_creator.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Utility to re-use variables created on first device on subsequent devices."""
16
17import re
18
# Matches a uniquifying "_<num>" suffix on a non-final name component, i.e. one
# that is immediately followed by "/". `\d+` (rather than a single `\d`) is
# deliberate: uniquifying counters go past 9, so "dense_10/kernel" has to be
# recognized just like "dense_1/kernel".
_VARIABLE_UNIQUIFYING_REGEX = re.compile(r"_\d+/")
# Matches the same "_<num>" suffix when it terminates the whole name.
_VARIABLE_UNIQUIFYING_REGEX_AT_END = re.compile(r"_\d+$")
21
22
def _canonicalize_variable_name(name):
  """Return `name` with its uniquifying `_<num>` suffixes removed.

  Args:
    name: Variable name string, or None when the caller gave no name.

  Returns:
    "Variable" if `name` is None. Otherwise `name` with every match of
    `_VARIABLE_UNIQUIFYING_REGEX` replaced by "/" and any match of
    `_VARIABLE_UNIQUIFYING_REGEX_AT_END` removed.
  """
  # An unnamed variable falls back to the default name "Variable".
  if name is None:
    return "Variable"
  # First drop the suffixes that sit directly in front of a "/" ...
  without_scope_suffixes = _VARIABLE_UNIQUIFYING_REGEX.sub("/", name)
  # ... then the one (if any) at the very end of the name.
  return _VARIABLE_UNIQUIFYING_REGEX_AT_END.sub("", without_scope_suffixes)
32
33
def make_fn(shared_variable_store, device_id):
  """Build the variable creator function to use for device `device_id`.

  Device 0 gets a creator that really builds each variable through
  `next_creator` and records it in `shared_variable_store`. Every other device
  gets a creator that never builds anything: it hands back the variables
  already recorded in the store, looked up by name, and raises if there is
  nothing left to hand back.

  Names are canonicalized (uniquifying suffixes removed) before being used as
  store keys, so variables that are meant to be shared still match when
  upstream uniquification gave them different names. Several variables can
  therefore end up under one canonical name; the store keeps a list per name
  and the re-using creator returns the entries in the order they were appended.

  Args:
    shared_variable_store: Dictionary mapping a canonical variable name to the
      list of variables created under it. The device-0 creator appends to it;
      creators for other devices only read from it.
    device_id: Integer index of the device whose creator should be built.

  Returns:
    A function taking `(next_creator, **kwargs)`: the creating variant when
    `device_id == 0`, the re-using variant otherwise.
  """
  assert isinstance(device_id, int)
  # For the re-using creator: canonical name -> position of the next stored
  # variable to return. Lives as long as the returned creator does.
  next_position_by_name = {}

  def create_new_variable(next_creator, **kwargs):
    """Build the variable with `next_creator` and record it in the store."""
    store_key = _canonicalize_variable_name(kwargs.get("name"))
    variable = next_creator(**kwargs)
    shared_variable_store.setdefault(store_key, []).append(variable)
    return variable

  def reuse_variable(next_creator, **kwargs):
    """Return the next not-yet-returned stored variable with the same name."""
    del next_creator  # Nothing is created here, so the creator chain is unused.
    name = kwargs.get("name")
    store_key = _canonicalize_variable_name(name)
    position = next_position_by_name.get(store_key, 0)

    try:
      variable = shared_variable_store[store_key][position]
    except (KeyError, IndexError):
      # Either the name was never stored, or more variables are being
      # requested under it than were stored.
      raise RuntimeError(
          "Tried to create variable {} with mismatching name on device {}"
          .format(name, device_id))

    # TODO(priyag): Make this variable re-use more robust by adding checks
    # that the requested shape and dtype match the existing variable.
    next_position_by_name[store_key] = position + 1
    return variable

  return create_new_variable if device_id == 0 else reuse_variable
94