# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradients for operators defined in tensor_array_ops.py."""
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import tensor_array_ops

# TODO(b/31222613): These ops may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable("TensorArray")
ops.NotDifferentiable("TensorArrayGrad")
ops.NotDifferentiable("TensorArraySize")
ops.NotDifferentiable("TensorArrayClose")

ops.NotDifferentiable("TensorArrayV2")
ops.NotDifferentiable("TensorArrayGradV2")
ops.NotDifferentiable("TensorArraySizeV2")
ops.NotDifferentiable("TensorArrayCloseV2")

ops.NotDifferentiable("TensorArrayV3")
ops.NotDifferentiable("TensorArrayGradV3")
ops.NotDifferentiable("TensorArrayGradWithShape")
ops.NotDifferentiable("TensorArraySizeV3")
ops.NotDifferentiable("TensorArrayCloseV3")


def _GetGradSource(op_or_tensor):
  """Identify which call to tf.gradients created this gradient op or tensor.

  TensorArray gradient calls use an accumulator TensorArray object.  If
  multiple gradients are calculated and run in the same session, the multiple
  gradient nodes may accidentally flow through the same accumulator
  TensorArray.  This double counting breaks the TensorArray gradient flow.

  The solution is to identify which gradient call this particular
  TensorArray*Grad is being called in, by looking at the input gradient
  tensor's name, and create or lookup an accumulator gradient TensorArray
  associated with this specific call.  This solves any confusion and ensures
  different gradients from the same forward graph get their own accumulators.

  This function creates the unique label associated with the tf.gradients call
  that is used to create the gradient TensorArray.

  Args:
    op_or_tensor: `Tensor` or `Operation` which is an input to a
      TensorArray*Grad call.

  Returns:
    A python string, the unique label associated with this particular
    gradients calculation.

  Raises:
    ValueError: If not called within a gradients calculation.
  """
  name_tokens = op_or_tensor.name.split("/")
  grad_pos = [i for i, x in enumerate(name_tokens) if x.startswith("gradients")]
  if not grad_pos:
    raise ValueError(
        "Expected op/tensor name to start with gradients (excluding scope)"
        f", got: {op_or_tensor.name}. This means that a tf.gradients op with "
        "this op in its dependency path has a custom name that does not start "
        "with 'gradients'. Please make sure all calls to tf.gradients that "
        "have non-empty `name` arguments use names that start with "
        "'gradients'.")
  return "/".join(name_tokens[:grad_pos[-1] + 1])


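# Illustrative example for _GetGradSource above (hypothetical tensor name, not
# part of the original module): the label is everything up to and including
# the last name-scope component that starts with "gradients".  For a gradient
# tensor named
#   "outer/gradients_1/while/TensorArrayReadV3_grad/TensorArrayGradV3:0"
# the returned source is "outer/gradients_1", so the accumulator created for
# this tf.gradients call stays separate from accumulators created by other
# calls (e.g. plain "gradients" or "gradients_2").

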
@ops.RegisterGradient("TensorArrayRead")
@ops.RegisterGradient("TensorArrayReadV2")
@ops.RegisterGradient("TensorArrayReadV3")
def _TensorArrayReadGrad(op, grad):
  """Gradient for TensorArrayRead.

  Args:
    op: Forward TensorArrayRead op.
    grad: Gradient `Tensor` to TensorArrayRead.

  Returns:
    A flow `Tensor`, which can be used in control dependencies to
    force the write of `grad` to the gradient `TensorArray`.
  """
  # Note: the forward flow dependency in the call to grad() is necessary for
  # the case of dynamic sized TensorArrays.  When creating the gradient
  # TensorArray, the final size of the forward array must be known.
  # For this we need to wait until it has been created by depending on
  # the input flow of the original op.
  handle = op.inputs[0]
  index = op.inputs[1]
  flow = op.inputs[2]
  dtype = op.get_attr("dtype")
  grad_source = _GetGradSource(grad)
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  w_g = g.write(index, grad)
  return [None, None, w_g.flow]


@ops.RegisterGradient("TensorArrayWrite")
@ops.RegisterGradient("TensorArrayWriteV2")
@ops.RegisterGradient("TensorArrayWriteV3")
def _TensorArrayWriteGrad(op, flow):
  """Gradient for TensorArrayWrite.

  Args:
    op: Forward TensorArrayWrite op.
    flow: Gradient `Tensor` flow to TensorArrayWrite.

  Returns:
    A grad `Tensor`, the gradient created in an upstream ReadGrad or PackGrad.
  """
  # handle is the output store_handle of TensorArrayReadGrad or
  # the handle output of TensorArrayWriteGrad.  We must use this one.
  handle = op.inputs[0]
  index = op.inputs[1]
  dtype = op.get_attr("T")
  grad_source = _GetGradSource(flow)
  flow_out = array_ops.identity(op.outputs[0], "flow_out")
  # Avoid a race condition where the TensorArrayGrad op is executed before the
  # final TensorArrayWrite by adding a control dependency on the output flow of
  # the write to the input flow to the TensorArrayGrad.
  with ops.control_dependencies([flow_out]):
    flow = array_ops.identity(flow, "write_barrier")
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  grad = g.read(index)
  return [None, None, grad, flow]


@ops.RegisterGradient("TensorArrayGather")
@ops.RegisterGradient("TensorArrayGatherV2")
@ops.RegisterGradient("TensorArrayGatherV3")
def _TensorArrayGatherGrad(op, grad):
  """Gradient for TensorArrayGather.

  Args:
    op: Forward TensorArrayGather op.
    grad: Gradient `Tensor` to TensorArrayGather.

  Returns:
    A flow `Tensor`, which can be used in control dependencies to
    force the write of `grad` to the gradient `TensorArray`.
  """
  # Note: the forward flow dependency in the call to grad() is necessary for
  # the case of dynamic sized TensorArrays.  When creating the gradient
  # TensorArray, the final size of the forward array must be known.
  # For this we need to wait until it has been created by depending on
  # the input flow of the original op.
  handle = op.inputs[0]
  indices = op.inputs[1]
  flow = op.inputs[2]
  dtype = op.get_attr("dtype")
  grad_source = _GetGradSource(grad)
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  u_g = g.scatter(indices, grad)
  return [None, None, u_g.flow]


@ops.RegisterGradient("TensorArrayScatter")
@ops.RegisterGradient("TensorArrayScatterV2")
@ops.RegisterGradient("TensorArrayScatterV3")
def _TensorArrayScatterGrad(op, flow):
  """Gradient for TensorArrayScatter.

  Args:
    op: Forward TensorArrayScatter op.
    flow: Gradient `Tensor` flow to TensorArrayScatter.

  Returns:
    A grad `Tensor`, the gradient created in upstream ReadGrads or PackGrad.
  """
  handle = op.inputs[0]
  indices = op.inputs[1]
  dtype = op.get_attr("T")
  grad_source = _GetGradSource(flow)
  flow_out = array_ops.identity(op.outputs[0], "flow_out")
  # Avoid a race condition where the TensorArrayGrad op is executed before the
  # TensorArrayScatter by adding a control dependency on the output flow of
  # the scatter to the input flow to the TensorArrayGrad.
  with ops.control_dependencies([flow_out]):
    flow = array_ops.identity(flow, "write_barrier")
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  grad = g.gather(indices)
  return [None, None, grad, flow]


@ops.RegisterGradient("TensorArrayConcat")
@ops.RegisterGradient("TensorArrayConcatV2")
@ops.RegisterGradient("TensorArrayConcatV3")
def _TensorArrayConcatGrad(op, grad, unused_lengths_grad):
  """Gradient for TensorArrayConcat.

  Args:
    op: Forward TensorArrayConcat op.
    grad: Gradient `Tensor` to TensorArrayConcat.
    unused_lengths_grad: Gradient `Tensor` for the `lengths` output; unused.

  Returns:
    A flow `Tensor`, which can be used in control dependencies to
    force the write of `grad` to the gradient `TensorArray`.
  """
  # Note: the forward flow dependency in the call to grad() is necessary for
  # the case of dynamic sized TensorArrays.  When creating the gradient
  # TensorArray, the final size of the forward array must be known.
  # For this we need to wait until it has been created by depending on
  # the input flow of the original op.
  handle = op.inputs[0]
  flow = op.inputs[1]
  lengths = op.outputs[1]
  dtype = op.get_attr("dtype")
  grad_source = _GetGradSource(grad)
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  u_g = g.split(grad, lengths=lengths)
  # handle, flow_in
  return [None, u_g.flow]


@ops.RegisterGradient("TensorArraySplit")
@ops.RegisterGradient("TensorArraySplitV2")
@ops.RegisterGradient("TensorArraySplitV3")
def _TensorArraySplitGrad(op, flow):
  """Gradient for TensorArraySplit.

  Args:
    op: Forward TensorArraySplit op.
    flow: Gradient `Tensor` flow to TensorArraySplit.

  Returns:
    A grad `Tensor`, the gradient created in upstream ReadGrads or PackGrad.
  """
  handle = op.inputs[0]
  dtype = op.get_attr("T")
  grad_source = _GetGradSource(flow)
  flow_out = array_ops.identity(op.outputs[0], "flow_out")
  # Avoid a race condition where the TensorArrayGrad op is executed before the
  # TensorArraySplit by adding a control dependency on the output flow of
  # the split to the input flow to the TensorArrayGrad.
  with ops.control_dependencies([flow_out]):
    flow = array_ops.identity(flow, "write_barrier")
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  grad = g.concat()
  # handle, value, lengths, flow_in
  return [None, grad, None, flow]
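

# Illustrative sketch, not part of the original module: how the gradients
# registered above are exercised.  The helper below is hypothetical and is
# never called here; it assumes TF1-style graph mode via tf.compat.v1 and the
# public TensorArray API.
def _example_tensor_array_gradients():
  import tensorflow.compat.v1 as tf  # Assumed import, for the sketch only.
  tf.disable_eager_execution()
  x = tf.constant([1.0, 2.0, 3.0])
  ta = tf.TensorArray(dtype=tf.float32, size=3)
  ta = ta.unstack(x)    # Forward TensorArrayScatter.
  y = ta.read(1) * 2.0  # Forward TensorArrayRead.
  # tf.gradients routes through _TensorArrayReadGrad (which writes the incoming
  # gradient into an accumulator TensorArray) and _TensorArrayScatterGrad
  # (which gathers the accumulator back out), yielding [0., 2., 0.].
  return tf.gradients(y, x)[0]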
264