/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/cc/framework/gradient_checker.h"

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/framework/gradients.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/errors.h"

namespace tensorflow {
namespace {

// TODO(andydavis) Support returning relative error (as opposed to max error)
// between theoretical and numerical jacobians:
//   fabs(jac_t - jac_n) / max(fabs(jac_t), fabs(jac_n))

// TODO(andydavis) Vectorize and/or multi-thread Jacobian computations if
// performance becomes an issue.

// BaseUnitsForType provides a list of typed unit values for each basis in the
// requested type.
// When T is real,
// BaseUnitsForType<T>::values() is just a single-entry vector [1].
// When T is complex,
// BaseUnitsForType<T>::values() is a two-entry vector [1, i] - the unit
// values in each of its two bases.
template <typename T>
struct BaseUnitsForType {};  // Specializations below

// Template specialization for BaseUnitsForType
#define SET_BASE_UNITS_FOR_TYPE(TYPE, INIT)                         \
  template <>                                                       \
  struct BaseUnitsForType<TYPE> {                                   \
    static const std::vector<TYPE>& values() {                      \
      static std::vector<TYPE>* units = new std::vector<TYPE> INIT; \
      return *units;                                                \
    }                                                               \
  }

SET_BASE_UNITS_FOR_TYPE(float, {1});
SET_BASE_UNITS_FOR_TYPE(double, {1});
SET_BASE_UNITS_FOR_TYPE(complex64, ({{1, 0}, {0, 1}}));
SET_BASE_UNITS_FOR_TYPE(complex128, ({{1, 0}, {0, 1}}));
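
// For example, BaseUnitsForType<complex64>::values() is {1 + 0i, 0 + 1i}, so
// the loops below perturb (or back-propagate through) the real and imaginary
// parts of each complex element separately.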

// SetJacobian sets the jacobian value at the provided row and column from a
// tensor entry with type T.
// When T is real, this is a simple assignment that casts the entry into the
// jacobian type.
// When T is complex, it assigns the real and imaginary parts to successive
// rows or columns in the matrix, depending on the expand_by_row parameter.
template <typename T, typename JAC_T>
typename std::enable_if<std::is_floating_point<T>::value>::type SetJacobian(
    typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
    const T& value, const bool expand_by_row) {
  (*jacobian)(row, col) = JAC_T{value};
}

template <typename T, typename JAC_T>
typename std::enable_if<is_complex<T>::value>::type SetJacobian(
    typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
    const T& value, const bool expand_by_row) {
  (*jacobian)(row, col) = JAC_T{value.real()};
  if (expand_by_row) {
    (*jacobian)(row + 1, col) = JAC_T{value.imag()};
  } else {
    (*jacobian)(row, col + 1) = JAC_T{value.imag()};
  }
}
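
// For example, with value = 3 + 4i, SetJacobian(&j, r, c, value,
// /*expand_by_row=*/true) writes j(r, c) = 3 and j(r + 1, c) = 4, while
// /*expand_by_row=*/false writes j(r, c) = 3 and j(r, c + 1) = 4.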

// JacobianStride<T>::value holds the number of Jacobian elements needed to
// represent one element of the given type.
// When T is real the stride is 1, and when T is complex the stride is 2.
template <typename T>
struct JacobianStride {};  // Specializations below

#define SET_JACOBIAN_STRIDE(TYPE, VALUE) \
  template <>                            \
  struct JacobianStride<TYPE> {          \
    static constexpr int value = VALUE;  \
  }

SET_JACOBIAN_STRIDE(float, 1);
SET_JACOBIAN_STRIDE(double, 1);
SET_JACOBIAN_STRIDE(complex64, 2);
SET_JACOBIAN_STRIDE(complex128, 2);

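// ComputeTheoreticalJacobianTranspose fills 'jacobian_ts' with the transposed
// Jacobians implied by the symbolic gradient graph: for each element of each
// 'y' (and each unit basis when y is complex), it feeds a one-hot 'dy', runs
// the gradient subgraph produced by AddSymbolicGradients, and records the
// resulting 'dx' values as one column of each transposed Jacobian.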
template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeTheoreticalJacobianTranspose(
    const Scope& scope, const OutputList& xs,
    const std::vector<TensorShape>& x_shapes,
    const std::vector<Tensor>& x_datas, const OutputList& ys,
    const std::vector<TensorShape>& y_shapes,
    std::vector<Tensor>* jacobian_ts) {
  size_t y_num = y_shapes.size();
  size_t x_num = x_shapes.size();
  // Call AddSymbolicGradients to get 'dxs' (we will feed 'dys').
  OutputList dys;
  dys.reserve(y_shapes.size());
  for (const auto& y_shape : y_shapes) {
    // TODO(suharshs): This currently assumes that all y's are the same type.
    dys.push_back(
        ops::Cast(scope, ops::Const(scope, 1.0, y_shape), ys[0].type()));
  }
  OutputList dxs;
  TF_RETURN_IF_ERROR(AddSymbolicGradients(scope, ys, xs, dys, &dxs));

  // Initialize 'dy_datas' to zeros.
  std::vector<Tensor> dy_datas(y_num);
  for (int i = 0; i < y_num; i++) {
    dy_datas[i] = Tensor(ys[i].type(), y_shapes[i]);
    auto dy_data_flat = dy_datas[i].flat<Y_T>();
    dy_data_flat.setZero();
  }

  // Create the feed list.
  ClientSession::FeedType feed_list;
  for (int i = 0; i < x_num; i++) {
    feed_list.insert({xs[i], x_datas[i]});
  }
  for (int i = 0; i < y_num; i++) {
    feed_list.insert({dys[i], dy_datas[i]});
  }

  // x_stride and y_stride are used to calculate the correct jacobian row and
  // column position for a pair of elements at positions r, c within the x and y
  // tensors respectively.
  const int x_stride = JacobianStride<X_T>::value;
  const int y_stride = JacobianStride<Y_T>::value;
  ClientSession session(scope);
  for (int y_idx = 0; y_idx < y_num; y_idx++) {
    auto dy_data_flat = dy_datas[y_idx].flat<Y_T>();
    const int64_t dy_size = y_shapes[y_idx].num_elements();

    // Compute the theoretical Jacobians one row at a time by back propagating
    // '1.0' (or '1' and 'i' if y is complex) for each element of 'dy', while
    // holding all other elements of 'dy' at zero.
    for (int c = 0; c < dy_size; ++c) {
      int unit_dimension = 0;
      for (Y_T unit : BaseUnitsForType<Y_T>::values()) {
        dy_data_flat(c) = unit;

        std::vector<Tensor> dxout;
        TF_RETURN_IF_ERROR(session.Run(feed_list, dxs, &dxout));

        for (int x_idx = 0; x_idx < x_num; x_idx++) {
          if (x_shapes[x_idx] != dxout[x_idx].shape()) {
            return errors::Internal("Gradient for input ", x_idx,
                                    " expected shape ",
                                    x_shapes[x_idx].DebugString(), " but was ",
                                    dxout[x_idx].shape().DebugString());
          }
          const int64_t x_size = x_shapes[x_idx].num_elements();
          auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
          auto dx_flat = dxout[x_idx].flat<X_T>();
          for (int r = 0; r < x_size; ++r) {
            SetJacobian<X_T, JAC_T>(&jacobian, r * x_stride,
                                    c * y_stride + unit_dimension, dx_flat(r),
                                    true /* expand_by_row=true */);
          }
        }

        dy_data_flat(c) = Y_T{0};
        unit_dimension++;
      }
    }
  }
  return OkStatus();
}

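// EvaluateGraph runs 'ys' with the values in 'x_datas' fed to 'xs', returning
// the results in 'y_datas'. Outputs that alias an input buffer are deep-copied
// so that subsequent perturbations of the inputs cannot change them.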
Status EvaluateGraph(ClientSession* session, const OutputList& xs,
                     const OutputList& ys, std::vector<Tensor>* x_datas,
                     std::vector<Tensor>* y_datas) {
  // Create the feed list.
  ClientSession::FeedType feed_list;
  for (int i = 0; i < x_datas->size(); i++) {
    feed_list.insert({xs[i], (*x_datas)[i]});
  }

  TF_RETURN_IF_ERROR(session->Run(feed_list, ys, y_datas));
  for (int y_idx = 0; y_idx < y_datas->size(); y_idx++) {
    for (int x_idx = 0; x_idx < x_datas->size(); x_idx++) {
      Tensor y_data = (*y_datas)[y_idx];
      if (y_data.SharesBufferWith((*x_datas)[x_idx])) {
        // Create copies of outputs that share a buffer with any inputs, since
        // the underlying buffers of the input Tensors are not copied for some
        // operations (e.g. Identity), which can lead to incorrect results for
        // the centered difference calculation.
        (*y_datas)[y_idx] = tensor::DeepCopy(y_data);
      }
    }
  }
  return OkStatus();
}

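// ComputeNumericJacobianTranspose estimates each entry dy_c/dx_r with a
// centered difference: every element of x is perturbed by +delta and -delta
// (separately in each unit basis when x is complex) and the Jacobian entry is
// set to (y(x + delta) - y(x - delta)) / (2 * delta), which is accurate to
// O(delta^2) for smooth functions.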
template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeNumericJacobianTranspose(const Scope& scope, const OutputList& xs,
                                       const std::vector<TensorShape>& x_shapes,
                                       const OutputList& ys,
                                       const std::vector<TensorShape>& y_shapes,
                                       const JAC_T delta,
                                       std::vector<Tensor>* x_datas,
                                       std::vector<Tensor>* jacobian_ts) {
  size_t y_num = y_shapes.size();
  size_t x_num = x_shapes.size();
  // x_stride and y_stride are used to calculate the correct jacobian row and
  // column position for a pair of elements at positions r, c within the x and y
  // tensors respectively.
  const int x_stride = JacobianStride<X_T>::value;
  const int y_stride = JacobianStride<Y_T>::value;

  ClientSession session(scope);
  for (int x_idx = 0; x_idx < x_num; x_idx++) {
    auto x_data_flat = (*x_datas)[x_idx].flat<X_T>();
    const int64_t x_size = x_shapes[x_idx].num_elements();

    // Compute the numeric Jacobian one column at a time by perturbing each
    // element of 'x_data' (positively and negatively) by 'delta', and
    // updating the jacobian with the centered difference. When x_data is
    // complex-valued, we perturb its real and imaginary parts separately.
    for (int r = 0; r < x_size; ++r) {
      int unit_dimension = 0;
      for (X_T unit : BaseUnitsForType<X_T>::values()) {
        X_T x_delta = unit * X_T{delta};
        // Store current value of 'x' at 'r'.
        X_T v = x_data_flat(r);
        // Evaluate at positive delta.
        x_data_flat(r) = v + x_delta;
        std::vector<Tensor> y_pos;
        TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_pos));
        // Evaluate at negative delta.
        x_data_flat(r) = v - x_delta;
        std::vector<Tensor> y_neg;
        TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_neg));

        for (int y_idx = 0; y_idx < y_num; y_idx++) {
          // Compute element-wise centered difference and store in each
          // Jacobian.
          auto y_pos_flat = y_pos[y_idx].flat<Y_T>();
          auto y_neg_flat = y_neg[y_idx].flat<Y_T>();
          const int64_t y_size = y_shapes[y_idx].num_elements();
          const Y_T scale = 2 * delta;
          auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
          for (int c = 0; c < y_size; ++c) {
            SetJacobian<Y_T, JAC_T>(&jacobian, r * x_stride + unit_dimension,
                                    c * y_stride,
                                    (y_pos_flat(c) - y_neg_flat(c)) / scale,
                                    false /* expand_by_row=false */);
          }
        }
        // Restore pre-perturbation value.
        x_data_flat(r) = v;
        unit_dimension++;
      }
    }
  }
  return OkStatus();
}

// The Jacobian is always a real-valued matrix.
// Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j
// for every pair y_i in y and x_j in x.  Note that the Jacobian is defined
// directly over the elements of tensors y and x, and doesn't depend on their
// shapes.
//
// If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, ..., y_n) the matrix
// evaluated is actually the Jacobian transpose, defined as this mxn matrix:
// dy_1/dx_1 dy_2/dx_1 ... dy_n/dx_1
// dy_1/dx_2 dy_2/dx_2 ... dy_n/dx_2
//     .
//     .
//     .
// dy_1/dx_m dy_2/dx_m ... dy_n/dx_m
//
// If x or y is complex, each complex entry is "expanded" into a real and
// imaginary entry, and the Jacobian is organized as above on the expanded
// list, e.g.
// [y1, y2] = Square([x1, x2]) where x and y are complex.
// Writing
// x = [x1_real, x1_imag, x2_real, x2_imag]
// y = [y1_real, y1_imag, y2_real, y2_imag]
// the Jacobian transpose is the 4x4 matrix:
// dy1_real/dx1_real dy1_imag/dx1_real dy2_real/dx1_real dy2_imag/dx1_real
// dy1_real/dx1_imag dy1_imag/dx1_imag dy2_real/dx1_imag dy2_imag/dx1_imag
// dy1_real/dx2_real dy1_imag/dx2_real dy2_real/dx2_real dy2_imag/dx2_real
// dy1_real/dx2_imag dy1_imag/dx2_imag dy2_real/dx2_imag dy2_imag/dx2_imag
template <typename X_T, typename Y_T, typename JAC_T>
void InitJacobians(const OutputList& xs,
                   const std::vector<TensorShape>& x_shapes,
                   const std::vector<TensorShape>& y_shapes,
                   std::vector<Tensor>* jacobians) {
  const size_t y_num = y_shapes.size();
  const size_t x_num = x_shapes.size();
  const DataType jacobian_type = DataTypeToEnum<JAC_T>::v();

  jacobians->resize(y_num * x_num);
  for (int x_idx = 0; x_idx < x_num; x_idx++) {
    // The number of rows is the number of elements in the x tensor multiplied
    // by the number of Jacobian entries needed to represent each x type.
    const int64_t x_size =
        x_shapes[x_idx].num_elements() * JacobianStride<X_T>::value;
    for (int y_idx = 0; y_idx < y_num; y_idx++) {
      // The number of columns is the number of elements in the y tensor
      // multiplied by the number of Jacobian entries needed to represent each
      // y type.
      const int64_t y_size =
          y_shapes[y_idx].num_elements() * JacobianStride<Y_T>::value;
      Tensor jacobian_t(jacobian_type, {x_size, y_size});
      auto jacobian_t_flat = jacobian_t.flat<JAC_T>();
      jacobian_t_flat.setZero();
      (*jacobians)[x_idx * y_num + y_idx] = std::move(jacobian_t);
    }
  }
}

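// ComputeGradientErrorInternal computes the theoretical and numeric Jacobians
// for the given function and input values, then reports in 'max_error' the
// maximum absolute elementwise difference between the two (or NaN, if any
// difference is NaN).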
template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientErrorInternal(const Scope& scope, const OutputList& xs,
                                    const std::vector<TensorShape>& x_shapes,
                                    const OutputList& ys,
                                    const std::vector<TensorShape>& y_shapes,
                                    std::vector<Tensor>* x_datas,
                                    JAC_T* max_error) {
  // Initialize theoretical Jacobians to zeros.
  std::vector<Tensor> jacobian_ts;
  InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ts);

  // Compute theoretical Jacobian.
  TF_RETURN_IF_ERROR((ComputeTheoreticalJacobianTranspose<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, *x_datas, ys, y_shapes, &jacobian_ts)));

  // Initialize numeric Jacobian to zeros.
  std::vector<Tensor> jacobian_ns;
  InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ns);

  // Compute numeric Jacobian.
  TF_RETURN_IF_ERROR((ComputeNumericJacobianTranspose<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, ys, y_shapes, JAC_T{1e-3f}, x_datas, &jacobian_ns)));

  // Compute the maximum error over all theoretical/numeric Jacobian pairs.
  *max_error = 0.0;
  for (int i = 0; i < jacobian_ts.size(); i++) {
    auto jac_t = jacobian_ts[i].matrix<JAC_T>();
    auto jac_n = jacobian_ns[i].matrix<JAC_T>();
    for (int r = 0; r < jacobian_ts[i].dim_size(0); ++r) {
      for (int c = 0; c < jacobian_ts[i].dim_size(1); ++c) {
        auto cur_error = std::fabs(jac_t(r, c) - jac_n(r, c));
        // Treat any NaN as max_error and immediately return.
        // (Note that std::max may ignore NaN arguments.)
        if (std::isnan(cur_error)) {
          *max_error = cur_error;
          return OkStatus();
        }
        *max_error = std::max(*max_error, cur_error);
      }
    }
  }
  return OkStatus();
}

}  // namespace

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientError(const Scope& scope, const OutputList& xs,
                            const std::vector<TensorShape>& x_shapes,
                            const OutputList& ys,
                            const std::vector<TensorShape>& y_shapes,
                            JAC_T* max_error) {
  if (xs.size() != x_shapes.size()) {
    return errors::InvalidArgument("xs(size ", xs.size(),
                                   ") and x_shapes(size ", x_shapes.size(),
                                   ") must be the same size.");
  }
  if (ys.size() != y_shapes.size()) {
    return errors::InvalidArgument("ys(size ", ys.size(),
                                   ") and y_shapes(size ", y_shapes.size(),
                                   ") must be the same size.");
  }
  // Initialize 'x_datas' to random values.
  std::vector<Tensor> x_datas(x_shapes.size());
  for (int i = 0; i < x_shapes.size(); i++) {
    x_datas[i] = Tensor(xs[i].type(), x_shapes[i]);
    auto x_data_flat = x_datas[i].flat<X_T>();
    x_data_flat.setRandom();
  }
  // Compute gradient error.
  return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, ys, y_shapes, &x_datas, max_error);
}

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientError(const Scope& scope, const Output& x,
                            const Tensor& x_init_value, const Output& y,
                            const TensorShape& y_shape, JAC_T* max_error) {
  // Initialize 'x_data' from 'x_init_value'.
  std::vector<Tensor> x_datas(1, Tensor(x_init_value));
  // Compute gradient error.
  return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
      scope, {x}, {x_datas[0].shape()}, {y}, {y_shape}, &x_datas, max_error);
}
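
// A minimal usage sketch (with hypothetical shapes; op gradient tests follow
// this pattern):
//
//   Scope scope = Scope::NewRootScope();
//   auto x = ops::Placeholder(scope, DT_FLOAT,
//                             ops::Placeholder::Shape({2, 2}));
//   auto y = ops::Square(scope, x);
//   float max_error;
//   TF_CHECK_OK((ComputeGradientError<float, float, float>(
//       scope, {x}, {TensorShape({2, 2})}, {y}, {TensorShape({2, 2})},
//       &max_error)));
//   // For a correct gradient, max_error is expected to be small (e.g. below
//   // 1e-4 for float).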

#define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T)                     \
  template Status ComputeGradientError<X_T, Y_T, JAC_T>(               \
      const Scope& scope, const OutputList& xs,                        \
      const std::vector<TensorShape>& x_shapes, const OutputList& ys,  \
      const std::vector<TensorShape>& y_shapes, JAC_T* max_error);     \
  template Status ComputeGradientError<X_T, Y_T, JAC_T>(               \
      const Scope& scope, const Output& x, const Tensor& x_init_value, \
      const Output& y, const TensorShape& y_shape, JAC_T* max_error);

INSTANTIATE_GRAD_ERR_TYPE(float, float, float);
INSTANTIATE_GRAD_ERR_TYPE(double, float, double);
INSTANTIATE_GRAD_ERR_TYPE(double, double, double);
INSTANTIATE_GRAD_ERR_TYPE(complex64, float, float);
INSTANTIATE_GRAD_ERR_TYPE(float, complex64, float);
INSTANTIATE_GRAD_ERR_TYPE(complex64, complex64, float);
INSTANTIATE_GRAD_ERR_TYPE(complex128, complex128, double);

}  // namespace tensorflow
434