/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/cc/framework/gradient_checker.h"

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/framework/gradients.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/errors.h"

namespace tensorflow {
namespace {

// TODO(andydavis) Support returning relative error (as opposed to max error)
// between theoretical and numerical jacobians:
// fabs(jac_t - jac_n) / max(fabs(jac_t), fabs(jac_n))

// TODO(andydavis) Vectorize and/or multi-thread Jacobian computations if
// performance becomes an issue.

// BaseUnitsForType provides a list of typed unit values for each basis in the
// requested type.
// When T is real,
// BaseUnitsForType<T>::values() is just a single-entry vector [1]
// When T is complex,
// BaseUnitsForType<T>::values() is a two-entry vector [1, i] - the unit
// values in each of its two bases.
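// For example, BaseUnitsForType<complex64>::values() returns the two-entry
// vector {complex64(1, 0), complex64(0, 1)}.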
template <typename T>
struct BaseUnitsForType {};  // Specializations below

// Template specialization for BaseUnitsForType
#define SET_BASE_UNITS_FOR_TYPE(TYPE, INIT)                         \
  template <>                                                       \
  struct BaseUnitsForType<TYPE> {                                   \
    static const std::vector<TYPE>& values() {                      \
      static std::vector<TYPE>* units = new std::vector<TYPE> INIT; \
      return *units;                                                \
    }                                                               \
  }

SET_BASE_UNITS_FOR_TYPE(float, {1});
SET_BASE_UNITS_FOR_TYPE(double, {1});
SET_BASE_UNITS_FOR_TYPE(complex64, ({{1, 0}, {0, 1}}));
SET_BASE_UNITS_FOR_TYPE(complex128, ({{1, 0}, {0, 1}}));

// SetJacobian sets the jacobian value at the provided row and column from a
// tensor entry with type T.
// When T is real, this is a simple assignment that casts the entry into the
// jacobian type.
// When T is complex, it assigns the real and imaginary parts to successive
// rows or columns in the matrix, depending on the expand_by_row parameter.
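// For example, SetJacobian with value = complex64{3, 4} and
// expand_by_row = true writes 3 to (row, col) and 4 to (row + 1, col).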
template <typename T, typename JAC_T>
typename std::enable_if<std::is_floating_point<T>::value>::type SetJacobian(
    typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
    const T& value, const bool expand_by_row) {
  (*jacobian)(row, col) = JAC_T{value};
}

template <typename T, typename JAC_T>
typename std::enable_if<is_complex<T>::value>::type SetJacobian(
    typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
    const T& value, const bool expand_by_row) {
  (*jacobian)(row, col) = JAC_T{value.real()};
  if (expand_by_row) {
    (*jacobian)(row + 1, col) = JAC_T{value.imag()};
  } else {
    (*jacobian)(row, col + 1) = JAC_T{value.imag()};
  }
}

// JacobianStride<T>::value holds the number of Jacobian elements needed to
// represent one element of the given type.
// When T is real the stride is 1, and when T is complex the stride is 2.
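// For example, one complex64 element occupies two adjacent Jacobian rows (or
// columns): one for its real part and one for its imaginary part.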
template <typename T>
struct JacobianStride {};  // Specializations below

#define SET_JACOBIAN_STRIDE(TYPE, VALUE) \
  template <>                            \
  struct JacobianStride<TYPE> {          \
    static constexpr int value = VALUE;  \
  }

SET_JACOBIAN_STRIDE(float, 1);
SET_JACOBIAN_STRIDE(double, 1);
SET_JACOBIAN_STRIDE(complex64, 2);
SET_JACOBIAN_STRIDE(complex128, 2);

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeTheoreticalJacobianTranspose(
    const Scope& scope, const OutputList& xs,
    const std::vector<TensorShape>& x_shapes,
    const std::vector<Tensor>& x_datas, const OutputList& ys,
    const std::vector<TensorShape>& y_shapes,
    std::vector<Tensor>* jacobian_ts) {
  size_t y_num = y_shapes.size();
  size_t x_num = x_shapes.size();
  // Call AddSymbolicGradients to get 'dxs' (we will feed 'dys').
  OutputList dys;
  dys.reserve(y_shapes.size());
  for (const auto& y_shape : y_shapes) {
    // TODO(suharshs): This currently assumes that all y's are the same type.
    dys.push_back(
        ops::Cast(scope, ops::Const(scope, 1.0, y_shape), ys[0].type()));
  }
  OutputList dxs;
  TF_RETURN_IF_ERROR(AddSymbolicGradients(scope, ys, xs, dys, &dxs));

  // Initialize 'dy_data' to zeros.
  std::vector<Tensor> dy_datas(y_num);
  for (int i = 0; i < y_num; i++) {
    dy_datas[i] = Tensor(ys[i].type(), y_shapes[i]);
    auto dy_data_flat = dy_datas[i].flat<Y_T>();
    dy_data_flat.setZero();
  }

  // Create the feed list.
  ClientSession::FeedType feed_list;
  for (int i = 0; i < x_num; i++) {
    feed_list.insert({xs[i], x_datas[i]});
  }
  for (int i = 0; i < y_num; i++) {
    feed_list.insert({dys[i], dy_datas[i]});
  }

  // x_stride and y_stride are used to calculate the correct jacobian row and
  // column position for a pair of elements at positions r, c within the x and
  // y tensors respectively.
  const int x_stride = JacobianStride<X_T>::value;
  const int y_stride = JacobianStride<Y_T>::value;
  ClientSession session(scope);
  for (int y_idx = 0; y_idx < y_num; y_idx++) {
    auto dy_data_flat = dy_datas[y_idx].flat<Y_T>();
    const int64_t dy_size = y_shapes[y_idx].num_elements();

    // Compute the theoretical Jacobians one row at a time by back propagating
    // '1.0' (or '1' and 'i' if y is complex) for each element of 'dy', while
    // holding all other elements of 'dy' at zero.
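    // Each such run fills one column of the stored (transposed) Jacobian:
    // with 'dy' set to the c-th basis unit, backprop yields
    // dx_flat(r) = d y_c / d x_r for every r.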
    for (int c = 0; c < dy_size; ++c) {
      int unit_dimension = 0;
      for (Y_T unit : BaseUnitsForType<Y_T>::values()) {
        dy_data_flat(c) = unit;

        std::vector<Tensor> dxout;
        TF_RETURN_IF_ERROR(session.Run(feed_list, dxs, &dxout));

        for (int x_idx = 0; x_idx < x_num; x_idx++) {
          if (x_shapes[x_idx] != dxout[x_idx].shape()) {
            return errors::Internal("Gradient for input ", x_idx,
                                    " expected shape ",
                                    x_shapes[x_idx].DebugString(), " but was ",
                                    dxout[x_idx].shape().DebugString());
          }
          const int64_t x_size = x_shapes[x_idx].num_elements();
          auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
          auto dx_flat = dxout[x_idx].flat<X_T>();
          for (int r = 0; r < x_size; ++r) {
            SetJacobian<X_T, JAC_T>(&jacobian, r * x_stride,
                                    c * y_stride + unit_dimension, dx_flat(r),
                                    true /* expand_by_row=true */);
          }
        }

        dy_data_flat(c) = Y_T{0};
        unit_dimension++;
      }
    }
  }
  return OkStatus();
}

Status EvaluateGraph(ClientSession* session, const OutputList& xs,
                     const OutputList& ys, std::vector<Tensor>* x_datas,
                     std::vector<Tensor>* y_datas) {
  // Create the feed list.
  ClientSession::FeedType feed_list;
  for (int i = 0; i < x_datas->size(); i++) {
    feed_list.insert({xs[i], (*x_datas)[i]});
  }

  TF_RETURN_IF_ERROR(session->Run(feed_list, ys, y_datas));
  for (int y_idx = 0; y_idx < y_datas->size(); y_idx++) {
    for (int x_idx = 0; x_idx < x_datas->size(); x_idx++) {
      Tensor y_data = (*y_datas)[y_idx];
      if (y_data.SharesBufferWith((*x_datas)[x_idx])) {
        // Create copies of outputs that share a buffer with any inputs, since
        // the underlying buffers of the input Tensors are not copied for some
        // operations (e.g. Identity), which can lead to incorrect results for
        // the centered difference calculation.
        (*y_datas)[y_idx] = tensor::DeepCopy(y_data);
      }
    }
  }
  return OkStatus();
}

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeNumericJacobianTranspose(const Scope& scope, const OutputList& xs,
                                       const std::vector<TensorShape>& x_shapes,
                                       const OutputList& ys,
                                       const std::vector<TensorShape>& y_shapes,
                                       const JAC_T delta,
                                       std::vector<Tensor>* x_datas,
                                       std::vector<Tensor>* jacobian_ts) {
  size_t y_num = y_shapes.size();
  size_t x_num = x_shapes.size();
  // x_stride and y_stride are used to calculate the correct jacobian row and
  // column position for a pair of elements at positions r, c within the x and
  // y tensors respectively.
  const int x_stride = JacobianStride<X_T>::value;
  const int y_stride = JacobianStride<Y_T>::value;

  ClientSession session(scope);
  for (int x_idx = 0; x_idx < x_num; x_idx++) {
    auto x_data_flat = (*x_datas)[x_idx].flat<X_T>();
    const int64_t x_size = x_shapes[x_idx].num_elements();

    // Compute the numeric Jacobian one column at a time by perturbing each
    // element of 'x_data' (positively and negatively) by 'delta', and
    // updating the jacobian with the centered difference. When x_data is
    // complex-valued, we perturb its real and imaginary parts separately.
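    // Concretely, each entry is approximated by the centered difference
    //   d y_c / d x_r ~= (y_c(x + delta * e_r) - y_c(x - delta * e_r)) / (2 * delta),
    // where e_r is the current unit perturbation.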
    for (int r = 0; r < x_size; ++r) {
      int unit_dimension = 0;
      for (X_T unit : BaseUnitsForType<X_T>::values()) {
        X_T x_delta = unit * X_T{delta};
        // Store current value of 'x' at 'r'.
        X_T v = x_data_flat(r);
        // Evaluate at positive delta.
        x_data_flat(r) = v + x_delta;
        std::vector<Tensor> y_pos;
        TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_pos));
        // Evaluate at negative delta.
        x_data_flat(r) = v - x_delta;
        std::vector<Tensor> y_neg;
        TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_neg));

        for (int y_idx = 0; y_idx < y_num; y_idx++) {
          // Compute element-wise centered difference and store in each
          // Jacobian.
          auto y_pos_flat = y_pos[y_idx].flat<Y_T>();
          auto y_neg_flat = y_neg[y_idx].flat<Y_T>();
          const int64_t y_size = y_shapes[y_idx].num_elements();
          const Y_T scale = 2 * delta;
          auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
          for (int c = 0; c < y_size; ++c) {
            SetJacobian<Y_T, JAC_T>(&jacobian, r * x_stride + unit_dimension,
                                    c * y_stride,
                                    (y_pos_flat(c) - y_neg_flat(c)) / scale,
                                    false /* expand_by_row=false */);
          }
        }
        // Restore pre-perturbation value.
        x_data_flat(r) = v;
        unit_dimension++;
      }
    }
  }
  return OkStatus();
}

// The Jacobian is always a real-valued matrix.
// Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j for
// every pair y_i in y and x_j in x. Note that the Jacobian is defined directly
// over the elements of tensors y and x, and doesn't depend on their shapes.
//
// If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, ..., y_n) the matrix evaluated
// is actually the Jacobian transpose, defined as this m x n matrix:
//    dy_1/dx_1  dy_2/dx_1  ...  dy_n/dx_1
//    dy_1/dx_2  dy_2/dx_2  ...  dy_n/dx_2
//        .
//        .
//        .
//    dy_1/dx_m  dy_2/dx_m  ...  dy_n/dx_m
//
// If x or y is complex, each complex entry is "expanded" into a real and
// imaginary entry, and the Jacobian is organized as above on the expanded list.
// e.g.
// [y1, y2] = Square([x1, x2]) where x and y are complex.
// Writing
//    x = [x1_real, x1_imag, x2_real, x2_imag]
//    y = [y1_real, y1_imag, y2_real, y2_imag]
// the Jacobian transpose is the 4x4 matrix:
//    dy1_real/dx1_real  dy1_imag/dx1_real  dy2_real/dx1_real  dy2_imag/dx1_real
//    dy1_real/dx1_imag  dy1_imag/dx1_imag  dy2_real/dx1_imag  dy2_imag/dx1_imag
//    dy1_real/dx2_real  dy1_imag/dx2_real  dy2_real/dx2_real  dy2_imag/dx2_real
//    dy1_real/dx2_imag  dy1_imag/dx2_imag  dy2_real/dx2_imag  dy2_imag/dx2_imag
template <typename X_T, typename Y_T, typename JAC_T>
void InitJacobians(const OutputList& xs,
                   const std::vector<TensorShape>& x_shapes,
                   const std::vector<TensorShape>& y_shapes,
                   std::vector<Tensor>* jacobians) {
  const size_t y_num = y_shapes.size();
  const size_t x_num = x_shapes.size();
  const DataType jacobian_type = DataTypeToEnum<JAC_T>::v();

  jacobians->resize(y_num * x_num);
  for (int x_idx = 0; x_idx < x_num; x_idx++) {
    // The number of rows is the number of elements in the x tensor multiplied
    // by the number of Jacobian entries needed to represent each x type.
    const int64_t x_size =
        x_shapes[x_idx].num_elements() * JacobianStride<X_T>::value;
    for (int y_idx = 0; y_idx < y_num; y_idx++) {
      // The number of columns is the number of elements in the y tensor
      // multiplied by the number of Jacobian entries needed to represent each
      // y type.
      const int64_t y_size =
          y_shapes[y_idx].num_elements() * JacobianStride<Y_T>::value;
      Tensor jacobian_t(jacobian_type, {x_size, y_size});
      auto jacobian_t_flat = jacobian_t.flat<JAC_T>();
      jacobian_t_flat.setZero();
      (*jacobians)[x_idx * y_num + y_idx] = std::move(jacobian_t);
    }
  }
}

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientErrorInternal(const Scope& scope, const OutputList& xs,
                                    const std::vector<TensorShape>& x_shapes,
                                    const OutputList& ys,
                                    const std::vector<TensorShape>& y_shapes,
                                    std::vector<Tensor>* x_datas,
                                    JAC_T* max_error) {
  // Initialize theoretical Jacobians to zeros.
  std::vector<Tensor> jacobian_ts;
  InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ts);

  // Compute theoretical Jacobian.
  TF_RETURN_IF_ERROR((ComputeTheoreticalJacobianTranspose<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, *x_datas, ys, y_shapes, &jacobian_ts)));

  // Initialize numeric Jacobian to zeros.
  std::vector<Tensor> jacobian_ns;
  InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ns);

  // Compute numeric Jacobian.
  TF_RETURN_IF_ERROR((ComputeNumericJacobianTranspose<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, ys, y_shapes, JAC_T{1e-3f}, x_datas, &jacobian_ns)));

  // Compute the maximum error over all theoretical/numeric Jacobian pairs.
  *max_error = 0.0;
  for (int i = 0; i < jacobian_ts.size(); i++) {
    auto jac_t = jacobian_ts[i].matrix<JAC_T>();
    auto jac_n = jacobian_ns[i].matrix<JAC_T>();
    for (int r = 0; r < jacobian_ts[i].dim_size(0); ++r) {
      for (int c = 0; c < jacobian_ts[i].dim_size(1); ++c) {
        auto cur_error = std::fabs(jac_t(r, c) - jac_n(r, c));
        // Treat any NaN as max_error and immediately return.
        // (Note that std::max may ignore NaN arguments.)
        if (std::isnan(cur_error)) {
          *max_error = cur_error;
          return OkStatus();
        }
        *max_error = std::max(*max_error, cur_error);
      }
    }
  }
  return OkStatus();
}

}  // namespace

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientError(const Scope& scope, const OutputList& xs,
                            const std::vector<TensorShape>& x_shapes,
                            const OutputList& ys,
                            const std::vector<TensorShape>& y_shapes,
                            JAC_T* max_error) {
  if (xs.size() != x_shapes.size()) {
    return errors::InvalidArgument("xs (size ", xs.size(),
                                   ") and x_shapes (size ", x_shapes.size(),
                                   ") must be the same size.");
  }
  if (ys.size() != y_shapes.size()) {
    return errors::InvalidArgument("ys (size ", ys.size(),
                                   ") and y_shapes (size ", y_shapes.size(),
                                   ") must be the same size.");
  }
  // Initialize 'x_datas' to random values.
  std::vector<Tensor> x_datas(x_shapes.size());
  for (int i = 0; i < x_shapes.size(); i++) {
    x_datas[i] = Tensor(xs[i].type(), x_shapes[i]);
    auto x_data_flat = x_datas[i].flat<X_T>();
    x_data_flat.setRandom();
  }
  // Compute gradient error.
  return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, ys, y_shapes, &x_datas, max_error);
}

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientError(const Scope& scope, const Output& x,
                            const Tensor& x_init_value, const Output& y,
                            const TensorShape& y_shape, JAC_T* max_error) {
  // Initialize 'x_data' from 'x_init_value'.
  std::vector<Tensor> x_datas(1, Tensor(x_init_value));
  // Compute gradient error.
  return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
      scope, {x}, {x_datas[0].shape()}, {y}, {y_shape}, &x_datas, max_error);
}
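
// Example usage (a minimal sketch; the graph, shapes, and tolerance below are
// illustrative only and do not appear elsewhere in this file):
//
//   Scope scope = Scope::NewRootScope();
//   auto x = ops::Placeholder(scope, DT_FLOAT);
//   auto y = ops::Square(scope, x);
//   float max_error;
//   TF_CHECK_OK((ComputeGradientError<float, float, float>(
//       scope, {x}, {TensorShape({2, 3})}, {y}, {TensorShape({2, 3})},
//       &max_error)));
//   // A test would then assert max_error against a small tolerance,
//   // e.g. EXPECT_LT(max_error, 1e-4).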

#define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T)                     \
  template Status ComputeGradientError<X_T, Y_T, JAC_T>(               \
      const Scope& scope, const OutputList& xs,                        \
      const std::vector<TensorShape>& x_shapes, const OutputList& ys,  \
      const std::vector<TensorShape>& y_shapes, JAC_T* max_error);     \
  template Status ComputeGradientError<X_T, Y_T, JAC_T>(               \
      const Scope& scope, const Output& x, const Tensor& x_init_value, \
      const Output& y, const TensorShape& y_shape, JAC_T* max_error);

INSTANTIATE_GRAD_ERR_TYPE(float, float, float);
INSTANTIATE_GRAD_ERR_TYPE(double, float, double);
INSTANTIATE_GRAD_ERR_TYPE(double, double, double);
INSTANTIATE_GRAD_ERR_TYPE(complex64, float, float);
INSTANTIATE_GRAD_ERR_TYPE(float, complex64, float);
INSTANTIATE_GRAD_ERR_TYPE(complex64, complex64, float);
INSTANTIATE_GRAD_ERR_TYPE(complex128, complex128, double);

}  // namespace tensorflow