# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of Loss operations for use in neural networks."""

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import weights_broadcast_ops
from tensorflow.python.ops.losses import util
from tensorflow.python.util import dispatch
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_argument_lookup
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["losses.Reduction"])
class Reduction:
  """Types of loss reduction.

  Contains the following values:

  * `NONE`: Un-reduced weighted losses with the same shape as input.
  * `SUM`: Scalar sum of weighted losses.
  * `MEAN`: Scalar `SUM` divided by sum of weights. DEPRECATED.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
  * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
     weights. DEPRECATED.
  * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. DEPRECATED.
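
  For example (a minimal illustration with made-up values):

  ```python
  losses  = [1.0, 2.0]
  weights = [1.0, 0.0]     # weighted losses -> [1.0, 0.0]
  # SUM                    -> 1.0
  # SUM_OVER_BATCH_SIZE    -> 1.0 / 2 = 0.5  (number of elements in `losses`)
  # SUM_BY_NONZERO_WEIGHTS -> 1.0 / 1 = 1.0  (number of non-zero weights)
  # MEAN                   -> 1.0 / 1.0 = 1.0  (sum of weights)
  ```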
  """

  NONE = "none"
  SUM = "weighted_sum"
  SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
  MEAN = "weighted_mean"
  SUM_BY_NONZERO_WEIGHTS = "weighted_sum_by_nonzero_weights"
  SUM_OVER_NONZERO_WEIGHTS = SUM_BY_NONZERO_WEIGHTS

  @classmethod
  def all(cls):
    return (
        cls.NONE,
        cls.SUM,
        cls.MEAN,
        cls.SUM_OVER_BATCH_SIZE,
        cls.SUM_OVER_NONZERO_WEIGHTS,
        cls.SUM_BY_NONZERO_WEIGHTS)

  @classmethod
  def validate(cls, key):
    if key not in cls.all():
      raise ValueError(f"Invalid Reduction Key {key}. Key should be one of "
                       f"{cls.all()}.")


def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return math_ops.div_no_nan(total_loss, num_present, name="value")


def _num_present(losses, weights, per_batch=False):
  """Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is,
  in effect, tiled to match the shape of `losses`. Following this effective
  tile, the total number of present elements is the number of non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a sum
      total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
      `per_batch` is `True`, the value is returned as a tensor of size
      `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  if ((isinstance(weights, float) and weights != 0.0) or
      (context.executing_eagerly() and weights._rank() == 0  # pylint: disable=protected-access
       and not math_ops.equal(weights, 0.0))):
    return _num_elements(losses)
  with ops.name_scope(None, "num_present", (losses, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    present = array_ops.where(
        math_ops.equal(weights, 0.0),
        array_ops.zeros_like(weights),
        array_ops.ones_like(weights))
    present = weights_broadcast_ops.broadcast_weights(present, losses)
    if per_batch:
      return math_ops.reduce_sum(
          present,
          axis=math_ops.range(1, array_ops.rank(present)),
          keepdims=True,
          name=scope)
    return math_ops.reduce_sum(present, name=scope)


def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with ops.name_scope(None, "num_elements", values=[losses]) as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)


@tf_export(v1=["losses.compute_weighted_loss"])
@dispatch.add_dispatch_support
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

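  A minimal sketch of that pattern (illustrative only; `per_example_weights`
  is a hypothetical tensor that depends on model variables):

  ```python
  weights = tf.stop_gradient(per_example_weights)
  loss = tf.compat.v1.losses.compute_weighted_loss(losses, weights=weights)
  ```
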
  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    def compute_loss(losses, weights, loss_collection, reduction):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.cast(losses, dtype=dtypes.float32)
      weights = math_ops.cast(weights, dtype=dtypes.float32)
      weighted_losses = math_ops.multiply(losses, weights)
      if reduction == Reduction.NONE:
        loss = weighted_losses
      else:
        loss = math_ops.reduce_sum(weighted_losses)
        if reduction == Reduction.MEAN:
          loss = _safe_mean(
              loss, math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          loss = _safe_mean(loss, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          loss = _safe_mean(loss, _num_elements(losses))

      # Convert the result back to the input type.
      loss = math_ops.cast(loss, input_dtype)
      util.add_loss(loss, loss_collection)
      return loss

    # Skip the assert_broadcastable in XLA context because asserts are not
    # supported so it only causes unnecessary ops. Also skip it because it uses
    # a DenseToDenseSetOperation op that is incompatible with XLA when
    # the shape(s) are dynamic.
    if control_flow_ops.get_enclosing_xla_context() is not None:
      return compute_loss(losses, weights, loss_collection, reduction)
    else:
      with ops.control_dependencies(
          (weights_broadcast_ops.assert_broadcastable(weights, losses),)):
        return compute_loss(losses, weights, loss_collection, reduction)


@tf_export(v1=["losses.absolute_difference"])
@dispatch.add_dispatch_support
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.cosine_distance"])
@dispatch.add_dispatch_support
@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def cosine_distance(
    labels, predictions, axis=None, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
    dim=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that `predictions` and `labels` are already
  unit-normalized.

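  A minimal sketch of normalizing first (illustrative; `raw_labels` and
  `raw_predictions` are hypothetical un-normalized tensors of shape
  `[batch_size, dims]`):

  ```python
  labels = tf.math.l2_normalize(raw_labels, axis=1)
  predictions = tf.math.l2_normalize(raw_predictions, axis=1)
  loss = tf.compat.v1.losses.cosine_distance(labels, predictions, axis=1)
  ```
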
  Args:
    labels: `Tensor` whose shape matches `predictions`.
    predictions: An arbitrary matrix.
    axis: The dimension along which the cosine distance is computed.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.
    dim: The old (deprecated) name for `axis`.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape, or
      `axis`, `labels`, `predictions` or `weights` is `None`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    raise ValueError("You must specify argument `axis`.")
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    radial_diffs = math_ops.multiply(predictions, labels)
    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keepdims=True)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.hinge_loss"])
@dispatch.add_dispatch_support
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

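  The per-element loss implemented below is
  `max(0, 1 - (2 * labels - 1) * logits)`. A small worked example (made-up
  numbers):

  ```python
  labels = [0.0, 1.0]   # converted internally to [-1.0, 1.0]
  logits = [0.5, 0.3]
  # losses: max(0, 1 - (-1.0) * 0.5) = 1.5,  max(0, 1 - 1.0 * 0.3) = 0.7
  ```
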
  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
      (resp. negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or
      if `labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.cast(logits, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.huber_loss"])
@dispatch.add_dispatch_support
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a [Huber Loss](https://en.wikipedia.org/wiki/Huber_loss) term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

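  For example, with the default `delta = 1.0` (illustrative numbers): an error
  of `0.5` gives `0.5 * 0.5^2 = 0.125`, while an error of `2.0` falls in the
  linear regime and gives `0.5 * 1.0^2 + 1.0 * (2.0 - 1.0) = 1.5`.
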
  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the Huber loss function changes from
      quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or
      `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already a
    # nonzero contribution to the gradient from the quadratic term.
    linear = math_ops.subtract(abs_error, quadratic)
    losses = math_ops.add(
        math_ops.multiply(
            ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
            math_ops.multiply(quadratic, quadratic)),
        math_ops.multiply(delta, linear))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.log_loss"])
@dispatch.add_dispatch_support
def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
             loss_collection=ops.GraphKeys.LOSSES,
             reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Log Loss term to the training procedure.

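  The per-element loss computed below is
  `-labels * log(predictions + epsilon) - (1 - labels) * log(1 - predictions + epsilon)`,
  i.e. a binary cross-entropy between `labels` and `predictions`, with
  `epsilon` guarding against taking the log of zero.
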
  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "log_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = -math_ops.multiply(
        labels,
        math_ops.log(predictions + epsilon)) - math_ops.multiply(
            (1 - labels), math_ops.log(1 - predictions + epsilon))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


# TODO(b/37208492): Add reduction arg.
@tf_export(v1=["losses.mean_pairwise_squared_error"])
@dispatch.add_dispatch_support
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], the three
  pairs of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)

    def compute_loss(labels, predictions, weights, loss_collection):
      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

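      # When every element of a sample is present (all weights non-zero, n
      # elements per sample), `term1 - term2` equals the mean of the squared
      # pairwise differences: 2 * (n * sum(d^2) - (sum(d))^2) / (n * (n - 1)).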
      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss

    # Skip the assert_broadcastable in XLA context because asserts are not
    # supported so it only causes unnecessary ops. Also skip it because it uses
    # a DenseToDenseSetOperation op that is incompatible with XLA when
    # the shape(s) are dynamic.
    if control_flow_ops.get_enclosing_xla_context() is not None:
      return compute_loss(labels, predictions, weights, loss_collection)
    else:
      with ops.control_dependencies(
          (weights_broadcast_ops.assert_broadcastable(weights, labels),)):
        return compute_loss(labels, predictions, weights, loss_collection)


@tf_export(v1=["losses.mean_squared_error"])
@dispatch.add_dispatch_support
def mean_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(TF2)

  `tf.compat.v1.losses.mean_squared_error` is mostly compatible with eager
  execution and `tf.function`. But, the `loss_collection` argument is
  ignored when executing eagerly and no loss will be written to the loss
  collections. You will need to either hold on to the return value manually
  or rely on `tf.keras.Model` loss tracking.


  To switch to native TF2 style, instantiate the
  `tf.keras.losses.MeanSquaredError` class and call the object instead.


  #### Structural Mapping to Native TF2

  Before:

  ```python
  loss = tf.compat.v1.losses.mean_squared_error(
    labels=labels,
    predictions=predictions,
    weights=weights,
    reduction=reduction)
  ```

  After:

  ```python
  loss_fn = tf.keras.losses.MeanSquaredError(
    reduction=reduction)
  loss = loss_fn(
    y_true=labels,
    y_pred=predictions,
    sample_weight=weights)
  ```

  #### How to Map Arguments

  | TF1 Arg Name          | TF2 Arg Name     | Note                       |
  | :-------------------- | :--------------- | :------------------------- |
  | `labels`              | `y_true`         | In `__call__()` method     |
  | `predictions`         | `y_pred`         | In `__call__()` method     |
  | `weights`             | `sample_weight`  | In `__call__()` method.    |
  : : : The shape requirements for `sample_weight` are different from     :
  : : : `weights`. Please check the [argument definition][api_docs] for   :
  : : : details.                                                          :
  | `scope`               | Not supported    | -                          |
  | `loss_collection`     | Not supported    | Losses should be tracked   |
  : : : explicitly or with Keras APIs, for example, [add_loss][add_loss], :
  : : : instead of via collections                                        :
  | `reduction`           | `reduction`      | In constructor. Value of   |
  : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`,              :
  : : : `tf.compat.v1.losses.Reduction.SUM`,                              :
  : : : `tf.compat.v1.losses.Reduction.NONE` in                           :
  : : : `tf.compat.v1.losses.mean_squared_error` correspond to            :
  : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`,                  :
  : : : `tf.keras.losses.Reduction.SUM`,                                  :
  : : : `tf.keras.losses.Reduction.NONE`, respectively. If you            :
  : : : used another value for `reduction`, including the default value   :
  : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is  :
  : : : no directly corresponding value. Please modify the loss           :
  : : : implementation manually.                                          :

  [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss
  [api_docs]:https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanSquaredError#__call__


  #### Before & After Usage Example

  Before:

  >>> y_true = [1, 2, 3]
  >>> y_pred = [1, 3, 5]
  >>> weights = [0, 1, 0.25]
  >>> # samples with zero-weight are excluded from calculation when `reduction`
  >>> # argument is set to default value `Reduction.SUM_BY_NONZERO_WEIGHTS`
  >>> tf.compat.v1.losses.mean_squared_error(
  ...    labels=y_true,
  ...    predictions=y_pred,
  ...    weights=weights).numpy()
  1.0

  >>> tf.compat.v1.losses.mean_squared_error(
  ...    labels=y_true,
  ...    predictions=y_pred,
  ...    weights=weights,
  ...    reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE).numpy()
  0.66667

  After:

  >>> y_true = [[1.0], [2.0], [3.0]]
  >>> y_pred = [[1.0], [3.0], [5.0]]
  >>> weights = [1, 1, 0.25]
  >>> mse = tf.keras.losses.MeanSquaredError(
  ...    reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE)
  >>> mse(y_true=y_true, y_pred=y_pred, sample_weight=weights).numpy()
  0.66667

  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.squared_difference(predictions, labels)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.sigmoid_cross_entropy"])
@dispatch.add_dispatch_support
def sigmoid_cross_entropy(
    multi_class_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:

      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
                              + 0.5 * label_smoothing

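  For instance, with `label_smoothing = 0.2` a hard label of `1` becomes
  `1 * 0.8 + 0.5 * 0.2 = 0.9` and a hard label of `0` becomes `0.1`.
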
  Args:
    multi_class_labels: `[batch_size, num_classes]` target integer labels in
      `{0, 1}`.
    logits: Float `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `multi_class_labels`, and must be broadcastable to `multi_class_labels`
      (i.e., all dimensions must be either `1`, or the same as the
      corresponding `losses` dimension).
    label_smoothing: If greater than `0` then smooth the labels.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `logits`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weights` is invalid, or if
      `weights` is None.  Also if `multi_class_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if multi_class_labels is None:
    raise ValueError("Argument `multi_class_labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      (logits, multi_class_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(labels=multi_class_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.softmax_cross_entropy"])
@dispatch.add_dispatch_support
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  r"""Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

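  For instance, with `label_smoothing = 0.2` and `num_classes = 3`, the
  on-value of a one-hot label becomes `1 * 0.8 + 0.2 / 3` (about `0.867`) and
  each off-value becomes `0.2 / 3` (about `0.067`).
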
  Note that `onehot_labels` and `logits` must have the same shape,
  e.g. `[batch_size, num_classes]`. The shape of `weights` must be
  broadcastable to loss, whose shape is decided by the shape of `logits`.
  In case the shape of `logits` is `[batch_size, num_classes]`, loss is
  a `Tensor` of shape `[batch_size]`.

  Args:
    onehot_labels: One-hot-encoded labels.
    logits: Logits outputs of the network.
    weights: Optional `Tensor` that is broadcastable to loss.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.  Also if
      `onehot_labels` or `logits` is None.

  @compatibility(TF2)

  `tf.compat.v1.losses.softmax_cross_entropy` is mostly compatible with eager
  execution and `tf.function`. But, the `loss_collection` argument is
  ignored when executing eagerly and no loss will be written to the loss
  collections. You will need to either hold on to the return value manually
  or rely on `tf.keras.Model` loss tracking.


  To switch to native TF2 style, instantiate the
  `tf.keras.losses.CategoricalCrossentropy` class with `from_logits` set
  as `True` and call the object instead.


  #### Structural Mapping to Native TF2

  Before:

  ```python
  loss = tf.compat.v1.losses.softmax_cross_entropy(
    onehot_labels=onehot_labels,
    logits=logits,
    weights=weights,
    label_smoothing=smoothing)
  ```

  After:

  ```python
  loss_fn = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True,
    label_smoothing=smoothing)
  loss = loss_fn(
    y_true=onehot_labels,
    y_pred=logits,
    sample_weight=weights)
  ```

  #### How to Map Arguments

  | TF1 Arg Name          | TF2 Arg Name     | Note                       |
  | :-------------------- | :--------------- | :------------------------- |
  |  -                    | `from_logits`    | Set `from_logits` as True  |
  :                       :                  : to have identical behavior :
  | `onehot_labels`       | `y_true`         | In `__call__()` method     |
  | `logits`              | `y_pred`         | In `__call__()` method     |
  | `weights`             | `sample_weight`  | In `__call__()` method     |
  | `label_smoothing`     | `label_smoothing`| In constructor             |
  | `scope`               | Not supported    | -                          |
  | `loss_collection`     | Not supported    | Losses should be tracked   |
  :                       :                  : explicitly or with Keras   :
  :                       :                  : APIs, for example,         :
  :                       :                  : [add_loss][add_loss],      :
  :                       :                  : instead of via collections :
  | `reduction`           | `reduction`      | In constructor. Value of   |
  : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`,              :
  : : : `tf.compat.v1.losses.Reduction.SUM`,                              :
  : : : `tf.compat.v1.losses.Reduction.NONE` in                           :
  : : : `tf.compat.v1.losses.softmax_cross_entropy` correspond to         :
  : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`,                  :
  : : : `tf.keras.losses.Reduction.SUM`,                                  :
  : : : `tf.keras.losses.Reduction.NONE`, respectively. If you            :
  : : : used another value for `reduction`, including the default value   :
  : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is  :
  : : : no directly corresponding value. Please modify the loss           :
  : : : implementation manually.                                          :

  [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss


  #### Before & After Usage Example

  Before:

  >>> y_true = [[0, 1, 0], [0, 0, 1]]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> weights = [0.3, 0.7]
  >>> smoothing = 0.2
  >>> tf.compat.v1.losses.softmax_cross_entropy(y_true, y_pred, weights=weights,
  ...   label_smoothing=smoothing).numpy()
  0.57618

  After:

  >>> cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True,
  ...   label_smoothing=smoothing)
  >>> cce(y_true, y_pred, sample_weight=weights).numpy()
  0.57618

  @end_compatibility
  """
  if onehot_labels is None:
    raise ValueError("Argument `onehot_labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[-1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    onehot_labels = array_ops.stop_gradient(
        onehot_labels, name="labels_stop_gradient")
    losses = nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits, name="xentropy")

    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


# TODO(ptucker): Merge this with similar method in metrics_impl.
def _remove_squeezable_dimensions(
    labels, predictions, weights=None, expected_rank_diff=0):
  """Internal version of _remove_squeezable_dimensions which handles weights.

  Squeezes `predictions` and `labels` if their ranks differ from expected by
  exactly 1.
  Squeezes `weights` if its rank is 1 more than the new rank of `predictions`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
      and its rank is 1 more than the new rank of `labels`.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.

  Returns:
    Tuple of `predictions`, `labels` and `weights`, possibly with the last
    dimension squeezed.
  """
  labels, predictions = confusion_matrix.remove_squeezable_dimensions(
      labels, predictions, expected_rank_diff=expected_rank_diff)

  if weights is not None:
    weights = ops.convert_to_tensor(weights)
    labels_rank = labels.get_shape().ndims
    weights_shape = weights.get_shape()
    weights_rank = weights_shape.ndims

    if (labels_rank is not None) and (weights_rank is not None):
      # Use static rank.
      rank_diff = weights_rank - labels_rank
      if rank_diff == 1:
        weights = array_ops.squeeze(weights, [-1])
      return labels, predictions, weights

    # Use dynamic rank.
    rank_diff = array_ops.rank(weights) - array_ops.rank(labels)
    if (weights_rank is None) or (
        weights_rank > 0 and weights_shape.dims[-1].is_compatible_with(1)):
      weights = control_flow_ops.cond(
          math_ops.equal(1, rank_diff),
          lambda: array_ops.squeeze(weights, [-1]),
          lambda: weights)

  return labels, predictions, weights


@tf_export(v1=["losses.sparse_softmax_cross_entropy"])
@dispatch.add_dispatch_support
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

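  Unlike `softmax_cross_entropy`, `labels` here are class indices rather than
  one-hot vectors. A minimal sketch (made-up values):

  ```python
  labels = tf.constant([1, 2])                 # shape [batch_size]
  logits = tf.constant([[1.0, 2.0, 0.5],
                        [0.3, 0.2, 0.9]])      # shape [batch_size, num_classes]
  loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels, logits)
  ```
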
  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
      `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable to
      `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as scope:
    # As documented above in Args, `labels` contains class IDs and `logits`
    # contains one value per class ID, so we expect
    # rank(logits) - rank(labels) == 1; therefore, expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
