# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of Loss operations for use in neural networks."""

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import weights_broadcast_ops
from tensorflow.python.ops.losses import util
from tensorflow.python.util import dispatch
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_argument_lookup
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["losses.Reduction"])
class Reduction:
  """Types of loss reduction.

  Contains the following values:

  * `NONE`: Un-reduced weighted losses with the same shape as input.
  * `SUM`: Scalar sum of weighted losses.
  * `MEAN`: Scalar `SUM` divided by sum of weights. DEPRECATED.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in
    losses.
  * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
    weights. DEPRECATED.
  * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. DEPRECATED.
  """

  NONE = "none"
  SUM = "weighted_sum"
  SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
  MEAN = "weighted_mean"
  SUM_BY_NONZERO_WEIGHTS = "weighted_sum_by_nonzero_weights"
  SUM_OVER_NONZERO_WEIGHTS = SUM_BY_NONZERO_WEIGHTS

  @classmethod
  def all(cls):
    return (
        cls.NONE,
        cls.SUM,
        cls.MEAN,
        cls.SUM_OVER_BATCH_SIZE,
        cls.SUM_OVER_NONZERO_WEIGHTS,
        cls.SUM_BY_NONZERO_WEIGHTS)

  @classmethod
  def validate(cls, key):
    if key not in cls.all():
      raise ValueError(f"Invalid Reduction Key {key}. Key should be one of "
                       f"{cls.all()}.")


def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
    then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return math_ops.div_no_nan(total_loss, num_present, name="value")


def _num_present(losses, weights, per_batch=False):
  """Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions.
  For example, if `losses` is a tensor of dimension `[4, 5, 6, 3]` and
  `weights` is a tensor of shape `[4, 5]`, then `weights` is, in effect, tiled
  to match the shape of `losses`. Following this effective tile, the total
  number of present elements is the number of non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a sum
      total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
    `per_batch` is `True`, the value is returned as a tensor of size
    `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  if ((isinstance(weights, float) and weights != 0.0) or
      (context.executing_eagerly() and weights._rank() == 0  # pylint: disable=protected-access
       and not math_ops.equal(weights, 0.0))):
    return _num_elements(losses)
  with ops.name_scope(None, "num_present", (losses, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    present = array_ops.where(
        math_ops.equal(weights, 0.0),
        array_ops.zeros_like(weights),
        array_ops.ones_like(weights))
    present = weights_broadcast_ops.broadcast_weights(present, losses)
    if per_batch:
      return math_ops.reduce_sum(
          present,
          axis=math_ops.range(1, array_ops.rank(present)),
          keepdims=True,
          name=scope)
    return math_ops.reduce_sum(present, name=scope)


def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with ops.name_scope(None, "num_elements", values=[losses]) as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)


@tf_export(v1=["losses.compute_weighted_loss"])
@dispatch.add_dispatch_support
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
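
  A minimal eager-mode sketch (illustrative values; under the default
  `SUM_BY_NONZERO_WEIGHTS` reduction the weighted sum is divided by the number
  of non-zero weights):

  >>> losses = tf.constant([[1.0, 2.0], [3.0, 4.0]])
  >>> tf.compat.v1.losses.compute_weighted_loss(
  ...     losses, weights=[[1.0], [0.0]]).numpy()
  1.5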
  @end_compatibility
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    def compute_loss(losses, weights, loss_collection, reduction):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.cast(losses, dtype=dtypes.float32)
      weights = math_ops.cast(weights, dtype=dtypes.float32)
      weighted_losses = math_ops.multiply(losses, weights)
      if reduction == Reduction.NONE:
        loss = weighted_losses
      else:
        loss = math_ops.reduce_sum(weighted_losses)
        if reduction == Reduction.MEAN:
          loss = _safe_mean(
              loss, math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          loss = _safe_mean(loss, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          loss = _safe_mean(loss, _num_elements(losses))

      # Convert the result back to the input type.
      loss = math_ops.cast(loss, input_dtype)
      util.add_loss(loss, loss_collection)
      return loss

    # Skip the assert_broadcastable in XLA context because asserts are not
    # supported so it only causes unnecessary ops. Also skip it because it uses
    # a DenseToDenseSetOperation op that is incompatible with XLA when
    # the shape(s) are dynamic.
    if control_flow_ops.get_enclosing_xla_context() is not None:
      return compute_loss(losses, weights, loss_collection, reduction)
    else:
      with ops.control_dependencies(
          (weights_broadcast_ops.assert_broadcastable(weights, losses),)):
        return compute_loss(losses, weights, loss_collection, reduction)


@tf_export(v1=["losses.absolute_difference"])
@dispatch.add_dispatch_support
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.cosine_distance"])
@dispatch.add_dispatch_support
@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def cosine_distance(
    labels, predictions, axis=None, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
    dim=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that `predictions` and `labels` are already
  unit-normalized.

  Args:
    labels: `Tensor` whose shape matches 'predictions'
    predictions: An arbitrary matrix.
    axis: The dimension along which the cosine distance is computed.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.
    dim: The old (deprecated) name for `axis`.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape, or
      `axis`, `labels`, `predictions` or `weights` is `None`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
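
  A small eager-mode sketch with unit-normalized rows (illustrative values;
  orthogonal vectors give a distance of 1):

  >>> labels = tf.constant([[0.0, 1.0]])
  >>> predictions = tf.constant([[1.0, 0.0]])
  >>> tf.compat.v1.losses.cosine_distance(labels, predictions, axis=1).numpy()
  1.0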
  @end_compatibility
  """
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    raise ValueError("You must specify argument `axis`.")
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    radial_diffs = math_ops.multiply(predictions, labels)
    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keepdims=True)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.hinge_loss"])
@dispatch.add_dispatch_support
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
      (resp. negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or
      if `labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.cast(logits, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
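    # With the labels shifted to {-1, 1}, the per-element loss computed below
    # is relu(1 - labels * logits), i.e. the standard hinge loss
    # max(0, 1 - y * logits).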
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.huber_loss"])
@dispatch.add_dispatch_support
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a [Huber Loss](https://en.wikipedia.org/wiki/Huber_loss) term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the huber loss function changes from a
      quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already
    # a nonzero contribution to the gradient from the quadratic term.
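    # With quadratic == min(|error|, delta) and linear == max(|error| - delta,
    # 0) computed below, `losses` evaluates to 0.5 * error^2 when
    # |error| <= delta and to 0.5 * delta^2 + delta * (|error| - delta)
    # otherwise, matching the piecewise definition in the docstring.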
    linear = math_ops.subtract(abs_error, quadratic)
    losses = math_ops.add(
        math_ops.multiply(
            ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
            math_ops.multiply(quadratic, quadratic)),
        math_ops.multiply(delta, linear))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.log_loss"])
@dispatch.add_dispatch_support
def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
             loss_collection=ops.GraphKeys.LOSSES,
             reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Log Loss term to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "log_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = -math_ops.multiply(
        labels,
        math_ops.log(predictions + epsilon)) - math_ops.multiply(
            (1 - labels), math_ops.log(1 - predictions + epsilon))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


# TODO(b/37208492): Add reduction arg.
@tf_export(v1=["losses.mean_pairwise_squared_error"])
@dispatch.add_dispatch_support
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs
  of corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
  of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of
  pairs is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
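
  A minimal eager-mode sketch (illustrative values; the scalar result follows
  the pairwise formula above):

  >>> labels = tf.constant([[0.0, 1.0, 2.0]])
  >>> predictions = tf.constant([[0.0, 2.0, 4.0]])
  >>> tf.compat.v1.losses.mean_pairwise_squared_error(
  ...     labels, predictions).numpy()
  2.0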
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)

    def compute_loss(labels, predictions, weights, loss_collection):
      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss

    # Skip the assert_broadcastable in XLA context because asserts are not
    # supported so it only causes unnecessary ops. Also skip it because it uses
    # a DenseToDenseSetOperation op that is incompatible with XLA when
    # the shape(s) are dynamic.
    if control_flow_ops.get_enclosing_xla_context() is not None:
      return compute_loss(labels, predictions, weights, loss_collection)
    else:
      with ops.control_dependencies(
          (weights_broadcast_ops.assert_broadcastable(weights, labels),)):
        return compute_loss(labels, predictions, weights, loss_collection)


@tf_export(v1=["losses.mean_squared_error"])
@dispatch.add_dispatch_support
def mean_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or `predictions`
      is None.

  @compatibility(TF2)

  `tf.compat.v1.losses.mean_squared_error` is mostly compatible with eager
  execution and `tf.function`. But, the `loss_collection` argument is
  ignored when executing eagerly and no loss will be written to the loss
  collections. You will need to either hold on to the return value manually
  or rely on `tf.keras.Model` loss tracking.

  To switch to native TF2 style, instantiate the
  `tf.keras.losses.MeanSquaredError` class and call the object instead.

  #### Structural Mapping to Native TF2

  Before:

  ```python
  loss = tf.compat.v1.losses.mean_squared_error(
      labels=labels,
      predictions=predictions,
      weights=weights,
      reduction=reduction)
  ```

  After:

  ```python
  loss_fn = tf.keras.losses.MeanSquaredError(
      reduction=reduction)
  loss = loss_fn(
      y_true=labels,
      y_pred=predictions,
      sample_weight=weights)
  ```

  #### How to Map Arguments

  | TF1 Arg Name          | TF2 Arg Name     | Note                        |
  | :-------------------- | :--------------- | :-------------------------- |
  | `labels`              | `y_true`         | In `__call__()` method      |
  | `predictions`         | `y_pred`         | In `__call__()` method      |
  | `weights`             | `sample_weight`  | In `__call__()` method.     |
  : : : The shape requirements for `sample_weight` are different from :
  : : : `weights`. Please check the [argument definition][api_docs] for :
  : : : details. :
  | `scope`               | Not supported    | -                           |
  | `loss_collection`     | Not supported    | Losses should be tracked    |
  : : : explicitly or with Keras APIs, for example, [add_loss][add_loss], :
  : : : instead of via collections :
  | `reduction`           | `reduction`      | In constructor. Value of    |
  : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.compat.v1.losses.Reduction.SUM`, :
  : : : `tf.compat.v1.losses.Reduction.NONE` in :
  : : : `tf.compat.v1.losses.mean_squared_error` correspond to :
  : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.keras.losses.Reduction.SUM`, :
  : : : `tf.keras.losses.Reduction.NONE`, respectively. If you :
  : : : used any other value for `reduction`, including the default value :
  : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is :
  : : : no directly corresponding value. Please modify the loss :
  : : : implementation manually. :

  [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss
  [api_docs]:https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanSquaredError#__call__

  #### Before & After Usage Example

  Before:

  >>> y_true = [1, 2, 3]
  >>> y_pred = [1, 3, 5]
  >>> weights = [0, 1, 0.25]
  >>> # samples with zero-weight are excluded from calculation when `reduction`
  >>> # argument is set to default value `Reduction.SUM_BY_NONZERO_WEIGHTS`
  >>> tf.compat.v1.losses.mean_squared_error(
  ...     labels=y_true,
  ...     predictions=y_pred,
  ...     weights=weights).numpy()
  1.0

  >>> tf.compat.v1.losses.mean_squared_error(
  ...     labels=y_true,
  ...     predictions=y_pred,
  ...     weights=weights,
  ...     reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE).numpy()
  0.66667

  After:

  >>> y_true = [[1.0], [2.0], [3.0]]
  >>> y_pred = [[1.0], [3.0], [5.0]]
  >>> weights = [1, 1, 0.25]
  >>> mse = tf.keras.losses.MeanSquaredError(
  ...     reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE)
  >>> mse(y_true=y_true, y_pred=y_pred, sample_weight=weights).numpy()
  0.66667

  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.squared_difference(predictions, labels)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.sigmoid_cross_entropy"])
@dispatch.add_dispatch_support
def sigmoid_cross_entropy(
    multi_class_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:

      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
                              + 0.5 * label_smoothing

  Args:
    multi_class_labels: `[batch_size, num_classes]` target integer labels in
      `{0, 1}`.
    logits: Float `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `multi_class_labels`, and must be broadcastable to `multi_class_labels`
      (i.e., all dimensions must be either `1`, or the same as the
      corresponding `losses` dimension).
    label_smoothing: If greater than `0` then smooth the labels.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `logits`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weights` is invalid, or if
      `weights` is None. Also if `multi_class_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
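
  A minimal eager-mode sketch (illustrative shapes; the default reduction
  returns a scalar):

  >>> labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
  >>> logits = tf.constant([[2.0, -1.0], [-3.0, 4.0]])
  >>> loss = tf.compat.v1.losses.sigmoid_cross_entropy(labels, logits)
  >>> loss.shape
  TensorShape([])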
  @end_compatibility
  """
  if multi_class_labels is None:
    raise ValueError("Argument `multi_class_labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      (logits, multi_class_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(labels=multi_class_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.softmax_cross_entropy"])
@dispatch.add_dispatch_support
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  r"""Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:

      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Note that `onehot_labels` and `logits` must have the same shape,
  e.g. `[batch_size, num_classes]`. The shape of `weights` must be
  broadcastable to loss, whose shape is decided by the shape of `logits`.
  In case the shape of `logits` is `[batch_size, num_classes]`, loss is
  a `Tensor` of shape `[batch_size]`.

  Args:
    onehot_labels: One-hot-encoded labels.
    logits: Logits outputs of the network.
    weights: Optional `Tensor` that is broadcastable to loss.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None. Also if
      `onehot_labels` or `logits` is None.

  @compatibility(TF2)

  `tf.compat.v1.losses.softmax_cross_entropy` is mostly compatible with eager
  execution and `tf.function`. But, the `loss_collection` argument is
  ignored when executing eagerly and no loss will be written to the loss
  collections. You will need to either hold on to the return value manually
  or rely on `tf.keras.Model` loss tracking.

  To switch to native TF2 style, instantiate the
  `tf.keras.losses.CategoricalCrossentropy` class with `from_logits` set
  as `True` and call the object instead.

  #### Structural Mapping to Native TF2

  Before:

  ```python
  loss = tf.compat.v1.losses.softmax_cross_entropy(
      onehot_labels=onehot_labels,
      logits=logits,
      weights=weights,
      label_smoothing=smoothing)
  ```

  After:

  ```python
  loss_fn = tf.keras.losses.CategoricalCrossentropy(
      from_logits=True,
      label_smoothing=smoothing)
  loss = loss_fn(
      y_true=onehot_labels,
      y_pred=logits,
      sample_weight=weights)
  ```

  #### How to Map Arguments

  | TF1 Arg Name          | TF2 Arg Name     | Note                        |
  | :-------------------- | :--------------- | :-------------------------- |
  | -                     | `from_logits`    | Set `from_logits` as True   |
  : : : to have identical behavior :
  | `onehot_labels`       | `y_true`         | In `__call__()` method      |
  | `logits`              | `y_pred`         | In `__call__()` method      |
  | `weights`             | `sample_weight`  | In `__call__()` method      |
  | `label_smoothing`     | `label_smoothing`| In constructor              |
  | `scope`               | Not supported    | -                           |
  | `loss_collection`     | Not supported    | Losses should be tracked    |
  : : : explicitly or with Keras APIs, for example, [add_loss][add_loss], :
  : : : instead of via collections :
  | `reduction`           | `reduction`      | In constructor. Value of    |
  : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.compat.v1.losses.Reduction.SUM`, :
  : : : `tf.compat.v1.losses.Reduction.NONE` in :
  : : : `tf.compat.v1.losses.softmax_cross_entropy` correspond to :
  : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.keras.losses.Reduction.SUM`, :
  : : : `tf.keras.losses.Reduction.NONE`, respectively. If you :
  : : : used any other value for `reduction`, including the default value :
  : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is :
  : : : no directly corresponding value. Please modify the loss :
  : : : implementation manually. :

  [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss

  #### Before & After Usage Example

  Before:

  >>> y_true = [[0, 1, 0], [0, 0, 1]]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> weights = [0.3, 0.7]
  >>> smoothing = 0.2
  >>> tf.compat.v1.losses.softmax_cross_entropy(y_true, y_pred, weights=weights,
  ...   label_smoothing=smoothing).numpy()
  0.57618

  After:

  >>> cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True,
  ...   label_smoothing=smoothing)
  >>> cce(y_true, y_pred, sample_weight=weights).numpy()
  0.57618

  @end_compatibility
  """
  if onehot_labels is None:
    raise ValueError("Argument `onehot_labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[-1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    onehot_labels = array_ops.stop_gradient(
        onehot_labels, name="labels_stop_gradient")
    losses = nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits, name="xentropy")

    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


# TODO(ptucker): Merge this with similar method in metrics_impl.
def _remove_squeezable_dimensions(
    labels, predictions, weights=None, expected_rank_diff=0):
  """Internal version of _remove_squeezable_dimensions which handles weights.

  Squeezes `predictions` and `labels` if their ranks differ from expected by
  exactly 1.
  Squeezes `weights` if its rank is 1 more than the new rank of `predictions`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
      and its rank is 1 more than the new rank of `labels`.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.

  Returns:
    Tuple of `predictions`, `labels` and `weights`, possibly with the last
    dimension squeezed.
  """
  labels, predictions = confusion_matrix.remove_squeezable_dimensions(
      labels, predictions, expected_rank_diff=expected_rank_diff)

  if weights is not None:
    weights = ops.convert_to_tensor(weights)
    labels_rank = labels.get_shape().ndims
    weights_shape = weights.get_shape()
    weights_rank = weights_shape.ndims

    if (labels_rank is not None) and (weights_rank is not None):
      # Use static rank.
      rank_diff = weights_rank - labels_rank
      if rank_diff == 1:
        weights = array_ops.squeeze(weights, [-1])
      return labels, predictions, weights

    # Use dynamic rank.
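    # The static rank of `weights` is unknown here, so decide at run time:
    # squeeze the last dimension only if `weights` turns out to have exactly
    # one more dimension than `labels`.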
    rank_diff = array_ops.rank(weights) - array_ops.rank(labels)
    if (weights_rank is None) or (
        weights_rank > 0 and weights_shape.dims[-1].is_compatible_with(1)):
      weights = control_flow_ops.cond(
          math_ops.equal(1, rank_diff),
          lambda: array_ops.squeeze(weights, [-1]),
          lambda: weights)

  return labels, predictions, weights


@tf_export(v1=["losses.sparse_softmax_cross_entropy"])
@dispatch.add_dispatch_support
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
      `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable to
      `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as scope:
    # As documented above in Args, labels contain class IDs and logits contains
    # 1 probability per class ID, so we expect rank(logits) - rank(labels) == 1;
    # therefore, expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)