# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Layers that act as activation functions."""
# pylint: disable=g-classes-have-attributes

from tensorflow.python.framework import dtypes
from tensorflow.python.keras import backend
from tensorflow.python.keras import constraints
from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import math_ops
from tensorflow.python.util.tf_export import keras_export


def get_globals():
  return globals()


@keras_export('keras.layers.LeakyReLU')
class LeakyReLU(Layer):
  """Leaky version of a Rectified Linear Unit.

  It allows a small gradient when the unit is not active:

  ```
    f(x) = alpha * x if x < 0
    f(x) = x if x >= 0
  ```

  Usage:

  >>> layer = tf.keras.layers.LeakyReLU()
  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
  >>> list(output.numpy())
  [-0.9, -0.3, 0.0, 2.0]
  >>> layer = tf.keras.layers.LeakyReLU(alpha=0.1)
  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
  >>> list(output.numpy())
  [-0.3, -0.1, 0.0, 2.0]

  Input shape:
    Arbitrary. Use the keyword argument `input_shape`
    (tuple of integers, does not include the batch axis)
    when using this layer as the first layer in a model.

  Output shape:
    Same shape as the input.

  Args:
    alpha: Float >= 0. Negative slope coefficient. Defaults to 0.3.

  """

  def __init__(self, alpha=0.3, **kwargs):
    super(LeakyReLU, self).__init__(**kwargs)
    if alpha is None:
      raise ValueError('The alpha value of a Leaky ReLU layer '
                       'cannot be None, expected a float. '
                       'Got: %s' % alpha)
    self.supports_masking = True
    self.alpha = backend.cast_to_floatx(alpha)

  def call(self, inputs):
    return backend.relu(inputs, alpha=self.alpha)

  def get_config(self):
    config = {'alpha': float(self.alpha)}
    base_config = super(LeakyReLU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    return input_shape


@keras_export('keras.layers.PReLU')
class PReLU(Layer):
  """Parametric Rectified Linear Unit.

  It follows:

  ```
    f(x) = alpha * x for x < 0
    f(x) = x for x >= 0
  ```

  where `alpha` is a learned array with the same shape as `x`.

  Input shape:
    Arbitrary. Use the keyword argument `input_shape`
    (tuple of integers, does not include the samples axis)
    when using this layer as the first layer in a model.

  Output shape:
    Same shape as the input.

  Args:
    alpha_initializer: Initializer function for the weights.
    alpha_regularizer: Regularizer for the weights.
    alpha_constraint: Constraint for the weights.
    shared_axes: The axes along which to share learnable
      parameters for the activation function.
      For example, if the incoming feature maps
      are from a 2D convolution
      with output shape `(batch, height, width, channels)`,
      and you wish to share parameters across space
      so that each filter only has one set of parameters,
      set `shared_axes=[1, 2]`, as in the usage example below.
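
  Usage (an illustrative sketch; the shapes below are only an example):

  >>> # Share `alpha` across the spatial axes of a
  >>> # `(batch, height, width, channels)` feature map, so that each
  >>> # channel learns a single coefficient.
  >>> layer = tf.keras.layers.PReLU(shared_axes=[1, 2])
  >>> _ = layer(tf.zeros((1, 8, 8, 16)))
  >>> # The learned `alpha` weight now has shape (1, 1, 16): one
  >>> # parameter per channel, shared across height and width.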
  """

  def __init__(self,
               alpha_initializer='zeros',
               alpha_regularizer=None,
               alpha_constraint=None,
               shared_axes=None,
               **kwargs):
    super(PReLU, self).__init__(**kwargs)
    self.supports_masking = True
    self.alpha_initializer = initializers.get(alpha_initializer)
    self.alpha_regularizer = regularizers.get(alpha_regularizer)
    self.alpha_constraint = constraints.get(alpha_constraint)
    if shared_axes is None:
      self.shared_axes = None
    elif not isinstance(shared_axes, (list, tuple)):
      self.shared_axes = [shared_axes]
    else:
      self.shared_axes = list(shared_axes)

  @tf_utils.shape_type_conversion
  def build(self, input_shape):
    param_shape = list(input_shape[1:])
    if self.shared_axes is not None:
      for i in self.shared_axes:
        param_shape[i - 1] = 1
    self.alpha = self.add_weight(
        shape=param_shape,
        name='alpha',
        initializer=self.alpha_initializer,
        regularizer=self.alpha_regularizer,
        constraint=self.alpha_constraint)
    # Set input spec
    axes = {}
    if self.shared_axes:
      for i in range(1, len(input_shape)):
        if i not in self.shared_axes:
          axes[i] = input_shape[i]
    self.input_spec = InputSpec(ndim=len(input_shape), axes=axes)
    self.built = True

  def call(self, inputs):
    pos = backend.relu(inputs)
    neg = -self.alpha * backend.relu(-inputs)
    return pos + neg

  def get_config(self):
    config = {
        'alpha_initializer': initializers.serialize(self.alpha_initializer),
        'alpha_regularizer': regularizers.serialize(self.alpha_regularizer),
        'alpha_constraint': constraints.serialize(self.alpha_constraint),
        'shared_axes': self.shared_axes
    }
    base_config = super(PReLU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    return input_shape


@keras_export('keras.layers.ELU')
class ELU(Layer):
  """Exponential Linear Unit.

  It follows:

  ```
    f(x) = alpha * (exp(x) - 1.) for x < 0
    f(x) = x for x >= 0
  ```

  Input shape:
    Arbitrary. Use the keyword argument `input_shape`
    (tuple of integers, does not include the samples axis)
    when using this layer as the first layer in a model.

  Output shape:
    Same shape as the input.

  Args:
    alpha: Float. Scale for the negative part of the activation, i.e. the
      factor applied to `exp(x) - 1` when `x < 0`.
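
  Usage (an illustrative example; the commented values are approximate):

  >>> layer = tf.keras.layers.ELU(alpha=1.0)
  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
  >>> # Negative inputs map to `alpha * (exp(x) - 1)`, giving roughly
  >>> # [-0.95, -0.63, 0.0, 2.0]; non-negative inputs pass through unchanged.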
  """

  def __init__(self, alpha=1.0, **kwargs):
    super(ELU, self).__init__(**kwargs)
    if alpha is None:
      raise ValueError('Alpha of an ELU layer cannot be None, '
                       'expected a float. Got: %s' % alpha)
    self.supports_masking = True
    self.alpha = backend.cast_to_floatx(alpha)

  def call(self, inputs):
    return backend.elu(inputs, self.alpha)

  def get_config(self):
    config = {'alpha': float(self.alpha)}
    base_config = super(ELU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    return input_shape


@keras_export('keras.layers.ThresholdedReLU')
class ThresholdedReLU(Layer):
  """Thresholded Rectified Linear Unit.

  It follows:

  ```
    f(x) = x for x > theta
    f(x) = 0 otherwise
  ```

  Input shape:
    Arbitrary. Use the keyword argument `input_shape`
    (tuple of integers, does not include the samples axis)
    when using this layer as the first layer in a model.

  Output shape:
    Same shape as the input.

  Args:
    theta: Float >= 0. Threshold location of activation.
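
  Usage (an illustrative example):

  >>> layer = tf.keras.layers.ThresholdedReLU(theta=1.5)
  >>> output = layer([-3.0, -1.0, 1.0, 2.0])
  >>> # Only values strictly greater than `theta` are kept, so the output
  >>> # is [0.0, 0.0, 0.0, 2.0].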
  """

  def __init__(self, theta=1.0, **kwargs):
    super(ThresholdedReLU, self).__init__(**kwargs)
    if theta is None:
      raise ValueError('Theta of a Thresholded ReLU layer cannot be '
                       'None, expected a float. Got: %s' % theta)
    if theta < 0:
      raise ValueError('The theta value of a Thresholded ReLU layer '
                       'should be >= 0. Got: %s' % theta)
    self.supports_masking = True
    self.theta = backend.cast_to_floatx(theta)

  def call(self, inputs):
    theta = math_ops.cast(self.theta, inputs.dtype)
    return inputs * math_ops.cast(math_ops.greater(inputs, theta), inputs.dtype)

  def get_config(self):
    config = {'theta': float(self.theta)}
    base_config = super(ThresholdedReLU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    return input_shape


def _large_compatible_negative(tensor_type):
  """Large negative number as a Tensor.

  This function is necessary because the standard large negative value used
  for masking in this module (-1e9) cannot be represented using `tf.float16`.

  Args:
    tensor_type: a dtype to determine the type.

  Returns:
    a large negative number.
  """
  if tensor_type == dtypes.float16:
    return dtypes.float16.min
  return -1e9


@keras_export('keras.layers.Softmax')
class Softmax(Layer):
  """Softmax activation function.

  Example, without and then with a mask:

  >>> inp = np.asarray([1., 2., 1.])
  >>> layer = tf.keras.layers.Softmax()
  >>> layer(inp).numpy()
  array([0.21194157, 0.5761169 , 0.21194157], dtype=float32)
  >>> mask = np.asarray([True, False, True], dtype=bool)
  >>> layer(inp, mask).numpy()
  array([0.5, 0. , 0.5], dtype=float32)
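  >>> # A list of axes is also supported (an illustrative example): the
  >>> # normalization is then applied jointly over those axes.
  >>> layer = tf.keras.layers.Softmax(axis=[1, 2])
  >>> out = layer(tf.ones((1, 2, 2)))
  >>> # Every entry is normalized jointly over axes 1 and 2, so each of
  >>> # the four values equals 0.25.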

  Input shape:
    Arbitrary. Use the keyword argument `input_shape`
    (tuple of integers, does not include the samples axis)
    when using this layer as the first layer in a model.

  Output shape:
    Same shape as the input.

  Args:
    axis: Integer, or list of integers, the axis (or axes) along which the
      softmax normalization is applied.
  Call arguments:
    inputs: The inputs, or logits to the softmax layer.
    mask: A boolean mask of the same shape as `inputs`. Defaults to `None`.
      The mask specifies 1 to keep and 0 to mask.

  Returns:
    Softmaxed output with the same shape as `inputs`.
  """

  def __init__(self, axis=-1, **kwargs):
    super(Softmax, self).__init__(**kwargs)
    self.supports_masking = True
    self.axis = axis

  def call(self, inputs, mask=None):
    if mask is not None:
      # Since mask is 1.0 for positions we want to keep and 0.0 for masked
      # positions, this operation creates a tensor which is 0.0 for
      # positions we want to attend to and a large negative value (-1e9, or
      # the float16 minimum) for masked positions.
      adder = (1.0 - math_ops.cast(mask, inputs.dtype)) * (
          _large_compatible_negative(inputs.dtype))

      # Since we are adding it to the raw scores before the softmax, this is
      # effectively the same as removing these entirely.
      inputs += adder
    if isinstance(self.axis, (tuple, list)):
      if len(self.axis) > 1:
        return math_ops.exp(inputs - math_ops.reduce_logsumexp(
            inputs, axis=self.axis, keepdims=True))
      else:
        return backend.softmax(inputs, axis=self.axis[0])
    return backend.softmax(inputs, axis=self.axis)

  def get_config(self):
    config = {'axis': self.axis}
    base_config = super(Softmax, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    return input_shape


@keras_export('keras.layers.ReLU')
class ReLU(Layer):
  """Rectified Linear Unit activation function.

  With default values, it returns element-wise `max(x, 0)`.

  Otherwise, it follows:

  ```
    f(x) = max_value if x >= max_value
    f(x) = x if threshold <= x < max_value
    f(x) = negative_slope * (x - threshold) otherwise
  ```

  Usage:

  >>> layer = tf.keras.layers.ReLU()
  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
  >>> list(output.numpy())
  [0.0, 0.0, 0.0, 2.0]
  >>> layer = tf.keras.layers.ReLU(max_value=1.0)
  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
  >>> list(output.numpy())
  [0.0, 0.0, 0.0, 1.0]
  >>> layer = tf.keras.layers.ReLU(negative_slope=1.0)
  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
  >>> list(output.numpy())
  [-3.0, -1.0, 0.0, 2.0]
  >>> layer = tf.keras.layers.ReLU(threshold=1.5)
  >>> output = layer([-3.0, -1.0, 1.0, 2.0])
  >>> list(output.numpy())
  [0.0, 0.0, 0.0, 2.0]
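  >>> # `threshold` and `negative_slope` can be combined (an illustrative
  >>> # example): values below the threshold follow the line
  >>> # `negative_slope * (x - threshold)`.
  >>> layer = tf.keras.layers.ReLU(negative_slope=0.5, threshold=1.0)
  >>> output = layer([-3.0, 0.0, 0.5, 2.0])
  >>> list(output.numpy())
  [-2.0, -0.5, -0.25, 2.0]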

  Input shape:
    Arbitrary. Use the keyword argument `input_shape`
    (tuple of integers, does not include the batch axis)
    when using this layer as the first layer in a model.

  Output shape:
    Same shape as the input.

  Args:
    max_value: Float >= 0. Maximum activation value. Defaults to `None`,
      which means unlimited.
    negative_slope: Float >= 0. Negative slope coefficient. Defaults to 0.
    threshold: Float >= 0. Threshold value for thresholded activation.
      Defaults to 0.
  """

  def __init__(self, max_value=None, negative_slope=0, threshold=0, **kwargs):
    super(ReLU, self).__init__(**kwargs)
    if max_value is not None and max_value < 0.:
      raise ValueError('max_value of a ReLU layer cannot be a negative '
                       'value. Got: %s' % max_value)
    if negative_slope is None or negative_slope < 0.:
      raise ValueError('negative_slope of a ReLU layer cannot be a negative '
                       'value. Got: %s' % negative_slope)
    if threshold is None or threshold < 0.:
      raise ValueError('threshold of a ReLU layer cannot be a negative '
                       'value. Got: %s' % threshold)

    self.supports_masking = True
    if max_value is not None:
      max_value = backend.cast_to_floatx(max_value)
    self.max_value = max_value
    self.negative_slope = backend.cast_to_floatx(negative_slope)
    self.threshold = backend.cast_to_floatx(threshold)

  def call(self, inputs):
    # `backend.relu` calls the leaky-ReLU slope `alpha`, so `negative_slope`
    # is passed through as `alpha` here.
    return backend.relu(inputs,
                        alpha=self.negative_slope,
                        max_value=self.max_value,
                        threshold=self.threshold)

  def get_config(self):
    config = {
        'max_value': self.max_value,
        'negative_slope': self.negative_slope,
        'threshold': self.threshold
    }
    base_config = super(ReLU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    return input_shape