# xref: /aosp_15_r20/external/tensorflow/tensorflow/python/ops/signal/reconstruction_ops.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Signal reconstruction via overlapped addition of frames."""
16
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export
24
25
@tf_export("signal.overlap_and_add")
@dispatch.add_dispatch_support
def overlap_and_add(signal, frame_step, name=None):
  """Reconstructs a signal from a framed representation.

  Adds potentially overlapping frames of a signal with shape
  `[..., frames, frame_length]`, offsetting subsequent frames by `frame_step`.
  The resulting tensor has shape `[..., output_size]` where

      output_size = (frames - 1) * frame_step + frame_length

  For example, with `frame_step = 2` the two frames `[1, 2, 3]` and
  `[4, 5, 6]` are summed at offsets 0 and 2, giving `[1, 2, 7, 5, 6]`
  (`output_size = (2 - 1) * 2 + 3 = 5`).

  Args:
    signal: A [..., frames, frame_length] `Tensor`. All dimensions may be
      unknown, and rank must be at least 2.
    frame_step: An integer or scalar `Tensor` denoting overlap offsets. Must be
      less than or equal to `frame_length`.
    name: An optional name for the operation.

  Returns:
    A `Tensor` with shape `[..., output_size]` containing the overlap-added
    frames of `signal`'s inner-most two dimensions.

  Raises:
    ValueError: If `signal`'s rank is less than 2, or `frame_step` is not a
      scalar integer.
  """
  with ops.name_scope(name, "overlap_and_add", [signal, frame_step]):
    signal = ops.convert_to_tensor(signal, name="signal")
    # Called only for its validation side effect; the returned shape is unused.
    signal.shape.with_rank_at_least(2)
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
    frame_step.shape.assert_has_rank(0)
    if not frame_step.dtype.is_integer:
      raise ValueError("frame_step must be an integer. Got %s" %
                       frame_step.dtype)
    # Whenever a value is statically known, use it in place of the tensor so
    # that the shape arithmetic below is evaluated while the graph is built.
    frame_step_static = tensor_util.constant_value(frame_step)
    frame_step_is_static = frame_step_static is not None
    frame_step = frame_step_static if frame_step_is_static else frame_step

    signal_shape = array_ops.shape(signal)
    signal_shape_static = tensor_util.constant_value(signal_shape)
    if signal_shape_static is not None:
      signal_shape = signal_shape_static

    # All dimensions that are not part of the overlap-and-add. Can be empty for
    # rank 2 inputs.
    outer_dimensions = signal_shape[:-2]
    outer_rank = array_ops.size(outer_dimensions)
    outer_rank_static = tensor_util.constant_value(outer_rank)
    if outer_rank_static is not None:
      outer_rank = outer_rank_static

    def full_shape(inner_shape):
      # Prefixes `inner_shape` with the untouched outer (batch) dimensions.
      return array_ops.concat([outer_dimensions, inner_shape], 0)

    frame_length = signal_shape[-1]
    frames = signal_shape[-2]

    # Compute output length.
    output_length = frame_length + frame_step * (frames - 1)

    # If frame_length is equal to frame_step, there's no overlap so just
    # reshape the tensor. This shortcut is only taken when both values are
    # known statically.
    if (frame_step_is_static and signal.shape.dims is not None and
        frame_step == signal.shape.dims[-1].value):
      output_shape = full_shape([output_length])
      return array_ops.reshape(signal, output_shape, name="fast_path")

    # The following code is documented using this example:
    #
    # frame_step = 2
    # signal.shape = (3, 5)
    # a b c d e
    # f g h i j
    # k l m n o

    # Compute the number of segments, per frame.
    # NOTE: `frame_step` is not checked for being positive before this
    # division.
    segments = -(-frame_length // frame_step)  # Divide and round up.

    # Pad the frame_length dimension to a multiple of the frame step.
    # Pad the frames dimension by `segments` so that signal.shape = (6, 6)
    # a b c d e 0
    # f g h i j 0
    # k l m n o 0
    # 0 0 0 0 0 0
    # 0 0 0 0 0 0
    # 0 0 0 0 0 0
    paddings = [[0, segments], [0, segments * frame_step - frame_length]]
    # The outer dimensions are left unpadded.
    outer_paddings = array_ops.zeros([outer_rank, 2], dtypes.int32)
    paddings = array_ops.concat([outer_paddings, paddings], 0)
    signal = array_ops.pad(signal, paddings)

    # Reshape so that signal.shape = (6, 3, 2)
    # ab cd e0
    # fg hi j0
    # kl mn o0
    # 00 00 00
    # 00 00 00
    # 00 00 00
    shape = full_shape([frames + segments, segments, frame_step])
    signal = array_ops.reshape(signal, shape)

    # Transpose dimensions so that signal.shape = (3, 6, 2)
    # ab fg kl 00 00 00
    # cd hi mn 00 00 00
    # e0 j0 o0 00 00 00
    # Outer axes keep their positions; only the first two of the three
    # trailing axes are swapped.
    perm = array_ops.concat(
        [math_ops.range(outer_rank), outer_rank + [1, 0, 2]], 0)
    perm_static = tensor_util.constant_value(perm)
    perm = perm_static if perm_static is not None else perm
    signal = array_ops.transpose(signal, perm)

    # Reshape so that signal.shape = (18, 2)
    # ab fg kl 00 00 00 cd hi mn 00 00 00 e0 j0 o0 00 00 00
    shape = full_shape([(frames + segments) * segments, frame_step])
    signal = array_ops.reshape(signal, shape)

    # Truncate so that signal.shape = (15, 2)
    # ab fg kl 00 00 00 cd hi mn 00 00 00 e0 j0 o0
    signal = signal[..., :(frames + segments - 1) * segments, :]

    # Reshape so that signal.shape = (3, 5, 2)
    # ab fg kl 00 00
    # 00 cd hi mn 00
    # 00 00 e0 j0 o0
    # Dropping the trailing rows above makes each successive row start one
    # column later, which lines overlapping samples up in the same column.
    shape = full_shape([segments, (frames + segments - 1), frame_step])
    signal = array_ops.reshape(signal, shape)

    # Now, reduce over the columns, to achieve the desired sum.
    signal = math_ops.reduce_sum(signal, -3)

    # Flatten the array.
    shape = full_shape([(frames + segments - 1) * frame_step])
    signal = array_ops.reshape(signal, shape)

    # Truncate to final length.
    signal = signal[..., :output_length]

    return signal
164