#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/Dispatch.h>
#include <ATen/TensorMeta.h>
#include <ATen/native/UpSample.h>
#include <c10/util/irange.h>
#include <ATen/Parallel.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_upsample_bicubic2d_aa.h>
#include <ATen/ops/_upsample_bicubic2d_aa_backward.h>
#include <ATen/ops/_upsample_bicubic2d_aa_backward_native.h>
#include <ATen/ops/_upsample_bicubic2d_aa_native.h>
#include <ATen/ops/upsample_bicubic2d.h>
#include <ATen/ops/upsample_bicubic2d_backward.h>
#include <ATen/ops/upsample_bicubic2d_backward_native.h>
#include <ATen/ops/upsample_bicubic2d_native.h>
#endif

namespace at::meta {

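// Shape/metadata function for upsample_bicubic2d: validates that the input is
// a non-empty 4D (N, C, H, W) tensor (an empty batch dimension is allowed, but
// C, H and W must be non-zero) and allocates the output using the input's
// suggested memory format. For example, a (2, 3, 4, 4) input with
// output_size = {8, 8} produces a (2, 3, 8, 8) output.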
TORCH_META_FUNC(upsample_bicubic2d) (
  const Tensor& input, IntArrayRef output_size, bool align_corners, std::optional<double> scales_h, std::optional<double> scales_w
) {
  auto full_output_size = native::upsample_2d_common_check(input.sizes(), output_size);

  // Allow for empty batch size but not other dimensions
  TORCH_CHECK(
      input.numel() != 0 || c10::multiply_integers(input.sizes().begin() + 1, input.sizes().end()),
      "Non-empty 4D data tensor expected but got a tensor with sizes ",
      input.sizes());

  set_output_raw_strided(0, full_output_size, {}, input.options().memory_format(input.suggest_memory_format()));
}

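// Shape/metadata function for the backward pass: grad_output must be a 4D
// tensor whose sizes match the forward output implied by (input_size,
// output_size); grad_input is then allocated with shape input_size.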
TORCH_META_FUNC(upsample_bicubic2d_backward) (
  const Tensor& grad_output,
  IntArrayRef output_size,
  IntArrayRef input_size,
  bool align_corners,
  std::optional<double> scales_h,
  std::optional<double> scales_w
) {
  auto full_output_size = native::upsample_2d_common_check(input_size, output_size);

  TORCH_CHECK(
      grad_output.dim() == 4,
      "Expected grad_output to be a tensor of dimension 4 but got: dimension ", grad_output.dim());

  for (const auto i : c10::irange(4)) {
    TORCH_CHECK(
        grad_output.size(i) == full_output_size[i],
        "Expected grad_output to have the same shape as output;",
        " output.size(", i, ") = ", full_output_size[i],
        " but got grad_output.size(", i, ") = ", grad_output.size(i));
  }

  set_output_raw_strided(0, input_size, {}, grad_output.options());
}

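// The anti-aliased (_aa) variants below share the same shape semantics as the
// plain bicubic ops above; they differ only in the kernels they dispatch to.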
TORCH_META_FUNC(_upsample_bicubic2d_aa) (
  const Tensor& input, IntArrayRef output_size, bool align_corners, std::optional<double> scales_h, std::optional<double> scales_w
) {
  auto full_output_size = native::upsample_2d_common_check(input.sizes(), output_size);

  // Allow for empty batch size but not other dimensions
  TORCH_CHECK(
      input.numel() != 0 || c10::multiply_integers(input.sizes().begin() + 1, input.sizes().end()),
      "Non-empty 4D data tensor expected but got a tensor with sizes ",
      input.sizes());

  set_output_raw_strided(0, full_output_size, {}, input.options().memory_format(input.suggest_memory_format()));
}

TORCH_META_FUNC(_upsample_bicubic2d_aa_backward) (
  const Tensor& grad_output,
  IntArrayRef output_size,
  IntArrayRef input_size,
  bool align_corners,
  std::optional<double> scales_h,
  std::optional<double> scales_w
) {
  auto full_output_size = native::upsample_2d_common_check(input_size, output_size);

  TORCH_CHECK(
      grad_output.dim() == 4,
      "Expected grad_output to be a tensor of dimension 4 but got: dimension ", grad_output.dim());

  for (const auto i : c10::irange(4)) {
    TORCH_CHECK(
        grad_output.size(i) == full_output_size[i],
        "Expected grad_output to have the same shape as output;",
        " output.size(", i, ") = ", full_output_size[i],
        " but got grad_output.size(", i, ") = ", grad_output.size(i));
  }

  set_output_raw_strided(0, input_size, {}, grad_output.options());
}

} // namespace at::meta
namespace at::native {
namespace {

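// CPU backward of bicubic upsampling. Each output gradient value is scattered
// back into the 4x4 neighborhood of input pixels that produced it in the
// forward pass, weighted by the same cubic convolution coefficients, so this
// is the transpose of the forward interpolation.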
template <typename scalar_t>
static void upsample_bicubic2d_backward_out_frame(
    const scalar_t* odata,
    scalar_t* idata,
    int64_t input_height,
    int64_t input_width,
    int64_t output_height,
    int64_t output_width,
    int64_t nbatch,
    int64_t channels,
    bool align_corners,
    std::optional<double> scales_h,
    std::optional<double> scales_w) {
  // Fold the batch dimension into channels: each (n, c) slice is processed
  // independently.
  channels = channels * nbatch;
  auto input_slice_size = input_height * input_width;
  auto output_slice_size = output_height * output_width;

  using opmath_t = at::opmath_type<scalar_t>;
  const opmath_t height_scale = area_pixel_compute_scale<opmath_t>(
      input_height, output_height, align_corners, scales_h);
  const opmath_t width_scale = area_pixel_compute_scale<opmath_t>(
      input_width, output_width, align_corners, scales_w);
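  // Parallelize over the flattened (N * C) slices; the grain size is divided
  // by the per-slice output work so each task ends up with a roughly constant
  // amount of total work (the extra factor of 4 appears to account for the
  // 4x4 scatter per output element).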
  at::parallel_for(0, channels, at::internal::GRAIN_SIZE / output_slice_size / 4, [&](int64_t start, int64_t end) {
    // For reduced-precision types (Half, BFloat16), accumulate into a
    // full-precision opmath buffer and write back once per slice; otherwise
    // accumulate directly into the gradient input.
    opmath_t* acc_data_ptr = nullptr;
    std::unique_ptr<opmath_t[]> buffer_data;
    if constexpr (!std::is_same<scalar_t, opmath_t>::value) {
      buffer_data = std::make_unique<opmath_t[]>(input_slice_size);
      acc_data_ptr = buffer_data.get();
      memset(acc_data_ptr, 0, sizeof(opmath_t) * input_slice_size);
    }
    for (const auto i : c10::irange(start, end)) {
      scalar_t* in = idata + i * input_slice_size;
      const scalar_t* out = odata + i * output_slice_size;
      for (const auto output_y : c10::irange(output_height)) {
        for (const auto output_x : c10::irange(output_width)) {
          // Map the output coordinate back to a (possibly fractional) source
          // coordinate, then split it into a base index and an interpolation
          // lambda in [0, 1).
          const opmath_t real_x = area_pixel_compute_source_index(width_scale, output_x, align_corners, /*cubic=*/true);
          int64_t input_x;
          opmath_t t_x;
          guard_index_and_lambda(real_x, input_width, input_x, t_x);

          const opmath_t real_y = area_pixel_compute_source_index(height_scale, output_y, align_corners, /*cubic=*/true);
          int64_t input_y;
          opmath_t t_y;
          guard_index_and_lambda(real_y, input_height, input_y, t_y);

          // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
          opmath_t x_coeffs[4];
          // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
          opmath_t y_coeffs[4];

          get_cubic_upsample_coefficients<opmath_t>(x_coeffs, t_x);
          get_cubic_upsample_coefficients<opmath_t>(y_coeffs, t_y);

          // Scatter this output gradient into the 4x4 input neighborhood,
          // with out-of-range indices clamped to the border.
          opmath_t out_value = out[output_y * output_width + output_x];
          for (const auto ii : c10::irange(4)) {
            for (const auto jj : c10::irange(4)) {
              upsample_increment_value_bounded<opmath_t>(
                  acc_data_ptr == nullptr ? reinterpret_cast<opmath_t*>(in) : acc_data_ptr,
                  input_width,
                  input_height,
                  input_x - 1 + ii,
                  input_y - 1 + jj,
                  out_value * y_coeffs[jj] * x_coeffs[ii]);
            }
          }
        }
      }
      if (acc_data_ptr != nullptr) {
        apply_grad_input(acc_data_ptr, in, input_slice_size);
      }
    }
  });
}

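// CPU entry point for the non-antialiased bicubic backward. Unlike the
// forward pass, this is a plain function in this file rather than a
// DispatchStub; the structured impl below calls it directly.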
static void upsample_bicubic2d_backward_kernel(
    const Tensor& grad_input,
    const Tensor& grad_output_,
    IntArrayRef output_size,
    IntArrayRef input_size,
    bool align_corners,
    std::optional<double> scales_h,
    std::optional<double> scales_w) {

  int64_t output_height = output_size[0];
  int64_t output_width = output_size[1];

  int64_t nbatch = input_size[0];
  int64_t channels = input_size[1];
  int64_t input_height = input_size[2];
  int64_t input_width = input_size[3];

  auto grad_output = grad_output_.contiguous();
  // Special case: input/output same size, just copy
  if (input_height == output_height && input_width == output_width) {
    grad_input.copy_(grad_output);
    return;
  }
  AT_DISPATCH_FLOATING_TYPES_AND2(ScalarType::Half, ScalarType::BFloat16,
      grad_output.scalar_type(), "upsample_bicubic2d_backward", [&] {
        scalar_t* idata = grad_input.mutable_data_ptr<scalar_t>();
        const scalar_t* odata = grad_output.const_data_ptr<scalar_t>();

        upsample_bicubic2d_backward_out_frame<scalar_t>(
            odata,
            idata,
            input_height,
            input_width,
            output_height,
            output_width,
            nbatch,
            channels,
            align_corners,
            scales_h,
            scales_w);
      });
}
} // namespace

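// The structured-kernel implementations below are thin wrappers: shape checks
// and output allocation already happened in the meta functions above, so each
// one only forwards to the corresponding kernel.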
TORCH_IMPL_FUNC(upsample_bicubic2d_out_cpu) (
    const Tensor& input,
    IntArrayRef output_size,
    bool align_corners,
    std::optional<double> scales_h,
    std::optional<double> scales_w,
    const Tensor& output
) {
  upsample_bicubic2d_kernel(kCPU, output, input, align_corners, scales_h, scales_w);
}

TORCH_IMPL_FUNC(upsample_bicubic2d_backward_out_cpu) (
    const Tensor& grad_output,
    IntArrayRef output_size,
    IntArrayRef input_size,
    bool align_corners,
    std::optional<double> scales_h,
    std::optional<double> scales_w,
    const Tensor& grad_input
) {
  grad_input.zero_();
  upsample_bicubic2d_backward_kernel(grad_input, grad_output, output_size, input_size, align_corners, scales_h, scales_w);
}

TORCH_IMPL_FUNC(_upsample_bicubic2d_aa_out_cpu) (
    const Tensor& input,
    IntArrayRef output_size,
    bool align_corners,
    std::optional<double> scales_h,
    std::optional<double> scales_w,
    const Tensor& output
) {
  _upsample_bicubic2d_aa_kernel(kCPU, output, input, align_corners, scales_h, scales_w);
}

TORCH_IMPL_FUNC(_upsample_bicubic2d_aa_backward_out_cpu) (
    const Tensor& grad_output,
    IntArrayRef output_size,
    IntArrayRef input_size,
    bool align_corners,
    std::optional<double> scales_h,
    std::optional<double> scales_w,
    const Tensor& grad_input
) {
  grad_input.zero_();
  _upsample_bicubic2d_aa_backward_kernel(kCPU, grad_input, grad_output, align_corners, scales_h, scales_w);
}

// vec variants

using at::native::upsample::compute_output_size;
using at::native::upsample::get_scale_value;

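// Overloads taking optional scale_factors instead of an explicit output size
// (used by the interpolate path). They resolve the concrete output size and
// per-dimension scales, then call the regular operators. As a sketch with
// hypothetical values: a (1, 3, 4, 4) input with scale_factors = {2.0, 2.0}
// resolves to osize = {8, 8} and scale_h = scale_w = 2.0.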
Tensor upsample_bicubic2d(
    const Tensor& input,
    at::OptionalIntArrayRef output_size,
    bool align_corners,
    std::optional<ArrayRef<double>> scale_factors) {
  auto osize = compute_output_size(input.sizes(), output_size, scale_factors);
  auto scale_h = get_scale_value(scale_factors, 0);
  auto scale_w = get_scale_value(scale_factors, 1);
  return at::upsample_bicubic2d(input, osize, align_corners, scale_h, scale_w);
}

Tensor _upsample_bicubic2d_aa(
    const Tensor& input,
    at::OptionalIntArrayRef output_size,
    bool align_corners,
    std::optional<ArrayRef<double>> scale_factors) {
  auto osize = compute_output_size(input.sizes(), output_size, scale_factors);
  auto scale_h = get_scale_value(scale_factors, 0);
  auto scale_w = get_scale_value(scale_factors, 1);
  return at::_upsample_bicubic2d_aa(input, osize, align_corners, scale_h, scale_w);
}

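// Dispatch stub definitions for the forward kernels and the anti-aliased
// backward kernel; the CPU implementations are registered elsewhere
// (presumably in the vectorized kernels under native/cpu, e.g.
// UpSampleKernel.cpp).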
DEFINE_DISPATCH(upsample_bicubic2d_kernel);
DEFINE_DISPATCH(_upsample_bicubic2d_aa_kernel);
DEFINE_DISPATCH(_upsample_bicubic2d_aa_backward_kernel);

} // namespace at::native