xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/elementwise_test_util.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise_test_util.h"
17 
18 #include <memory>
19 #include <vector>
20 
21 #include "tensorflow/lite/delegates/gpu/common/operations.h"
22 #include "tensorflow/lite/delegates/gpu/common/status.h"
23 #include "tensorflow/lite/delegates/gpu/common/task/testing_util.h"
24 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
25 
26 namespace tflite {
27 namespace gpu {
28 
AbsTest(TestExecutionEnvironment * env)29 absl::Status AbsTest(TestExecutionEnvironment* env) {
30   TensorFloat32 src_tensor;
31   src_tensor.shape = BHWC(1, 2, 1, 2);
32   src_tensor.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)};
33 
34   for (auto precision : env->GetSupportedPrecisions()) {
35     auto data_type = DeduceDataTypeFromPrecision(precision);
36     for (auto storage : env->GetSupportedStorages(data_type)) {
37       OperationDef op_def;
38       op_def.precision = precision;
39       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
40       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
41       TensorFloat32 dst_tensor;
42       GPUOperation operation = CreateElementwiseOneInput(
43           env->GetGpuInfo(), op_def, OperationType::ABS);
44       RETURN_IF_ERROR(env->ExecuteGPUOperation(
45           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
46           BHWC(1, 2, 1, 2), &dst_tensor));
47       RETURN_IF_ERROR(
48           PointWiseNear({half(0.0f), half(1.0f), half(0.05f), half(0.045f)},
49                         dst_tensor.data, 0.0f));
50     }
51   }
52   return absl::OkStatus();
53 }
54 
CosTest(TestExecutionEnvironment * env)55 absl::Status CosTest(TestExecutionEnvironment* env) {
56   TensorFloat32 src_tensor;
57   src_tensor.shape = BHWC(1, 2, 1, 2);
58   src_tensor.data = {0.0f, -1.0f, -0.05f, 0.045f};
59 
60   for (auto precision : env->GetSupportedPrecisions()) {
61     auto data_type = DeduceDataTypeFromPrecision(precision);
62     for (auto storage : env->GetSupportedStorages(data_type)) {
63       const float eps = precision == CalculationsPrecision::F32 ? 5e-5f : 1e-3f;
64       OperationDef op_def;
65       op_def.precision = precision;
66       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
67       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
68       TensorFloat32 dst_tensor;
69       GPUOperation operation = CreateElementwiseOneInput(
70           env->GetGpuInfo(), op_def, OperationType::COS);
71       RETURN_IF_ERROR(env->ExecuteGPUOperation(
72           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
73           BHWC(1, 2, 1, 2), &dst_tensor));
74       RETURN_IF_ERROR(PointWiseNear(
75           {std::cos(0.0f), std::cos(-1.0f), std::cos(-0.05f), std::cos(0.045f)},
76           dst_tensor.data, eps));
77     }
78   }
79   return absl::OkStatus();
80 }
81 
CopyTest(TestExecutionEnvironment * env)82 absl::Status CopyTest(TestExecutionEnvironment* env) {
83   TensorFloat32 src_tensor;
84   src_tensor.shape = BHWC(1, 2, 1, 2);
85   src_tensor.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)};
86 
87   for (auto precision : env->GetSupportedPrecisions()) {
88     auto data_type = DeduceDataTypeFromPrecision(precision);
89     for (auto storage : env->GetSupportedStorages(data_type)) {
90       OperationDef op_def;
91       op_def.precision = precision;
92       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
93       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
94       TensorFloat32 dst_tensor;
95       GPUOperation operation = CreateElementwiseOneInput(
96           env->GetGpuInfo(), op_def, OperationType::COPY);
97       RETURN_IF_ERROR(env->ExecuteGPUOperation(
98           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
99           BHWC(1, 2, 1, 2), &dst_tensor));
100       RETURN_IF_ERROR(PointWiseNear(src_tensor.data, dst_tensor.data, 0.0f));
101     }
102   }
103   return absl::OkStatus();
104 }
105 
EluTest(TestExecutionEnvironment * env)106 absl::Status EluTest(TestExecutionEnvironment* env) {
107   TensorFloat32 src_tensor;
108   src_tensor.shape = BHWC(1, 1, 1, 7);
109   src_tensor.data = {0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
110 
111   for (auto precision : env->GetSupportedPrecisions()) {
112     auto data_type = DeduceDataTypeFromPrecision(precision);
113     for (auto storage : env->GetSupportedStorages(data_type)) {
114       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
115       OperationDef op_def;
116       op_def.precision = precision;
117       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
118       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
119       TensorFloat32 dst_tensor;
120       GPUOperation operation = CreateElementwiseOneInput(
121           env->GetGpuInfo(), op_def, OperationType::ELU);
122       RETURN_IF_ERROR(env->ExecuteGPUOperation(
123           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
124           BHWC(1, 1, 1, 7), &dst_tensor));
125       RETURN_IF_ERROR(PointWiseNear(
126           {0.0f, 1.0f, std::exp(-1.0f) - 1.0f, 100.0f, std::exp(-100.0f) - 1.0f,
127            0.01f, std::exp(-0.01f) - 1.0f},
128           dst_tensor.data, eps));
129     }
130   }
131   return absl::OkStatus();
132 }
133 
ExpTest(TestExecutionEnvironment * env)134 absl::Status ExpTest(TestExecutionEnvironment* env) {
135   TensorFloat32 src_tensor;
136   src_tensor.shape = BHWC(1, 1, 1, 7);
137   src_tensor.data = {0.0f, 1.0f, -1.0f, 2.5f, -1.7f, 0.01f, -0.01f};
138 
139   for (auto precision : env->GetSupportedPrecisions()) {
140     auto data_type = DeduceDataTypeFromPrecision(precision);
141     for (auto storage : env->GetSupportedStorages(data_type)) {
142       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 2e-2f;
143       OperationDef op_def;
144       op_def.precision = precision;
145       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
146       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
147       TensorFloat32 dst_tensor;
148       GPUOperation operation = CreateElementwiseOneInput(
149           env->GetGpuInfo(), op_def, OperationType::EXP);
150       RETURN_IF_ERROR(env->ExecuteGPUOperation(
151           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
152           BHWC(1, 1, 1, 7), &dst_tensor));
153       RETURN_IF_ERROR(PointWiseNear(
154           {std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f), std::exp(2.5f),
155            std::exp(-1.7f), std::exp(0.01f), std::exp(-0.01f)},
156           dst_tensor.data, eps));
157     }
158   }
159   return absl::OkStatus();
160 }
161 
FloorTest(TestExecutionEnvironment * env)162 absl::Status FloorTest(TestExecutionEnvironment* env) {
163   TensorFloat32 src_tensor;
164   src_tensor.shape = BHWC(1, 1, 1, 7);
165   src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
166 
167   for (auto precision : env->GetSupportedPrecisions()) {
168     auto data_type = DeduceDataTypeFromPrecision(precision);
169     for (auto storage : env->GetSupportedStorages(data_type)) {
170       const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
171       OperationDef op_def;
172       op_def.precision = precision;
173       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
174       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
175       TensorFloat32 dst_tensor;
176       GPUOperation operation = CreateElementwiseOneInput(
177           env->GetGpuInfo(), op_def, OperationType::FLOOR);
178       RETURN_IF_ERROR(env->ExecuteGPUOperation(
179           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
180           src_tensor.shape, &dst_tensor));
181       RETURN_IF_ERROR(PointWiseNear(
182           {-5.0, -3.0f, -2.0f, 0.0f, 1.0f, 3.0f, 4.0f}, dst_tensor.data, eps));
183     }
184   }
185   return absl::OkStatus();
186 }
187 
FloorDivTest(TestExecutionEnvironment * env)188 absl::Status FloorDivTest(TestExecutionEnvironment* env) {
189   TensorFloat32 src_tensor;
190   src_tensor.shape = BHWC(1, 1, 1, 7);
191   src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
192 
193   float scalar = 2.7f;
194   ElementwiseAttributes attr;
195   attr.param = scalar;
196 
197   for (auto precision : env->GetSupportedPrecisions()) {
198     auto data_type = DeduceDataTypeFromPrecision(precision);
199     for (auto storage : env->GetSupportedStorages(data_type)) {
200       const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
201       OperationDef op_def;
202       op_def.precision = precision;
203       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
204       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
205       TensorFloat32 dst_tensor;
206       GPUOperation operation = CreateElementwise(
207           env->GetGpuInfo(), op_def, OperationType::FLOOR_DIV, attr);
208       RETURN_IF_ERROR(env->ExecuteGPUOperation(
209           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
210           src_tensor.shape, &dst_tensor));
211       RETURN_IF_ERROR(
212           PointWiseNear({std::floor(-4.5f / scalar), std::floor(-3.0f / scalar),
213                          std::floor(-1.5f / scalar), std::floor(0.0f / scalar),
214                          std::floor(1.5f / scalar), std::floor(3.0f / scalar),
215                          std::floor(4.5f / scalar)},
216                         dst_tensor.data, eps));
217     }
218   }
219   return absl::OkStatus();
220 }
221 
FloorModTest(TestExecutionEnvironment * env)222 absl::Status FloorModTest(TestExecutionEnvironment* env) {
223   TensorFloat32 src_tensor;
224   src_tensor.shape = BHWC(1, 1, 1, 7);
225   src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
226 
227   float scalar = 2.7f;
228   ElementwiseAttributes attr;
229   attr.param = scalar;
230 
231   for (auto precision : env->GetSupportedPrecisions()) {
232     auto data_type = DeduceDataTypeFromPrecision(precision);
233     for (auto storage : env->GetSupportedStorages(data_type)) {
234       const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
235       OperationDef op_def;
236       op_def.precision = precision;
237       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
238       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
239       TensorFloat32 dst_tensor;
240       GPUOperation operation = CreateElementwise(
241           env->GetGpuInfo(), op_def, OperationType::FLOOR_MOD, attr);
242       RETURN_IF_ERROR(env->ExecuteGPUOperation(
243           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
244           src_tensor.shape, &dst_tensor));
245       RETURN_IF_ERROR(
246           PointWiseNear({-4.5f - std::floor(-4.5f / scalar) * scalar,
247                          -3.0f - std::floor(-3.0f / scalar) * scalar,
248                          -1.5f - std::floor(-1.5f / scalar) * scalar,
249                          0.0f - std::floor(0.0f / scalar) * scalar,
250                          1.5f - std::floor(1.5f / scalar) * scalar,
251                          3.0f - std::floor(3.0f / scalar) * scalar,
252                          4.5f - std::floor(4.5f / scalar) * scalar},
253                         dst_tensor.data, eps));
254     }
255   }
256   return absl::OkStatus();
257 }
258 
HardSwishTest(TestExecutionEnvironment * env)259 absl::Status HardSwishTest(TestExecutionEnvironment* env) {
260   TensorFloat32 src_tensor;
261   src_tensor.shape = BHWC(1, 1, 1, 7);
262   src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
263 
264   for (auto precision : env->GetSupportedPrecisions()) {
265     auto data_type = DeduceDataTypeFromPrecision(precision);
266     for (auto storage : env->GetSupportedStorages(data_type)) {
267       const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
268       OperationDef op_def;
269       op_def.precision = precision;
270       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
271       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
272       TensorFloat32 dst_tensor;
273       GPUOperation operation = CreateElementwiseOneInput(
274           env->GetGpuInfo(), op_def, OperationType::HARD_SWISH);
275       RETURN_IF_ERROR(env->ExecuteGPUOperation(
276           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
277           src_tensor.shape, &dst_tensor));
278       RETURN_IF_ERROR(
279           PointWiseNear({0.0f, 0.0f, -0.375f, 0.0f, 1.125f, 3.f, 4.5f},
280                         dst_tensor.data, eps));
281     }
282   }
283   return absl::OkStatus();
284 }
285 
LogTest(TestExecutionEnvironment * env)286 absl::Status LogTest(TestExecutionEnvironment* env) {
287   TensorFloat32 src_tensor;
288   src_tensor.shape = BHWC(1, 2, 1, 2);
289   src_tensor.data = {1.0f, 2.0f, 3.0f, 4.0f};
290 
291   for (auto precision : env->GetSupportedPrecisions()) {
292     auto data_type = DeduceDataTypeFromPrecision(precision);
293     for (auto storage : env->GetSupportedStorages(data_type)) {
294       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
295       OperationDef op_def;
296       op_def.precision = precision;
297       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
298       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
299       TensorFloat32 dst_tensor;
300       GPUOperation operation = CreateElementwiseOneInput(
301           env->GetGpuInfo(), op_def, OperationType::LOG);
302       RETURN_IF_ERROR(env->ExecuteGPUOperation(
303           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
304           BHWC(1, 2, 1, 2), &dst_tensor));
305       RETURN_IF_ERROR(PointWiseNear(
306           {std::log(1.0f), std::log(2.0f), std::log(3.0f), std::log(4.0f)},
307           dst_tensor.data, eps));
308     }
309   }
310   return absl::OkStatus();
311 }
312 
NegTest(TestExecutionEnvironment * env)313 absl::Status NegTest(TestExecutionEnvironment* env) {
314   TensorFloat32 src_tensor;
315   src_tensor.shape = BHWC(1, 2, 1, 2);
316   src_tensor.data = {1.0f, -2.0f, 0.0f, 4.0f};
317 
318   for (auto precision : env->GetSupportedPrecisions()) {
319     auto data_type = DeduceDataTypeFromPrecision(precision);
320     for (auto storage : env->GetSupportedStorages(data_type)) {
321       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
322       OperationDef op_def;
323       op_def.precision = precision;
324       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
325       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
326       TensorFloat32 dst_tensor;
327       GPUOperation operation = CreateElementwiseOneInput(
328           env->GetGpuInfo(), op_def, OperationType::NEG);
329       RETURN_IF_ERROR(env->ExecuteGPUOperation(
330           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
331           BHWC(1, 2, 1, 2), &dst_tensor));
332       RETURN_IF_ERROR(
333           PointWiseNear({-1.0f, 2.0f, 0.0f, -4.0f}, dst_tensor.data, eps));
334     }
335   }
336   return absl::OkStatus();
337 }
338 
RsqrtTest(TestExecutionEnvironment * env)339 absl::Status RsqrtTest(TestExecutionEnvironment* env) {
340   TensorFloat32 src_tensor;
341   src_tensor.shape = BHWC(1, 2, 1, 2);
342   src_tensor.data = {1.0f, 2.0f, 3.0f, 4.0f};
343 
344   for (auto precision : env->GetSupportedPrecisions()) {
345     auto data_type = DeduceDataTypeFromPrecision(precision);
346     for (auto storage : env->GetSupportedStorages(data_type)) {
347       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
348       OperationDef op_def;
349       op_def.precision = precision;
350       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
351       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
352       TensorFloat32 dst_tensor;
353       GPUOperation operation = CreateElementwiseOneInput(
354           env->GetGpuInfo(), op_def, OperationType::RSQRT);
355       RETURN_IF_ERROR(env->ExecuteGPUOperation(
356           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
357           BHWC(1, 2, 1, 2), &dst_tensor));
358       RETURN_IF_ERROR(
359           PointWiseNear({1.0f / std::sqrt(1.0f), 1.0f / std::sqrt(2.0f),
360                          1.0f / std::sqrt(3.0f), 1.0f / std::sqrt(4.0f)},
361                         dst_tensor.data, eps));
362     }
363   }
364   return absl::OkStatus();
365 }
366 
SigmoidTest(TestExecutionEnvironment * env)367 absl::Status SigmoidTest(TestExecutionEnvironment* env) {
368   TensorFloat32 src_tensor;
369   src_tensor.shape = BHWC(1, 2, 1, 2);
370   src_tensor.data = {-std::log(1.0f), -std::log(2.0f), -std::log(3.0f),
371                      -std::log(4.0f)};
372 
373   for (auto precision : env->GetSupportedPrecisions()) {
374     auto data_type = DeduceDataTypeFromPrecision(precision);
375     for (auto storage : env->GetSupportedStorages(data_type)) {
376       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
377       OperationDef op_def;
378       op_def.precision = precision;
379       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
380       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
381       TensorFloat32 dst_tensor;
382       GPUOperation operation = CreateElementwiseOneInput(
383           env->GetGpuInfo(), op_def, OperationType::SIGMOID);
384       RETURN_IF_ERROR(env->ExecuteGPUOperation(
385           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
386           BHWC(1, 2, 1, 2), &dst_tensor));
387       RETURN_IF_ERROR(PointWiseNear({0.5f, 1.0f / 3.0f, 0.25f, 0.2f},
388                                     dst_tensor.data, eps));
389     }
390   }
391   return absl::OkStatus();
392 }
393 
SinTest(TestExecutionEnvironment * env)394 absl::Status SinTest(TestExecutionEnvironment* env) {
395   TensorFloat32 src_tensor;
396   src_tensor.shape = BHWC(1, 2, 1, 2);
397   src_tensor.data = {0.0f, -1.0f, -0.05f, 0.045f};
398 
399   for (auto precision : env->GetSupportedPrecisions()) {
400     auto data_type = DeduceDataTypeFromPrecision(precision);
401     for (auto storage : env->GetSupportedStorages(data_type)) {
402       const float eps = precision == CalculationsPrecision::F32 ? 2e-5f : 5e-3f;
403       OperationDef op_def;
404       op_def.precision = precision;
405       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
406       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
407       TensorFloat32 dst_tensor;
408       GPUOperation operation = CreateElementwiseOneInput(
409           env->GetGpuInfo(), op_def, OperationType::SIN);
410       RETURN_IF_ERROR(env->ExecuteGPUOperation(
411           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
412           BHWC(1, 2, 1, 2), &dst_tensor));
413       RETURN_IF_ERROR(PointWiseNear(
414           {std::sin(0.0f), std::sin(-1.0f), std::sin(-0.05f), std::sin(0.045f)},
415           dst_tensor.data, eps));
416     }
417   }
418   return absl::OkStatus();
419 }
420 
SqrtTest(TestExecutionEnvironment * env)421 absl::Status SqrtTest(TestExecutionEnvironment* env) {
422   TensorFloat32 src_tensor;
423   src_tensor.shape = BHWC(1, 2, 1, 2);
424   src_tensor.data = {1.0f, 2.0f, 3.0f, 4.0f};
425 
426   for (auto precision : env->GetSupportedPrecisions()) {
427     auto data_type = DeduceDataTypeFromPrecision(precision);
428     for (auto storage : env->GetSupportedStorages(data_type)) {
429       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
430       OperationDef op_def;
431       op_def.precision = precision;
432       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
433       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
434       TensorFloat32 dst_tensor;
435       GPUOperation operation = CreateElementwiseOneInput(
436           env->GetGpuInfo(), op_def, OperationType::SQRT);
437       RETURN_IF_ERROR(env->ExecuteGPUOperation(
438           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
439           BHWC(1, 2, 1, 2), &dst_tensor));
440       RETURN_IF_ERROR(PointWiseNear(
441           {std::sqrt(1.0f), std::sqrt(2.0f), std::sqrt(3.0f), std::sqrt(4.0f)},
442           dst_tensor.data, eps));
443     }
444   }
445   return absl::OkStatus();
446 }
447 
SquareTest(TestExecutionEnvironment * env)448 absl::Status SquareTest(TestExecutionEnvironment* env) {
449   TensorFloat32 src_tensor;
450   src_tensor.shape = BHWC(1, 2, 1, 2);
451   src_tensor.data = {1.0f, -2.0f, 3.0f, 4.0f};
452 
453   for (auto precision : env->GetSupportedPrecisions()) {
454     auto data_type = DeduceDataTypeFromPrecision(precision);
455     for (auto storage : env->GetSupportedStorages(data_type)) {
456       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
457       OperationDef op_def;
458       op_def.precision = precision;
459       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
460       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
461       TensorFloat32 dst_tensor;
462       GPUOperation operation = CreateElementwiseOneInput(
463           env->GetGpuInfo(), op_def, OperationType::SQUARE);
464       RETURN_IF_ERROR(env->ExecuteGPUOperation(
465           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
466           BHWC(1, 2, 1, 2), &dst_tensor));
467       RETURN_IF_ERROR(
468           PointWiseNear({1.0f, 4.0f, 9.0f, 16.0f}, dst_tensor.data, eps));
469     }
470   }
471   return absl::OkStatus();
472 }
473 
TanhTest(TestExecutionEnvironment * env)474 absl::Status TanhTest(TestExecutionEnvironment* env) {
475   TensorFloat32 src_tensor;
476   src_tensor.shape = BHWC(1, 2, 1, 2);
477   src_tensor.data = {-4.0f, -0.1f, 0.1f, 2.0f};
478 
479   for (auto precision : env->GetSupportedPrecisions()) {
480     auto data_type = DeduceDataTypeFromPrecision(precision);
481     for (auto storage : env->GetSupportedStorages(data_type)) {
482       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
483       OperationDef op_def;
484       op_def.precision = precision;
485       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
486       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
487       TensorFloat32 dst_tensor;
488       GPUOperation operation = CreateElementwiseOneInput(
489           env->GetGpuInfo(), op_def, OperationType::TANH);
490       RETURN_IF_ERROR(env->ExecuteGPUOperation(
491           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
492           BHWC(1, 2, 1, 2), &dst_tensor));
493       RETURN_IF_ERROR(PointWiseNear({std::tanh(-4.0f), std::tanh(-0.1f),
494                                      std::tanh(0.1f), std::tanh(2.0f)},
495                                     dst_tensor.data, eps));
496     }
497   }
498   return absl::OkStatus();
499 }
500 
SubTest(TestExecutionEnvironment * env)501 absl::Status SubTest(TestExecutionEnvironment* env) {
502   TensorFloat32 src_tensor_0, src_tensor_1;
503   src_tensor_0.shape = BHWC(1, 2, 1, 2);
504   src_tensor_1.shape = BHWC(1, 2, 1, 2);
505   src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.0f};
506   src_tensor_1.data = {0.5f, 1.0f, 3.0f, 3.5f};
507 
508   for (auto precision : env->GetSupportedPrecisions()) {
509     auto data_type = DeduceDataTypeFromPrecision(precision);
510     for (auto storage : env->GetSupportedStorages(data_type)) {
511       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
512       OperationDef op_def;
513       op_def.precision = precision;
514       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
515       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
516       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
517       TensorFloat32 dst_tensor;
518       GPUOperation operation = CreateElementwiseTwoInput(
519           op_def, OperationType::SUB, src_tensor_1.shape);
520       RETURN_IF_ERROR(env->ExecuteGPUOperation(
521           {src_tensor_0, src_tensor_1},
522           std::make_unique<GPUOperation>(std::move(operation)),
523           BHWC(1, 2, 1, 2), &dst_tensor));
524       RETURN_IF_ERROR(
525           PointWiseNear({0.5f, 1.0f, 0.0f, 0.5f}, dst_tensor.data, eps));
526     }
527   }
528   return absl::OkStatus();
529 }
530 
SquaredDiffTest(TestExecutionEnvironment * env)531 absl::Status SquaredDiffTest(TestExecutionEnvironment* env) {
532   TensorFloat32 src_tensor_0, src_tensor_1;
533   src_tensor_0.shape = BHWC(1, 2, 1, 2);
534   src_tensor_1.shape = BHWC(1, 2, 1, 2);
535   src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.0f};
536   src_tensor_1.data = {0.5f, 1.0f, 3.0f, 3.5f};
537 
538   for (auto precision : env->GetSupportedPrecisions()) {
539     auto data_type = DeduceDataTypeFromPrecision(precision);
540     for (auto storage : env->GetSupportedStorages(data_type)) {
541       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
542       OperationDef op_def;
543       op_def.precision = precision;
544       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
545       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
546       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
547       TensorFloat32 dst_tensor;
548       GPUOperation operation = CreateElementwiseTwoInput(
549           op_def, OperationType::SQUARED_DIFF, src_tensor_1.shape);
550       RETURN_IF_ERROR(env->ExecuteGPUOperation(
551           {src_tensor_0, src_tensor_1},
552           std::make_unique<GPUOperation>(std::move(operation)),
553           BHWC(1, 2, 1, 2), &dst_tensor));
554       RETURN_IF_ERROR(
555           PointWiseNear({0.25f, 1.0f, 0.0f, 0.25f}, dst_tensor.data, eps));
556     }
557   }
558   return absl::OkStatus();
559 }
560 
DivTest(TestExecutionEnvironment * env)561 absl::Status DivTest(TestExecutionEnvironment* env) {
562   TensorFloat32 src_tensor_0, src_tensor_1;
563   src_tensor_0.shape = BHWC(1, 2, 1, 2);
564   src_tensor_1.shape = BHWC(1, 2, 1, 2);
565   src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
566   src_tensor_1.data = {0.5f, 1.0f, 3.0f, 1.5f};
567 
568   for (auto precision : env->GetSupportedPrecisions()) {
569     auto data_type = DeduceDataTypeFromPrecision(precision);
570     for (auto storage : env->GetSupportedStorages(data_type)) {
571       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
572       OperationDef op_def;
573       op_def.precision = precision;
574       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
575       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
576       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
577       TensorFloat32 dst_tensor;
578       GPUOperation operation = CreateElementwiseTwoInput(
579           op_def, OperationType::DIV, src_tensor_1.shape);
580       RETURN_IF_ERROR(env->ExecuteGPUOperation(
581           {src_tensor_0, src_tensor_1},
582           std::make_unique<GPUOperation>(std::move(operation)),
583           BHWC(1, 2, 1, 2), &dst_tensor));
584       RETURN_IF_ERROR(
585           PointWiseNear({2.0f, 2.0f, 1.0f, 3.0f}, dst_tensor.data, eps));
586     }
587   }
588   return absl::OkStatus();
589 }
590 
PowTest(TestExecutionEnvironment * env)591 absl::Status PowTest(TestExecutionEnvironment* env) {
592   TensorFloat32 src_tensor_0, src_tensor_1;
593   src_tensor_0.shape = BHWC(1, 2, 1, 2);
594   src_tensor_1.shape = BHWC(1, 2, 1, 2);
595   src_tensor_0.data = {6.0f, 7.0f, 4.0f, 2.0f};
596   src_tensor_1.data = {0.0f, 1.0f, 2.0f, 3.0f};
597 
598   for (auto precision : env->GetSupportedPrecisions()) {
599     auto data_type = DeduceDataTypeFromPrecision(precision);
600     for (auto storage : env->GetSupportedStorages(data_type)) {
601       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
602       OperationDef op_def;
603       op_def.precision = precision;
604       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
605       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
606       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
607       TensorFloat32 dst_tensor;
608       GPUOperation operation = CreateElementwiseTwoInput(
609           op_def, OperationType::POW, src_tensor_1.shape);
610       RETURN_IF_ERROR(env->ExecuteGPUOperation(
611           {src_tensor_0, src_tensor_1},
612           std::make_unique<GPUOperation>(std::move(operation)),
613           BHWC(1, 2, 1, 2), &dst_tensor));
614       RETURN_IF_ERROR(
615           PointWiseNear({1.0f, 7.0f, 16.0f, 8.0f}, dst_tensor.data, eps));
616     }
617   }
618   return absl::OkStatus();
619 }
620 
AddTest(TestExecutionEnvironment * env)621 absl::Status AddTest(TestExecutionEnvironment* env) {
622   TensorFloat32 src_tensor_0, src_tensor_1;
623   src_tensor_0.shape = BHWC(1, 2, 1, 2);
624   src_tensor_1.shape = BHWC(1, 2, 1, 2);
625   src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
626   src_tensor_1.data = {0.5f, 1.0f, 3.0f, 1.5f};
627 
628   for (auto precision : env->GetSupportedPrecisions()) {
629     auto data_type = DeduceDataTypeFromPrecision(precision);
630     for (auto storage : env->GetSupportedStorages(data_type)) {
631       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
632       OperationDef op_def;
633       op_def.precision = precision;
634       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
635       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
636       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
637       TensorFloat32 dst_tensor;
638       GPUOperation operation = CreateElementwiseTwoInput(
639           op_def, OperationType::ADD, src_tensor_1.shape);
640       RETURN_IF_ERROR(env->ExecuteGPUOperation(
641           {src_tensor_0, src_tensor_1},
642           std::make_unique<GPUOperation>(std::move(operation)),
643           BHWC(1, 2, 1, 2), &dst_tensor));
644       RETURN_IF_ERROR(
645           PointWiseNear({1.5f, 3.0f, 6.0f, 6.0f}, dst_tensor.data, eps));
646     }
647   }
648   return absl::OkStatus();
649 }
650 
MaximumTest(TestExecutionEnvironment * env)651 absl::Status MaximumTest(TestExecutionEnvironment* env) {
652   TensorFloat32 src_tensor_0, src_tensor_1;
653   src_tensor_0.shape = BHWC(1, 2, 1, 2);
654   src_tensor_1.shape = BHWC(1, 2, 1, 2);
655   src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
656   src_tensor_1.data = {1.0f, 2.0f, 3.0f, -2.0f};
657 
658   for (auto precision : env->GetSupportedPrecisions()) {
659     auto data_type = DeduceDataTypeFromPrecision(precision);
660     for (auto storage : env->GetSupportedStorages(data_type)) {
661       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
662       OperationDef op_def;
663       op_def.precision = precision;
664       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
665       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
666       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
667       TensorFloat32 dst_tensor;
668       GPUOperation operation = CreateElementwiseTwoInput(
669           op_def, OperationType::MAXIMUM, src_tensor_1.shape);
670       RETURN_IF_ERROR(env->ExecuteGPUOperation(
671           {src_tensor_0, src_tensor_1},
672           std::make_unique<GPUOperation>(std::move(operation)),
673           BHWC(1, 2, 1, 2), &dst_tensor));
674       RETURN_IF_ERROR(
675           PointWiseNear({1.0f, 2.0f, 3.0f, -2.0f}, dst_tensor.data, eps));
676     }
677   }
678   return absl::OkStatus();
679 }
680 
MaximumWithScalarTest(TestExecutionEnvironment * env)681 absl::Status MaximumWithScalarTest(TestExecutionEnvironment* env) {
682   TensorFloat32 src_tensor_0;
683   src_tensor_0.shape = BHWC(1, 4, 1, 1);
684   src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
685 
686   ElementwiseAttributes attr;
687   attr.param = -1.0f;
688 
689   for (auto precision : env->GetSupportedPrecisions()) {
690     auto data_type = DeduceDataTypeFromPrecision(precision);
691     for (auto storage : env->GetSupportedStorages(data_type)) {
692       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
693       OperationDef op_def;
694       op_def.precision = precision;
695       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
696       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
697       TensorFloat32 dst_tensor;
698       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
699                                                  OperationType::MAXIMUM, attr);
700       RETURN_IF_ERROR(env->ExecuteGPUOperation(
701           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
702           BHWC(1, 4, 1, 1), &dst_tensor));
703       RETURN_IF_ERROR(
704           PointWiseNear({0.0f, -1.0f, 2.0f, -1.0f}, dst_tensor.data, eps));
705     }
706   }
707   return absl::OkStatus();
708 }
709 
MaximumWithConstantLinearTensorTest(TestExecutionEnvironment * env)710 absl::Status MaximumWithConstantLinearTensorTest(
711     TestExecutionEnvironment* env) {
712   TensorFloat32 src_tensor_0;
713   src_tensor_0.shape = BHWC(1, 2, 1, 2);
714   src_tensor_0.data = {1.0f, -6.2f, -2.0f, 3.0f};
715 
716   ::tflite::gpu::Tensor<Linear, DataType::FLOAT32> linear_tensor;
717   linear_tensor.shape = Linear(2);
718   linear_tensor.data = {0.5f, 2.0f};
719   ElementwiseAttributes attr;
720   attr.param = linear_tensor;
721 
722   for (auto precision : env->GetSupportedPrecisions()) {
723     auto data_type = DeduceDataTypeFromPrecision(precision);
724     for (auto storage : env->GetSupportedStorages(data_type)) {
725       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
726       OperationDef op_def;
727       op_def.precision = precision;
728       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
729       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
730       TensorFloat32 dst_tensor;
731       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
732                                                  OperationType::MAXIMUM, attr);
733       RETURN_IF_ERROR(env->ExecuteGPUOperation(
734           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
735           BHWC(1, 2, 1, 2), &dst_tensor));
736       RETURN_IF_ERROR(
737           PointWiseNear({1.0f, 2.0f, 0.5f, 3.0f}, dst_tensor.data, eps));
738     }
739   }
740   return absl::OkStatus();
741 }
742 
MaximumWithConstantHWCTensorTest(TestExecutionEnvironment * env)743 absl::Status MaximumWithConstantHWCTensorTest(TestExecutionEnvironment* env) {
744   TensorFloat32 src_tensor_0;
745   src_tensor_0.shape = BHWC(1, 2, 1, 2);
746   src_tensor_0.data = {1.0f, -6.2f, -2.0f, 3.0f};
747 
748   ::tflite::gpu::Tensor<HWC, DataType::FLOAT32> hwc_tensor;
749   hwc_tensor.shape = HWC(2, 1, 2);
750   hwc_tensor.data = {0.5f, 2.0f, 0.7f, 4.7f};
751   ElementwiseAttributes attr;
752   attr.param = hwc_tensor;
753 
754   for (auto precision : env->GetSupportedPrecisions()) {
755     auto data_type = DeduceDataTypeFromPrecision(precision);
756     for (auto storage : env->GetSupportedStorages(data_type)) {
757       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
758       OperationDef op_def;
759       op_def.precision = precision;
760       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
761       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
762       TensorFloat32 dst_tensor;
763       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
764                                                  OperationType::MAXIMUM, attr);
765       RETURN_IF_ERROR(env->ExecuteGPUOperation(
766           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
767           BHWC(1, 2, 1, 2), &dst_tensor));
768       RETURN_IF_ERROR(
769           PointWiseNear({1.0f, 2.0f, 0.7f, 4.7f}, dst_tensor.data, eps));
770     }
771   }
772   return absl::OkStatus();
773 }
MaximumWithConstantHWCTensorBroadcastChannelsTest(TestExecutionEnvironment * env)774 absl::Status MaximumWithConstantHWCTensorBroadcastChannelsTest(
775     TestExecutionEnvironment* env) {
776   TensorFloat32 src_tensor_0;
777   src_tensor_0.shape = BHWC(1, 2, 1, 2);
778   src_tensor_0.data = {1.0f, -6.2f, -2.0f, 3.0f};
779 
780   ::tflite::gpu::Tensor<HWC, DataType::FLOAT32> hwc_tensor;
781   hwc_tensor.shape = HWC(2, 1, 1);
782   hwc_tensor.data = {0.5f, 2.0f};
783   ElementwiseAttributes attr;
784   attr.param = hwc_tensor;
785 
786   for (auto precision : env->GetSupportedPrecisions()) {
787     auto data_type = DeduceDataTypeFromPrecision(precision);
788     for (auto storage : env->GetSupportedStorages(data_type)) {
789       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
790       OperationDef op_def;
791       op_def.precision = precision;
792       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
793       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
794       TensorFloat32 dst_tensor;
795       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
796                                                  OperationType::MAXIMUM, attr);
797       RETURN_IF_ERROR(env->ExecuteGPUOperation(
798           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
799           BHWC(1, 2, 1, 2), &dst_tensor));
800       RETURN_IF_ERROR(
801           PointWiseNear({1.0f, 0.5f, 2.0f, 3.0f}, dst_tensor.data, eps));
802     }
803   }
804   return absl::OkStatus();
805 }
806 
MinimumTest(TestExecutionEnvironment * env)807 absl::Status MinimumTest(TestExecutionEnvironment* env) {
808   TensorFloat32 src_tensor_0, src_tensor_1;
809   src_tensor_0.shape = BHWC(1, 2, 1, 2);
810   src_tensor_1.shape = BHWC(1, 2, 1, 2);
811   src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
812   src_tensor_1.data = {1.0f, 2.0f, 3.0f, -2.0f};
813 
814   for (auto precision : env->GetSupportedPrecisions()) {
815     auto data_type = DeduceDataTypeFromPrecision(precision);
816     for (auto storage : env->GetSupportedStorages(data_type)) {
817       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
818       OperationDef op_def;
819       op_def.precision = precision;
820       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
821       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
822       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
823       TensorFloat32 dst_tensor;
824       GPUOperation operation = CreateElementwiseTwoInput(
825           op_def, OperationType::MINIMUM, src_tensor_1.shape);
826       RETURN_IF_ERROR(env->ExecuteGPUOperation(
827           {src_tensor_0, src_tensor_1},
828           std::make_unique<GPUOperation>(std::move(operation)),
829           BHWC(1, 2, 1, 2), &dst_tensor));
830       RETURN_IF_ERROR(
831           PointWiseNear({0.0f, -6.2f, 2.0f, -3.0f}, dst_tensor.data, eps));
832     }
833   }
834   return absl::OkStatus();
835 }
836 
MinimumWithScalarTest(TestExecutionEnvironment * env)837 absl::Status MinimumWithScalarTest(TestExecutionEnvironment* env) {
838   TensorFloat32 src_tensor_0;
839   src_tensor_0.shape = BHWC(1, 4, 1, 1);
840   src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
841 
842   ElementwiseAttributes attr;
843   attr.param = -1.0f;
844 
845   for (auto precision : env->GetSupportedPrecisions()) {
846     auto data_type = DeduceDataTypeFromPrecision(precision);
847     for (auto storage : env->GetSupportedStorages(data_type)) {
848       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
849       OperationDef op_def;
850       op_def.precision = precision;
851       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
852       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
853       TensorFloat32 dst_tensor;
854       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
855                                                  OperationType::MINIMUM, attr);
856       RETURN_IF_ERROR(env->ExecuteGPUOperation(
857           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
858           BHWC(1, 4, 1, 1), &dst_tensor));
859       RETURN_IF_ERROR(
860           PointWiseNear({-1.0f, -6.2f, -1.0f, -3.0f}, dst_tensor.data, eps));
861     }
862   }
863   return absl::OkStatus();
864 }
865 
MulTest(TestExecutionEnvironment * env)866 absl::Status MulTest(TestExecutionEnvironment* env) {
867   TensorFloat32 src_tensor_0, src_tensor_1;
868   src_tensor_0.shape = BHWC(1, 2, 1, 2);
869   src_tensor_1.shape = BHWC(1, 2, 1, 2);
870   src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
871   src_tensor_1.data = {0.5f, 1.0f, 3.0f, 1.5f};
872 
873   for (auto precision : env->GetSupportedPrecisions()) {
874     auto data_type = DeduceDataTypeFromPrecision(precision);
875     for (auto storage : env->GetSupportedStorages(data_type)) {
876       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
877       OperationDef op_def;
878       op_def.precision = precision;
879       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
880       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
881       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
882       TensorFloat32 dst_tensor;
883       GPUOperation operation = CreateElementwiseTwoInput(
884           op_def, OperationType::MUL, src_tensor_1.shape);
885       RETURN_IF_ERROR(env->ExecuteGPUOperation(
886           {src_tensor_0, src_tensor_1},
887           std::make_unique<GPUOperation>(std::move(operation)),
888           BHWC(1, 2, 1, 2), &dst_tensor));
889       RETURN_IF_ERROR(
890           PointWiseNear({0.5f, 2.0f, 9.0f, 6.75f}, dst_tensor.data, eps));
891     }
892   }
893   return absl::OkStatus();
894 }
895 
MulBroadcastHWTest(TestExecutionEnvironment * env)896 absl::Status MulBroadcastHWTest(TestExecutionEnvironment* env) {
897   TensorFloat32 src_tensor_0, src_tensor_1;
898   src_tensor_0.shape = BHWC(1, 2, 1, 2);
899   src_tensor_1.shape = BHWC(1, 1, 1, 2);
900   src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
901   src_tensor_1.data = {0.5f, 3.0f};
902 
903   for (auto precision : env->GetSupportedPrecisions()) {
904     auto data_type = DeduceDataTypeFromPrecision(precision);
905     for (auto storage : env->GetSupportedStorages(data_type)) {
906       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
907       OperationDef op_def;
908       op_def.precision = precision;
909       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
910       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
911       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
912       TensorFloat32 dst_tensor;
913       GPUOperation operation = CreateElementwiseTwoInput(
914           op_def, OperationType::MUL, src_tensor_1.shape);
915       RETURN_IF_ERROR(env->ExecuteGPUOperation(
916           {src_tensor_0, src_tensor_1},
917           std::make_unique<GPUOperation>(std::move(operation)),
918           BHWC(1, 2, 1, 2), &dst_tensor));
919       RETURN_IF_ERROR(
920           PointWiseNear({0.5f, 6.0f, 1.5f, 13.5f}, dst_tensor.data, eps));
921     }
922   }
923   return absl::OkStatus();
924 }
925 
MulBroadcastChannelsTest(TestExecutionEnvironment * env)926 absl::Status MulBroadcastChannelsTest(TestExecutionEnvironment* env) {
927   TensorFloat32 src_tensor_0, src_tensor_1;
928   src_tensor_0.shape = BHWC(1, 2, 1, 2);
929   src_tensor_1.shape = BHWC(1, 2, 1, 1);
930   src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
931   src_tensor_1.data = {0.5f, 3.0f};
932 
933   for (auto precision : env->GetSupportedPrecisions()) {
934     auto data_type = DeduceDataTypeFromPrecision(precision);
935     for (auto storage : env->GetSupportedStorages(data_type)) {
936       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
937       OperationDef op_def;
938       op_def.precision = precision;
939       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
940       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
941       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
942       TensorFloat32 dst_tensor;
943       GPUOperation operation = CreateElementwiseTwoInput(
944           op_def, OperationType::MUL, src_tensor_1.shape);
945       RETURN_IF_ERROR(env->ExecuteGPUOperation(
946           {src_tensor_0, src_tensor_1},
947           std::make_unique<GPUOperation>(std::move(operation)),
948           BHWC(1, 2, 1, 2), &dst_tensor));
949       RETURN_IF_ERROR(
950           PointWiseNear({0.5f, 1.0f, 9.0f, 13.5f}, dst_tensor.data, eps));
951     }
952   }
953   return absl::OkStatus();
954 }
955 
SubWithScalarAtFirstPositionTest(TestExecutionEnvironment * env)956 absl::Status SubWithScalarAtFirstPositionTest(TestExecutionEnvironment* env) {
957   TensorFloat32 src_tensor_0;
958   src_tensor_0.shape = BHWC(1, 4, 1, 1);
959   src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
960 
961   ElementwiseAttributes attr;
962   attr.param = 4.0f;
963   attr.runtime_tensor_is_second = true;
964 
965   for (auto precision : env->GetSupportedPrecisions()) {
966     auto data_type = DeduceDataTypeFromPrecision(precision);
967     for (auto storage : env->GetSupportedStorages(data_type)) {
968       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
969       OperationDef op_def;
970       op_def.precision = precision;
971       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
972       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
973       TensorFloat32 dst_tensor;
974       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
975                                                  OperationType::SUB, attr);
976       RETURN_IF_ERROR(env->ExecuteGPUOperation(
977           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
978           BHWC(1, 4, 1, 1), &dst_tensor));
979       RETURN_IF_ERROR(
980           PointWiseNear({4.0f, 10.2f, 2.0f, 7.0f}, dst_tensor.data, eps));
981     }
982   }
983   return absl::OkStatus();
984 }
985 
LessTest(TestExecutionEnvironment * env)986 absl::Status LessTest(TestExecutionEnvironment* env) {
987   TensorFloat32 src_tensor_0, src_tensor_1;
988   src_tensor_0.shape = BHWC(1, 2, 1, 2);
989   src_tensor_1.shape = BHWC(1, 2, 1, 2);
990   src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
991   src_tensor_1.data = {1.0f, 0.0f, 2.0f, -4.0f};
992 
993   tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
994   ref_tensor.shape = BHWC(1, 2, 1, 2);
995   ref_tensor.data = {true, false, false, false};
996 
997   for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
998     for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
999       OperationDef op_def;
1000       op_def.precision = CalculationsPrecision::F32;
1001       op_def.src_tensors.push_back(
1002           {DataType::FLOAT32, src_storage, Layout::HWC});
1003       op_def.src_tensors.push_back(
1004           {DataType::FLOAT32, src_storage, Layout::HWC});
1005       op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1006 
1007       TensorDescriptor src_desc0, src_desc1, dst_desc;
1008       src_desc0 = op_def.src_tensors[0];
1009       src_desc0.UploadData(src_tensor_0);
1010       src_desc1 = op_def.src_tensors[1];
1011       src_desc1.UploadData(src_tensor_1);
1012       dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1013       GPUOperation operation = CreateElementwiseTwoInput(
1014           op_def, OperationType::LESS, src_tensor_1.shape);
1015       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1016           {&src_desc0, &src_desc1}, {&dst_desc},
1017           std::make_unique<GPUOperation>(std::move(operation))));
1018 
1019       tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1020       dst_desc.DownloadData(&dst_tensor);
1021       if (dst_tensor.data != ref_tensor.data) {
1022         return absl::InternalError("not equal");
1023       }
1024     }
1025   }
1026   return absl::OkStatus();
1027 }
1028 
LessEqualTest(TestExecutionEnvironment * env)1029 absl::Status LessEqualTest(TestExecutionEnvironment* env) {
1030   TensorFloat32 src_tensor_0;
1031   src_tensor_0.shape = BHWC(1, 2, 1, 2);
1032   src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1033 
1034   tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1035   ref_tensor.shape = BHWC(1, 2, 1, 2);
1036   ref_tensor.data = {true, true, true, false};
1037 
1038   ElementwiseAttributes attr;
1039   attr.param = 2.0f;
1040 
1041   for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1042     for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1043       OperationDef op_def;
1044       op_def.precision = CalculationsPrecision::F32;
1045       op_def.src_tensors.push_back(
1046           {DataType::FLOAT32, src_storage, Layout::HWC});
1047       op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1048       TensorDescriptor src_desc, dst_desc;
1049       src_desc = op_def.src_tensors[0];
1050       src_desc.UploadData(src_tensor_0);
1051       dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1052       GPUOperation operation = CreateElementwise(
1053           env->GetGpuInfo(), op_def, OperationType::LESS_EQUAL, attr);
1054       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1055           {&src_desc}, {&dst_desc},
1056           std::make_unique<GPUOperation>(std::move(operation))));
1057 
1058       tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1059       dst_desc.DownloadData(&dst_tensor);
1060       if (dst_tensor.data != ref_tensor.data) {
1061         return absl::InternalError("not equal");
1062       }
1063     }
1064   }
1065   return absl::OkStatus();
1066 }
1067 
GreaterTest(TestExecutionEnvironment * env)1068 absl::Status GreaterTest(TestExecutionEnvironment* env) {
1069   TensorFloat32 src_tensor_0;
1070   src_tensor_0.shape = BHWC(1, 2, 1, 2);
1071   src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1072 
1073   tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1074   ref_tensor.shape = BHWC(1, 2, 1, 2);
1075   ref_tensor.data = {false, false, false, true};
1076 
1077   ElementwiseAttributes attr;
1078   attr.param = 2.0f;
1079 
1080   for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1081     for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1082       OperationDef op_def;
1083       op_def.precision = CalculationsPrecision::F32;
1084       op_def.src_tensors.push_back(
1085           {DataType::FLOAT32, src_storage, Layout::HWC});
1086       op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1087       TensorDescriptor src_desc, dst_desc;
1088       src_desc = op_def.src_tensors[0];
1089       src_desc.UploadData(src_tensor_0);
1090       dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1091       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
1092                                                  OperationType::GREATER, attr);
1093       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1094           {&src_desc}, {&dst_desc},
1095           std::make_unique<GPUOperation>(std::move(operation))));
1096 
1097       tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1098       dst_desc.DownloadData(&dst_tensor);
1099       if (dst_tensor.data != ref_tensor.data) {
1100         return absl::InternalError("not equal");
1101       }
1102     }
1103   }
1104   return absl::OkStatus();
1105 }
1106 
GreaterEqualTest(TestExecutionEnvironment * env)1107 absl::Status GreaterEqualTest(TestExecutionEnvironment* env) {
1108   TensorFloat32 src_tensor_0;
1109   src_tensor_0.shape = BHWC(1, 2, 1, 2);
1110   src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1111 
1112   tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1113   ref_tensor.shape = BHWC(1, 2, 1, 2);
1114   ref_tensor.data = {false, false, true, true};
1115 
1116   ElementwiseAttributes attr;
1117   attr.param = 2.0f;
1118 
1119   for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1120     for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1121       OperationDef op_def;
1122       op_def.precision = CalculationsPrecision::F32;
1123       op_def.src_tensors.push_back(
1124           {DataType::FLOAT32, src_storage, Layout::HWC});
1125       op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1126       TensorDescriptor src_desc, dst_desc;
1127       src_desc = op_def.src_tensors[0];
1128       src_desc.UploadData(src_tensor_0);
1129       dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1130       GPUOperation operation = CreateElementwise(
1131           env->GetGpuInfo(), op_def, OperationType::GREATER_EQUAL, attr);
1132       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1133           {&src_desc}, {&dst_desc},
1134           std::make_unique<GPUOperation>(std::move(operation))));
1135 
1136       tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1137       dst_desc.DownloadData(&dst_tensor);
1138       if (dst_tensor.data != ref_tensor.data) {
1139         return absl::InternalError("not equal");
1140       }
1141     }
1142   }
1143   return absl::OkStatus();
1144 }
1145 
EqualTest(TestExecutionEnvironment * env)1146 absl::Status EqualTest(TestExecutionEnvironment* env) {
1147   TensorFloat32 src_tensor_0;
1148   src_tensor_0.shape = BHWC(1, 2, 1, 2);
1149   src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1150 
1151   tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1152   ref_tensor.shape = BHWC(1, 2, 1, 2);
1153   ref_tensor.data = {false, false, true, false};
1154 
1155   ElementwiseAttributes attr;
1156   attr.param = 2.0f;
1157 
1158   for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1159     for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1160       OperationDef op_def;
1161       op_def.precision = CalculationsPrecision::F32;
1162       op_def.src_tensors.push_back(
1163           {DataType::FLOAT32, src_storage, Layout::HWC});
1164       op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1165       TensorDescriptor src_desc, dst_desc;
1166       src_desc = op_def.src_tensors[0];
1167       src_desc.UploadData(src_tensor_0);
1168       dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1169       GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
1170                                                  OperationType::EQUAL, attr);
1171       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1172           {&src_desc}, {&dst_desc},
1173           std::make_unique<GPUOperation>(std::move(operation))));
1174 
1175       tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1176       dst_desc.DownloadData(&dst_tensor);
1177       if (dst_tensor.data != ref_tensor.data) {
1178         return absl::InternalError("not equal");
1179       }
1180     }
1181   }
1182   return absl::OkStatus();
1183 }
1184 
NotEqualTest(TestExecutionEnvironment * env)1185 absl::Status NotEqualTest(TestExecutionEnvironment* env) {
1186   TensorFloat32 src_tensor_0;
1187   src_tensor_0.shape = BHWC(1, 2, 1, 2);
1188   src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1189 
1190   tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1191   ref_tensor.shape = BHWC(1, 2, 1, 2);
1192   ref_tensor.data = {true, true, false, true};
1193 
1194   ElementwiseAttributes attr;
1195   attr.param = 2.0f;
1196 
1197   for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1198     for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1199       OperationDef op_def;
1200       op_def.precision = CalculationsPrecision::F32;
1201       op_def.src_tensors.push_back(
1202           {DataType::FLOAT32, src_storage, Layout::HWC});
1203       op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1204       TensorDescriptor src_desc, dst_desc;
1205       src_desc = op_def.src_tensors[0];
1206       src_desc.UploadData(src_tensor_0);
1207       dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1208       GPUOperation operation = CreateElementwise(
1209           env->GetGpuInfo(), op_def, OperationType::NOT_EQUAL, attr);
1210       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1211           {&src_desc}, {&dst_desc},
1212           std::make_unique<GPUOperation>(std::move(operation))));
1213 
1214       tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1215       dst_desc.DownloadData(&dst_tensor);
1216       if (dst_tensor.data != ref_tensor.data) {
1217         return absl::InternalError("not equal");
1218       }
1219     }
1220   }
1221   return absl::OkStatus();
1222 }
1223 
CosBroadcastTest(TestExecutionEnvironment * env)1224 absl::Status CosBroadcastTest(TestExecutionEnvironment* env) {
1225   TensorFloat32 src_tensor;
1226   src_tensor.shape = BHWC(1, 2, 1, 1);
1227   src_tensor.data = {0.7f, -1.5f};
1228 
1229   for (auto precision : env->GetSupportedPrecisions()) {
1230     auto data_type = DeduceDataTypeFromPrecision(precision);
1231     for (auto storage : env->GetSupportedStorages(data_type)) {
1232       const float eps = precision == CalculationsPrecision::F32 ? 5e-5f : 1e-3f;
1233       OperationDef op_def;
1234       op_def.precision = precision;
1235       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1236       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1237       TensorFloat32 dst_tensor;
1238       BHWC output_shape(1, 2, 1, 2);
1239       GPUOperation operation = CreateElementwiseOneInputWithBroadcast(
1240           env->GetGpuInfo(), op_def, OperationType::COS, src_tensor.shape,
1241           output_shape);
1242       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1243           src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
1244           output_shape, &dst_tensor));
1245       RETURN_IF_ERROR(PointWiseNear(
1246           {std::cos(0.7f), std::cos(0.7f), std::cos(-1.5f), std::cos(-1.5f)},
1247           dst_tensor.data, eps));
1248     }
1249   }
1250   return absl::OkStatus();
1251 }
1252 
MaximumScalarBroadcastInputTest(TestExecutionEnvironment * env)1253 absl::Status MaximumScalarBroadcastInputTest(TestExecutionEnvironment* env) {
1254   TensorFloat32 src_tensor_0;
1255   src_tensor_0.shape = BHWC(1, 2, 1, 1);
1256   src_tensor_0.data = {2.0f, -3.0f};
1257 
1258   ElementwiseAttributes attr;
1259   attr.param = -2.0f;
1260 
1261   for (auto precision : env->GetSupportedPrecisions()) {
1262     auto data_type = DeduceDataTypeFromPrecision(precision);
1263     for (auto storage : env->GetSupportedStorages(data_type)) {
1264       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
1265       OperationDef op_def;
1266       op_def.precision = precision;
1267       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1268       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1269       TensorFloat32 dst_tensor;
1270       BHWC output_shape(1, 2, 1, 2);
1271       GPUOperation operation = CreateElementwiseWithBroadcast(
1272           env->GetGpuInfo(), op_def, OperationType::MAXIMUM, attr,
1273           src_tensor_0.shape, output_shape);
1274       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1275           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
1276           output_shape, &dst_tensor));
1277       RETURN_IF_ERROR(
1278           PointWiseNear({2.0f, 2.0f, -2.0f, -2.0f}, dst_tensor.data, eps));
1279     }
1280   }
1281   return absl::OkStatus();
1282 }
1283 
MulLinearBroadcastInputTest(TestExecutionEnvironment * env)1284 absl::Status MulLinearBroadcastInputTest(TestExecutionEnvironment* env) {
1285   TensorFloat32 src_tensor_0;
1286   src_tensor_0.shape = BHWC(1, 2, 1, 1);
1287   src_tensor_0.data = {2.0f, -3.0f};
1288 
1289   ::tflite::gpu::Tensor<Linear, DataType::FLOAT32> linear_tensor;
1290   linear_tensor.shape = Linear(2);
1291   linear_tensor.data = {0.5f, 2.0f};
1292   ElementwiseAttributes attr;
1293   attr.param = linear_tensor;
1294 
1295   for (auto precision : env->GetSupportedPrecisions()) {
1296     auto data_type = DeduceDataTypeFromPrecision(precision);
1297     for (auto storage : env->GetSupportedStorages(data_type)) {
1298       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
1299       OperationDef op_def;
1300       op_def.precision = precision;
1301       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1302       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1303       TensorFloat32 dst_tensor;
1304       BHWC output_shape(1, 2, 1, 2);
1305       GPUOperation operation = CreateElementwiseWithBroadcast(
1306           env->GetGpuInfo(), op_def, OperationType::MUL, attr,
1307           src_tensor_0.shape, output_shape);
1308       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1309           src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
1310           output_shape, &dst_tensor));
1311       RETURN_IF_ERROR(
1312           PointWiseNear({1.0f, 4.0f, -1.5f, -6.0f}, dst_tensor.data, eps));
1313     }
1314   }
1315   return absl::OkStatus();
1316 }
1317 
MulBroadcastBothInputsTest(TestExecutionEnvironment * env)1318 absl::Status MulBroadcastBothInputsTest(TestExecutionEnvironment* env) {
1319   TensorFloat32 src_tensor_0, src_tensor_1;
1320   src_tensor_0.shape = BHWC(1, 1, 2, 1);
1321   src_tensor_1.shape = BHWC(1, 1, 1, 2);
1322   src_tensor_0.data = {1.0f, 2.0f};
1323   src_tensor_1.data = {3.0f, 4.0f};
1324 
1325   for (auto precision : env->GetSupportedPrecisions()) {
1326     auto data_type = DeduceDataTypeFromPrecision(precision);
1327     for (auto storage : env->GetSupportedStorages(data_type)) {
1328       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
1329       OperationDef op_def;
1330       op_def.precision = precision;
1331       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1332       op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1333       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1334       TensorFloat32 dst_tensor;
1335       BHWC output_shape(1, 1, 2, 2);
1336       GPUOperation operation = CreateElementwiseTwoInputWithBroadcast(
1337           op_def, OperationType::MUL, src_tensor_0.shape, src_tensor_1.shape,
1338           output_shape);
1339       RETURN_IF_ERROR(env->ExecuteGPUOperation(
1340           {src_tensor_0, src_tensor_1},
1341           std::make_unique<GPUOperation>(std::move(operation)), output_shape,
1342           &dst_tensor));
1343       RETURN_IF_ERROR(
1344           PointWiseNear({3.0f, 4.0f, 6.0f, 8.0f}, dst_tensor.data, eps));
1345     }
1346   }
1347   return absl::OkStatus();
1348 }
1349 
1350 }  // namespace gpu
1351 }  // namespace tflite
1352