1 /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise_test_util.h"
17
18 #include <memory>
19 #include <vector>
20
21 #include "tensorflow/lite/delegates/gpu/common/operations.h"
22 #include "tensorflow/lite/delegates/gpu/common/status.h"
23 #include "tensorflow/lite/delegates/gpu/common/task/testing_util.h"
24 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
25
26 namespace tflite {
27 namespace gpu {
28
AbsTest(TestExecutionEnvironment * env)29 absl::Status AbsTest(TestExecutionEnvironment* env) {
30 TensorFloat32 src_tensor;
31 src_tensor.shape = BHWC(1, 2, 1, 2);
32 src_tensor.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)};
33
34 for (auto precision : env->GetSupportedPrecisions()) {
35 auto data_type = DeduceDataTypeFromPrecision(precision);
36 for (auto storage : env->GetSupportedStorages(data_type)) {
37 OperationDef op_def;
38 op_def.precision = precision;
39 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
40 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
41 TensorFloat32 dst_tensor;
42 GPUOperation operation = CreateElementwiseOneInput(
43 env->GetGpuInfo(), op_def, OperationType::ABS);
44 RETURN_IF_ERROR(env->ExecuteGPUOperation(
45 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
46 BHWC(1, 2, 1, 2), &dst_tensor));
47 RETURN_IF_ERROR(
48 PointWiseNear({half(0.0f), half(1.0f), half(0.05f), half(0.045f)},
49 dst_tensor.data, 0.0f));
50 }
51 }
52 return absl::OkStatus();
53 }
54
CosTest(TestExecutionEnvironment * env)55 absl::Status CosTest(TestExecutionEnvironment* env) {
56 TensorFloat32 src_tensor;
57 src_tensor.shape = BHWC(1, 2, 1, 2);
58 src_tensor.data = {0.0f, -1.0f, -0.05f, 0.045f};
59
60 for (auto precision : env->GetSupportedPrecisions()) {
61 auto data_type = DeduceDataTypeFromPrecision(precision);
62 for (auto storage : env->GetSupportedStorages(data_type)) {
63 const float eps = precision == CalculationsPrecision::F32 ? 5e-5f : 1e-3f;
64 OperationDef op_def;
65 op_def.precision = precision;
66 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
67 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
68 TensorFloat32 dst_tensor;
69 GPUOperation operation = CreateElementwiseOneInput(
70 env->GetGpuInfo(), op_def, OperationType::COS);
71 RETURN_IF_ERROR(env->ExecuteGPUOperation(
72 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
73 BHWC(1, 2, 1, 2), &dst_tensor));
74 RETURN_IF_ERROR(PointWiseNear(
75 {std::cos(0.0f), std::cos(-1.0f), std::cos(-0.05f), std::cos(0.045f)},
76 dst_tensor.data, eps));
77 }
78 }
79 return absl::OkStatus();
80 }
81
CopyTest(TestExecutionEnvironment * env)82 absl::Status CopyTest(TestExecutionEnvironment* env) {
83 TensorFloat32 src_tensor;
84 src_tensor.shape = BHWC(1, 2, 1, 2);
85 src_tensor.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)};
86
87 for (auto precision : env->GetSupportedPrecisions()) {
88 auto data_type = DeduceDataTypeFromPrecision(precision);
89 for (auto storage : env->GetSupportedStorages(data_type)) {
90 OperationDef op_def;
91 op_def.precision = precision;
92 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
93 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
94 TensorFloat32 dst_tensor;
95 GPUOperation operation = CreateElementwiseOneInput(
96 env->GetGpuInfo(), op_def, OperationType::COPY);
97 RETURN_IF_ERROR(env->ExecuteGPUOperation(
98 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
99 BHWC(1, 2, 1, 2), &dst_tensor));
100 RETURN_IF_ERROR(PointWiseNear(src_tensor.data, dst_tensor.data, 0.0f));
101 }
102 }
103 return absl::OkStatus();
104 }
105
EluTest(TestExecutionEnvironment * env)106 absl::Status EluTest(TestExecutionEnvironment* env) {
107 TensorFloat32 src_tensor;
108 src_tensor.shape = BHWC(1, 1, 1, 7);
109 src_tensor.data = {0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
110
111 for (auto precision : env->GetSupportedPrecisions()) {
112 auto data_type = DeduceDataTypeFromPrecision(precision);
113 for (auto storage : env->GetSupportedStorages(data_type)) {
114 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
115 OperationDef op_def;
116 op_def.precision = precision;
117 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
118 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
119 TensorFloat32 dst_tensor;
120 GPUOperation operation = CreateElementwiseOneInput(
121 env->GetGpuInfo(), op_def, OperationType::ELU);
122 RETURN_IF_ERROR(env->ExecuteGPUOperation(
123 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
124 BHWC(1, 1, 1, 7), &dst_tensor));
125 RETURN_IF_ERROR(PointWiseNear(
126 {0.0f, 1.0f, std::exp(-1.0f) - 1.0f, 100.0f, std::exp(-100.0f) - 1.0f,
127 0.01f, std::exp(-0.01f) - 1.0f},
128 dst_tensor.data, eps));
129 }
130 }
131 return absl::OkStatus();
132 }
133
ExpTest(TestExecutionEnvironment * env)134 absl::Status ExpTest(TestExecutionEnvironment* env) {
135 TensorFloat32 src_tensor;
136 src_tensor.shape = BHWC(1, 1, 1, 7);
137 src_tensor.data = {0.0f, 1.0f, -1.0f, 2.5f, -1.7f, 0.01f, -0.01f};
138
139 for (auto precision : env->GetSupportedPrecisions()) {
140 auto data_type = DeduceDataTypeFromPrecision(precision);
141 for (auto storage : env->GetSupportedStorages(data_type)) {
142 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 2e-2f;
143 OperationDef op_def;
144 op_def.precision = precision;
145 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
146 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
147 TensorFloat32 dst_tensor;
148 GPUOperation operation = CreateElementwiseOneInput(
149 env->GetGpuInfo(), op_def, OperationType::EXP);
150 RETURN_IF_ERROR(env->ExecuteGPUOperation(
151 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
152 BHWC(1, 1, 1, 7), &dst_tensor));
153 RETURN_IF_ERROR(PointWiseNear(
154 {std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f), std::exp(2.5f),
155 std::exp(-1.7f), std::exp(0.01f), std::exp(-0.01f)},
156 dst_tensor.data, eps));
157 }
158 }
159 return absl::OkStatus();
160 }
161
FloorTest(TestExecutionEnvironment * env)162 absl::Status FloorTest(TestExecutionEnvironment* env) {
163 TensorFloat32 src_tensor;
164 src_tensor.shape = BHWC(1, 1, 1, 7);
165 src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
166
167 for (auto precision : env->GetSupportedPrecisions()) {
168 auto data_type = DeduceDataTypeFromPrecision(precision);
169 for (auto storage : env->GetSupportedStorages(data_type)) {
170 const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
171 OperationDef op_def;
172 op_def.precision = precision;
173 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
174 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
175 TensorFloat32 dst_tensor;
176 GPUOperation operation = CreateElementwiseOneInput(
177 env->GetGpuInfo(), op_def, OperationType::FLOOR);
178 RETURN_IF_ERROR(env->ExecuteGPUOperation(
179 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
180 src_tensor.shape, &dst_tensor));
181 RETURN_IF_ERROR(PointWiseNear(
182 {-5.0, -3.0f, -2.0f, 0.0f, 1.0f, 3.0f, 4.0f}, dst_tensor.data, eps));
183 }
184 }
185 return absl::OkStatus();
186 }
187
FloorDivTest(TestExecutionEnvironment * env)188 absl::Status FloorDivTest(TestExecutionEnvironment* env) {
189 TensorFloat32 src_tensor;
190 src_tensor.shape = BHWC(1, 1, 1, 7);
191 src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
192
193 float scalar = 2.7f;
194 ElementwiseAttributes attr;
195 attr.param = scalar;
196
197 for (auto precision : env->GetSupportedPrecisions()) {
198 auto data_type = DeduceDataTypeFromPrecision(precision);
199 for (auto storage : env->GetSupportedStorages(data_type)) {
200 const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
201 OperationDef op_def;
202 op_def.precision = precision;
203 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
204 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
205 TensorFloat32 dst_tensor;
206 GPUOperation operation = CreateElementwise(
207 env->GetGpuInfo(), op_def, OperationType::FLOOR_DIV, attr);
208 RETURN_IF_ERROR(env->ExecuteGPUOperation(
209 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
210 src_tensor.shape, &dst_tensor));
211 RETURN_IF_ERROR(
212 PointWiseNear({std::floor(-4.5f / scalar), std::floor(-3.0f / scalar),
213 std::floor(-1.5f / scalar), std::floor(0.0f / scalar),
214 std::floor(1.5f / scalar), std::floor(3.0f / scalar),
215 std::floor(4.5f / scalar)},
216 dst_tensor.data, eps));
217 }
218 }
219 return absl::OkStatus();
220 }
221
FloorModTest(TestExecutionEnvironment * env)222 absl::Status FloorModTest(TestExecutionEnvironment* env) {
223 TensorFloat32 src_tensor;
224 src_tensor.shape = BHWC(1, 1, 1, 7);
225 src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
226
227 float scalar = 2.7f;
228 ElementwiseAttributes attr;
229 attr.param = scalar;
230
231 for (auto precision : env->GetSupportedPrecisions()) {
232 auto data_type = DeduceDataTypeFromPrecision(precision);
233 for (auto storage : env->GetSupportedStorages(data_type)) {
234 const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
235 OperationDef op_def;
236 op_def.precision = precision;
237 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
238 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
239 TensorFloat32 dst_tensor;
240 GPUOperation operation = CreateElementwise(
241 env->GetGpuInfo(), op_def, OperationType::FLOOR_MOD, attr);
242 RETURN_IF_ERROR(env->ExecuteGPUOperation(
243 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
244 src_tensor.shape, &dst_tensor));
245 RETURN_IF_ERROR(
246 PointWiseNear({-4.5f - std::floor(-4.5f / scalar) * scalar,
247 -3.0f - std::floor(-3.0f / scalar) * scalar,
248 -1.5f - std::floor(-1.5f / scalar) * scalar,
249 0.0f - std::floor(0.0f / scalar) * scalar,
250 1.5f - std::floor(1.5f / scalar) * scalar,
251 3.0f - std::floor(3.0f / scalar) * scalar,
252 4.5f - std::floor(4.5f / scalar) * scalar},
253 dst_tensor.data, eps));
254 }
255 }
256 return absl::OkStatus();
257 }
258
HardSwishTest(TestExecutionEnvironment * env)259 absl::Status HardSwishTest(TestExecutionEnvironment* env) {
260 TensorFloat32 src_tensor;
261 src_tensor.shape = BHWC(1, 1, 1, 7);
262 src_tensor.data = {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f};
263
264 for (auto precision : env->GetSupportedPrecisions()) {
265 auto data_type = DeduceDataTypeFromPrecision(precision);
266 for (auto storage : env->GetSupportedStorages(data_type)) {
267 const float eps = precision == CalculationsPrecision::F32 ? 1e-5f : 1e-2f;
268 OperationDef op_def;
269 op_def.precision = precision;
270 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
271 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
272 TensorFloat32 dst_tensor;
273 GPUOperation operation = CreateElementwiseOneInput(
274 env->GetGpuInfo(), op_def, OperationType::HARD_SWISH);
275 RETURN_IF_ERROR(env->ExecuteGPUOperation(
276 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
277 src_tensor.shape, &dst_tensor));
278 RETURN_IF_ERROR(
279 PointWiseNear({0.0f, 0.0f, -0.375f, 0.0f, 1.125f, 3.f, 4.5f},
280 dst_tensor.data, eps));
281 }
282 }
283 return absl::OkStatus();
284 }
285
LogTest(TestExecutionEnvironment * env)286 absl::Status LogTest(TestExecutionEnvironment* env) {
287 TensorFloat32 src_tensor;
288 src_tensor.shape = BHWC(1, 2, 1, 2);
289 src_tensor.data = {1.0f, 2.0f, 3.0f, 4.0f};
290
291 for (auto precision : env->GetSupportedPrecisions()) {
292 auto data_type = DeduceDataTypeFromPrecision(precision);
293 for (auto storage : env->GetSupportedStorages(data_type)) {
294 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
295 OperationDef op_def;
296 op_def.precision = precision;
297 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
298 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
299 TensorFloat32 dst_tensor;
300 GPUOperation operation = CreateElementwiseOneInput(
301 env->GetGpuInfo(), op_def, OperationType::LOG);
302 RETURN_IF_ERROR(env->ExecuteGPUOperation(
303 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
304 BHWC(1, 2, 1, 2), &dst_tensor));
305 RETURN_IF_ERROR(PointWiseNear(
306 {std::log(1.0f), std::log(2.0f), std::log(3.0f), std::log(4.0f)},
307 dst_tensor.data, eps));
308 }
309 }
310 return absl::OkStatus();
311 }
312
NegTest(TestExecutionEnvironment * env)313 absl::Status NegTest(TestExecutionEnvironment* env) {
314 TensorFloat32 src_tensor;
315 src_tensor.shape = BHWC(1, 2, 1, 2);
316 src_tensor.data = {1.0f, -2.0f, 0.0f, 4.0f};
317
318 for (auto precision : env->GetSupportedPrecisions()) {
319 auto data_type = DeduceDataTypeFromPrecision(precision);
320 for (auto storage : env->GetSupportedStorages(data_type)) {
321 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
322 OperationDef op_def;
323 op_def.precision = precision;
324 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
325 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
326 TensorFloat32 dst_tensor;
327 GPUOperation operation = CreateElementwiseOneInput(
328 env->GetGpuInfo(), op_def, OperationType::NEG);
329 RETURN_IF_ERROR(env->ExecuteGPUOperation(
330 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
331 BHWC(1, 2, 1, 2), &dst_tensor));
332 RETURN_IF_ERROR(
333 PointWiseNear({-1.0f, 2.0f, 0.0f, -4.0f}, dst_tensor.data, eps));
334 }
335 }
336 return absl::OkStatus();
337 }
338
RsqrtTest(TestExecutionEnvironment * env)339 absl::Status RsqrtTest(TestExecutionEnvironment* env) {
340 TensorFloat32 src_tensor;
341 src_tensor.shape = BHWC(1, 2, 1, 2);
342 src_tensor.data = {1.0f, 2.0f, 3.0f, 4.0f};
343
344 for (auto precision : env->GetSupportedPrecisions()) {
345 auto data_type = DeduceDataTypeFromPrecision(precision);
346 for (auto storage : env->GetSupportedStorages(data_type)) {
347 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
348 OperationDef op_def;
349 op_def.precision = precision;
350 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
351 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
352 TensorFloat32 dst_tensor;
353 GPUOperation operation = CreateElementwiseOneInput(
354 env->GetGpuInfo(), op_def, OperationType::RSQRT);
355 RETURN_IF_ERROR(env->ExecuteGPUOperation(
356 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
357 BHWC(1, 2, 1, 2), &dst_tensor));
358 RETURN_IF_ERROR(
359 PointWiseNear({1.0f / std::sqrt(1.0f), 1.0f / std::sqrt(2.0f),
360 1.0f / std::sqrt(3.0f), 1.0f / std::sqrt(4.0f)},
361 dst_tensor.data, eps));
362 }
363 }
364 return absl::OkStatus();
365 }
366
SigmoidTest(TestExecutionEnvironment * env)367 absl::Status SigmoidTest(TestExecutionEnvironment* env) {
368 TensorFloat32 src_tensor;
369 src_tensor.shape = BHWC(1, 2, 1, 2);
370 src_tensor.data = {-std::log(1.0f), -std::log(2.0f), -std::log(3.0f),
371 -std::log(4.0f)};
372
373 for (auto precision : env->GetSupportedPrecisions()) {
374 auto data_type = DeduceDataTypeFromPrecision(precision);
375 for (auto storage : env->GetSupportedStorages(data_type)) {
376 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
377 OperationDef op_def;
378 op_def.precision = precision;
379 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
380 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
381 TensorFloat32 dst_tensor;
382 GPUOperation operation = CreateElementwiseOneInput(
383 env->GetGpuInfo(), op_def, OperationType::SIGMOID);
384 RETURN_IF_ERROR(env->ExecuteGPUOperation(
385 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
386 BHWC(1, 2, 1, 2), &dst_tensor));
387 RETURN_IF_ERROR(PointWiseNear({0.5f, 1.0f / 3.0f, 0.25f, 0.2f},
388 dst_tensor.data, eps));
389 }
390 }
391 return absl::OkStatus();
392 }
393
SinTest(TestExecutionEnvironment * env)394 absl::Status SinTest(TestExecutionEnvironment* env) {
395 TensorFloat32 src_tensor;
396 src_tensor.shape = BHWC(1, 2, 1, 2);
397 src_tensor.data = {0.0f, -1.0f, -0.05f, 0.045f};
398
399 for (auto precision : env->GetSupportedPrecisions()) {
400 auto data_type = DeduceDataTypeFromPrecision(precision);
401 for (auto storage : env->GetSupportedStorages(data_type)) {
402 const float eps = precision == CalculationsPrecision::F32 ? 2e-5f : 5e-3f;
403 OperationDef op_def;
404 op_def.precision = precision;
405 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
406 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
407 TensorFloat32 dst_tensor;
408 GPUOperation operation = CreateElementwiseOneInput(
409 env->GetGpuInfo(), op_def, OperationType::SIN);
410 RETURN_IF_ERROR(env->ExecuteGPUOperation(
411 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
412 BHWC(1, 2, 1, 2), &dst_tensor));
413 RETURN_IF_ERROR(PointWiseNear(
414 {std::sin(0.0f), std::sin(-1.0f), std::sin(-0.05f), std::sin(0.045f)},
415 dst_tensor.data, eps));
416 }
417 }
418 return absl::OkStatus();
419 }
420
SqrtTest(TestExecutionEnvironment * env)421 absl::Status SqrtTest(TestExecutionEnvironment* env) {
422 TensorFloat32 src_tensor;
423 src_tensor.shape = BHWC(1, 2, 1, 2);
424 src_tensor.data = {1.0f, 2.0f, 3.0f, 4.0f};
425
426 for (auto precision : env->GetSupportedPrecisions()) {
427 auto data_type = DeduceDataTypeFromPrecision(precision);
428 for (auto storage : env->GetSupportedStorages(data_type)) {
429 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
430 OperationDef op_def;
431 op_def.precision = precision;
432 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
433 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
434 TensorFloat32 dst_tensor;
435 GPUOperation operation = CreateElementwiseOneInput(
436 env->GetGpuInfo(), op_def, OperationType::SQRT);
437 RETURN_IF_ERROR(env->ExecuteGPUOperation(
438 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
439 BHWC(1, 2, 1, 2), &dst_tensor));
440 RETURN_IF_ERROR(PointWiseNear(
441 {std::sqrt(1.0f), std::sqrt(2.0f), std::sqrt(3.0f), std::sqrt(4.0f)},
442 dst_tensor.data, eps));
443 }
444 }
445 return absl::OkStatus();
446 }
447
SquareTest(TestExecutionEnvironment * env)448 absl::Status SquareTest(TestExecutionEnvironment* env) {
449 TensorFloat32 src_tensor;
450 src_tensor.shape = BHWC(1, 2, 1, 2);
451 src_tensor.data = {1.0f, -2.0f, 3.0f, 4.0f};
452
453 for (auto precision : env->GetSupportedPrecisions()) {
454 auto data_type = DeduceDataTypeFromPrecision(precision);
455 for (auto storage : env->GetSupportedStorages(data_type)) {
456 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
457 OperationDef op_def;
458 op_def.precision = precision;
459 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
460 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
461 TensorFloat32 dst_tensor;
462 GPUOperation operation = CreateElementwiseOneInput(
463 env->GetGpuInfo(), op_def, OperationType::SQUARE);
464 RETURN_IF_ERROR(env->ExecuteGPUOperation(
465 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
466 BHWC(1, 2, 1, 2), &dst_tensor));
467 RETURN_IF_ERROR(
468 PointWiseNear({1.0f, 4.0f, 9.0f, 16.0f}, dst_tensor.data, eps));
469 }
470 }
471 return absl::OkStatus();
472 }
473
TanhTest(TestExecutionEnvironment * env)474 absl::Status TanhTest(TestExecutionEnvironment* env) {
475 TensorFloat32 src_tensor;
476 src_tensor.shape = BHWC(1, 2, 1, 2);
477 src_tensor.data = {-4.0f, -0.1f, 0.1f, 2.0f};
478
479 for (auto precision : env->GetSupportedPrecisions()) {
480 auto data_type = DeduceDataTypeFromPrecision(precision);
481 for (auto storage : env->GetSupportedStorages(data_type)) {
482 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
483 OperationDef op_def;
484 op_def.precision = precision;
485 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
486 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
487 TensorFloat32 dst_tensor;
488 GPUOperation operation = CreateElementwiseOneInput(
489 env->GetGpuInfo(), op_def, OperationType::TANH);
490 RETURN_IF_ERROR(env->ExecuteGPUOperation(
491 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
492 BHWC(1, 2, 1, 2), &dst_tensor));
493 RETURN_IF_ERROR(PointWiseNear({std::tanh(-4.0f), std::tanh(-0.1f),
494 std::tanh(0.1f), std::tanh(2.0f)},
495 dst_tensor.data, eps));
496 }
497 }
498 return absl::OkStatus();
499 }
500
SubTest(TestExecutionEnvironment * env)501 absl::Status SubTest(TestExecutionEnvironment* env) {
502 TensorFloat32 src_tensor_0, src_tensor_1;
503 src_tensor_0.shape = BHWC(1, 2, 1, 2);
504 src_tensor_1.shape = BHWC(1, 2, 1, 2);
505 src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.0f};
506 src_tensor_1.data = {0.5f, 1.0f, 3.0f, 3.5f};
507
508 for (auto precision : env->GetSupportedPrecisions()) {
509 auto data_type = DeduceDataTypeFromPrecision(precision);
510 for (auto storage : env->GetSupportedStorages(data_type)) {
511 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
512 OperationDef op_def;
513 op_def.precision = precision;
514 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
515 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
516 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
517 TensorFloat32 dst_tensor;
518 GPUOperation operation = CreateElementwiseTwoInput(
519 op_def, OperationType::SUB, src_tensor_1.shape);
520 RETURN_IF_ERROR(env->ExecuteGPUOperation(
521 {src_tensor_0, src_tensor_1},
522 std::make_unique<GPUOperation>(std::move(operation)),
523 BHWC(1, 2, 1, 2), &dst_tensor));
524 RETURN_IF_ERROR(
525 PointWiseNear({0.5f, 1.0f, 0.0f, 0.5f}, dst_tensor.data, eps));
526 }
527 }
528 return absl::OkStatus();
529 }
530
SquaredDiffTest(TestExecutionEnvironment * env)531 absl::Status SquaredDiffTest(TestExecutionEnvironment* env) {
532 TensorFloat32 src_tensor_0, src_tensor_1;
533 src_tensor_0.shape = BHWC(1, 2, 1, 2);
534 src_tensor_1.shape = BHWC(1, 2, 1, 2);
535 src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.0f};
536 src_tensor_1.data = {0.5f, 1.0f, 3.0f, 3.5f};
537
538 for (auto precision : env->GetSupportedPrecisions()) {
539 auto data_type = DeduceDataTypeFromPrecision(precision);
540 for (auto storage : env->GetSupportedStorages(data_type)) {
541 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
542 OperationDef op_def;
543 op_def.precision = precision;
544 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
545 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
546 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
547 TensorFloat32 dst_tensor;
548 GPUOperation operation = CreateElementwiseTwoInput(
549 op_def, OperationType::SQUARED_DIFF, src_tensor_1.shape);
550 RETURN_IF_ERROR(env->ExecuteGPUOperation(
551 {src_tensor_0, src_tensor_1},
552 std::make_unique<GPUOperation>(std::move(operation)),
553 BHWC(1, 2, 1, 2), &dst_tensor));
554 RETURN_IF_ERROR(
555 PointWiseNear({0.25f, 1.0f, 0.0f, 0.25f}, dst_tensor.data, eps));
556 }
557 }
558 return absl::OkStatus();
559 }
560
DivTest(TestExecutionEnvironment * env)561 absl::Status DivTest(TestExecutionEnvironment* env) {
562 TensorFloat32 src_tensor_0, src_tensor_1;
563 src_tensor_0.shape = BHWC(1, 2, 1, 2);
564 src_tensor_1.shape = BHWC(1, 2, 1, 2);
565 src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
566 src_tensor_1.data = {0.5f, 1.0f, 3.0f, 1.5f};
567
568 for (auto precision : env->GetSupportedPrecisions()) {
569 auto data_type = DeduceDataTypeFromPrecision(precision);
570 for (auto storage : env->GetSupportedStorages(data_type)) {
571 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
572 OperationDef op_def;
573 op_def.precision = precision;
574 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
575 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
576 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
577 TensorFloat32 dst_tensor;
578 GPUOperation operation = CreateElementwiseTwoInput(
579 op_def, OperationType::DIV, src_tensor_1.shape);
580 RETURN_IF_ERROR(env->ExecuteGPUOperation(
581 {src_tensor_0, src_tensor_1},
582 std::make_unique<GPUOperation>(std::move(operation)),
583 BHWC(1, 2, 1, 2), &dst_tensor));
584 RETURN_IF_ERROR(
585 PointWiseNear({2.0f, 2.0f, 1.0f, 3.0f}, dst_tensor.data, eps));
586 }
587 }
588 return absl::OkStatus();
589 }
590
PowTest(TestExecutionEnvironment * env)591 absl::Status PowTest(TestExecutionEnvironment* env) {
592 TensorFloat32 src_tensor_0, src_tensor_1;
593 src_tensor_0.shape = BHWC(1, 2, 1, 2);
594 src_tensor_1.shape = BHWC(1, 2, 1, 2);
595 src_tensor_0.data = {6.0f, 7.0f, 4.0f, 2.0f};
596 src_tensor_1.data = {0.0f, 1.0f, 2.0f, 3.0f};
597
598 for (auto precision : env->GetSupportedPrecisions()) {
599 auto data_type = DeduceDataTypeFromPrecision(precision);
600 for (auto storage : env->GetSupportedStorages(data_type)) {
601 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
602 OperationDef op_def;
603 op_def.precision = precision;
604 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
605 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
606 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
607 TensorFloat32 dst_tensor;
608 GPUOperation operation = CreateElementwiseTwoInput(
609 op_def, OperationType::POW, src_tensor_1.shape);
610 RETURN_IF_ERROR(env->ExecuteGPUOperation(
611 {src_tensor_0, src_tensor_1},
612 std::make_unique<GPUOperation>(std::move(operation)),
613 BHWC(1, 2, 1, 2), &dst_tensor));
614 RETURN_IF_ERROR(
615 PointWiseNear({1.0f, 7.0f, 16.0f, 8.0f}, dst_tensor.data, eps));
616 }
617 }
618 return absl::OkStatus();
619 }
620
AddTest(TestExecutionEnvironment * env)621 absl::Status AddTest(TestExecutionEnvironment* env) {
622 TensorFloat32 src_tensor_0, src_tensor_1;
623 src_tensor_0.shape = BHWC(1, 2, 1, 2);
624 src_tensor_1.shape = BHWC(1, 2, 1, 2);
625 src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
626 src_tensor_1.data = {0.5f, 1.0f, 3.0f, 1.5f};
627
628 for (auto precision : env->GetSupportedPrecisions()) {
629 auto data_type = DeduceDataTypeFromPrecision(precision);
630 for (auto storage : env->GetSupportedStorages(data_type)) {
631 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
632 OperationDef op_def;
633 op_def.precision = precision;
634 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
635 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
636 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
637 TensorFloat32 dst_tensor;
638 GPUOperation operation = CreateElementwiseTwoInput(
639 op_def, OperationType::ADD, src_tensor_1.shape);
640 RETURN_IF_ERROR(env->ExecuteGPUOperation(
641 {src_tensor_0, src_tensor_1},
642 std::make_unique<GPUOperation>(std::move(operation)),
643 BHWC(1, 2, 1, 2), &dst_tensor));
644 RETURN_IF_ERROR(
645 PointWiseNear({1.5f, 3.0f, 6.0f, 6.0f}, dst_tensor.data, eps));
646 }
647 }
648 return absl::OkStatus();
649 }
650
MaximumTest(TestExecutionEnvironment * env)651 absl::Status MaximumTest(TestExecutionEnvironment* env) {
652 TensorFloat32 src_tensor_0, src_tensor_1;
653 src_tensor_0.shape = BHWC(1, 2, 1, 2);
654 src_tensor_1.shape = BHWC(1, 2, 1, 2);
655 src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
656 src_tensor_1.data = {1.0f, 2.0f, 3.0f, -2.0f};
657
658 for (auto precision : env->GetSupportedPrecisions()) {
659 auto data_type = DeduceDataTypeFromPrecision(precision);
660 for (auto storage : env->GetSupportedStorages(data_type)) {
661 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
662 OperationDef op_def;
663 op_def.precision = precision;
664 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
665 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
666 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
667 TensorFloat32 dst_tensor;
668 GPUOperation operation = CreateElementwiseTwoInput(
669 op_def, OperationType::MAXIMUM, src_tensor_1.shape);
670 RETURN_IF_ERROR(env->ExecuteGPUOperation(
671 {src_tensor_0, src_tensor_1},
672 std::make_unique<GPUOperation>(std::move(operation)),
673 BHWC(1, 2, 1, 2), &dst_tensor));
674 RETURN_IF_ERROR(
675 PointWiseNear({1.0f, 2.0f, 3.0f, -2.0f}, dst_tensor.data, eps));
676 }
677 }
678 return absl::OkStatus();
679 }
680
MaximumWithScalarTest(TestExecutionEnvironment * env)681 absl::Status MaximumWithScalarTest(TestExecutionEnvironment* env) {
682 TensorFloat32 src_tensor_0;
683 src_tensor_0.shape = BHWC(1, 4, 1, 1);
684 src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
685
686 ElementwiseAttributes attr;
687 attr.param = -1.0f;
688
689 for (auto precision : env->GetSupportedPrecisions()) {
690 auto data_type = DeduceDataTypeFromPrecision(precision);
691 for (auto storage : env->GetSupportedStorages(data_type)) {
692 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
693 OperationDef op_def;
694 op_def.precision = precision;
695 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
696 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
697 TensorFloat32 dst_tensor;
698 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
699 OperationType::MAXIMUM, attr);
700 RETURN_IF_ERROR(env->ExecuteGPUOperation(
701 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
702 BHWC(1, 4, 1, 1), &dst_tensor));
703 RETURN_IF_ERROR(
704 PointWiseNear({0.0f, -1.0f, 2.0f, -1.0f}, dst_tensor.data, eps));
705 }
706 }
707 return absl::OkStatus();
708 }
709
MaximumWithConstantLinearTensorTest(TestExecutionEnvironment * env)710 absl::Status MaximumWithConstantLinearTensorTest(
711 TestExecutionEnvironment* env) {
712 TensorFloat32 src_tensor_0;
713 src_tensor_0.shape = BHWC(1, 2, 1, 2);
714 src_tensor_0.data = {1.0f, -6.2f, -2.0f, 3.0f};
715
716 ::tflite::gpu::Tensor<Linear, DataType::FLOAT32> linear_tensor;
717 linear_tensor.shape = Linear(2);
718 linear_tensor.data = {0.5f, 2.0f};
719 ElementwiseAttributes attr;
720 attr.param = linear_tensor;
721
722 for (auto precision : env->GetSupportedPrecisions()) {
723 auto data_type = DeduceDataTypeFromPrecision(precision);
724 for (auto storage : env->GetSupportedStorages(data_type)) {
725 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
726 OperationDef op_def;
727 op_def.precision = precision;
728 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
729 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
730 TensorFloat32 dst_tensor;
731 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
732 OperationType::MAXIMUM, attr);
733 RETURN_IF_ERROR(env->ExecuteGPUOperation(
734 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
735 BHWC(1, 2, 1, 2), &dst_tensor));
736 RETURN_IF_ERROR(
737 PointWiseNear({1.0f, 2.0f, 0.5f, 3.0f}, dst_tensor.data, eps));
738 }
739 }
740 return absl::OkStatus();
741 }
742
MaximumWithConstantHWCTensorTest(TestExecutionEnvironment * env)743 absl::Status MaximumWithConstantHWCTensorTest(TestExecutionEnvironment* env) {
744 TensorFloat32 src_tensor_0;
745 src_tensor_0.shape = BHWC(1, 2, 1, 2);
746 src_tensor_0.data = {1.0f, -6.2f, -2.0f, 3.0f};
747
748 ::tflite::gpu::Tensor<HWC, DataType::FLOAT32> hwc_tensor;
749 hwc_tensor.shape = HWC(2, 1, 2);
750 hwc_tensor.data = {0.5f, 2.0f, 0.7f, 4.7f};
751 ElementwiseAttributes attr;
752 attr.param = hwc_tensor;
753
754 for (auto precision : env->GetSupportedPrecisions()) {
755 auto data_type = DeduceDataTypeFromPrecision(precision);
756 for (auto storage : env->GetSupportedStorages(data_type)) {
757 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
758 OperationDef op_def;
759 op_def.precision = precision;
760 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
761 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
762 TensorFloat32 dst_tensor;
763 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
764 OperationType::MAXIMUM, attr);
765 RETURN_IF_ERROR(env->ExecuteGPUOperation(
766 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
767 BHWC(1, 2, 1, 2), &dst_tensor));
768 RETURN_IF_ERROR(
769 PointWiseNear({1.0f, 2.0f, 0.7f, 4.7f}, dst_tensor.data, eps));
770 }
771 }
772 return absl::OkStatus();
773 }
MaximumWithConstantHWCTensorBroadcastChannelsTest(TestExecutionEnvironment * env)774 absl::Status MaximumWithConstantHWCTensorBroadcastChannelsTest(
775 TestExecutionEnvironment* env) {
776 TensorFloat32 src_tensor_0;
777 src_tensor_0.shape = BHWC(1, 2, 1, 2);
778 src_tensor_0.data = {1.0f, -6.2f, -2.0f, 3.0f};
779
780 ::tflite::gpu::Tensor<HWC, DataType::FLOAT32> hwc_tensor;
781 hwc_tensor.shape = HWC(2, 1, 1);
782 hwc_tensor.data = {0.5f, 2.0f};
783 ElementwiseAttributes attr;
784 attr.param = hwc_tensor;
785
786 for (auto precision : env->GetSupportedPrecisions()) {
787 auto data_type = DeduceDataTypeFromPrecision(precision);
788 for (auto storage : env->GetSupportedStorages(data_type)) {
789 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
790 OperationDef op_def;
791 op_def.precision = precision;
792 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
793 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
794 TensorFloat32 dst_tensor;
795 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
796 OperationType::MAXIMUM, attr);
797 RETURN_IF_ERROR(env->ExecuteGPUOperation(
798 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
799 BHWC(1, 2, 1, 2), &dst_tensor));
800 RETURN_IF_ERROR(
801 PointWiseNear({1.0f, 0.5f, 2.0f, 3.0f}, dst_tensor.data, eps));
802 }
803 }
804 return absl::OkStatus();
805 }
806
MinimumTest(TestExecutionEnvironment * env)807 absl::Status MinimumTest(TestExecutionEnvironment* env) {
808 TensorFloat32 src_tensor_0, src_tensor_1;
809 src_tensor_0.shape = BHWC(1, 2, 1, 2);
810 src_tensor_1.shape = BHWC(1, 2, 1, 2);
811 src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
812 src_tensor_1.data = {1.0f, 2.0f, 3.0f, -2.0f};
813
814 for (auto precision : env->GetSupportedPrecisions()) {
815 auto data_type = DeduceDataTypeFromPrecision(precision);
816 for (auto storage : env->GetSupportedStorages(data_type)) {
817 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
818 OperationDef op_def;
819 op_def.precision = precision;
820 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
821 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
822 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
823 TensorFloat32 dst_tensor;
824 GPUOperation operation = CreateElementwiseTwoInput(
825 op_def, OperationType::MINIMUM, src_tensor_1.shape);
826 RETURN_IF_ERROR(env->ExecuteGPUOperation(
827 {src_tensor_0, src_tensor_1},
828 std::make_unique<GPUOperation>(std::move(operation)),
829 BHWC(1, 2, 1, 2), &dst_tensor));
830 RETURN_IF_ERROR(
831 PointWiseNear({0.0f, -6.2f, 2.0f, -3.0f}, dst_tensor.data, eps));
832 }
833 }
834 return absl::OkStatus();
835 }
836
MinimumWithScalarTest(TestExecutionEnvironment * env)837 absl::Status MinimumWithScalarTest(TestExecutionEnvironment* env) {
838 TensorFloat32 src_tensor_0;
839 src_tensor_0.shape = BHWC(1, 4, 1, 1);
840 src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
841
842 ElementwiseAttributes attr;
843 attr.param = -1.0f;
844
845 for (auto precision : env->GetSupportedPrecisions()) {
846 auto data_type = DeduceDataTypeFromPrecision(precision);
847 for (auto storage : env->GetSupportedStorages(data_type)) {
848 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
849 OperationDef op_def;
850 op_def.precision = precision;
851 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
852 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
853 TensorFloat32 dst_tensor;
854 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
855 OperationType::MINIMUM, attr);
856 RETURN_IF_ERROR(env->ExecuteGPUOperation(
857 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
858 BHWC(1, 4, 1, 1), &dst_tensor));
859 RETURN_IF_ERROR(
860 PointWiseNear({-1.0f, -6.2f, -1.0f, -3.0f}, dst_tensor.data, eps));
861 }
862 }
863 return absl::OkStatus();
864 }
865
MulTest(TestExecutionEnvironment * env)866 absl::Status MulTest(TestExecutionEnvironment* env) {
867 TensorFloat32 src_tensor_0, src_tensor_1;
868 src_tensor_0.shape = BHWC(1, 2, 1, 2);
869 src_tensor_1.shape = BHWC(1, 2, 1, 2);
870 src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
871 src_tensor_1.data = {0.5f, 1.0f, 3.0f, 1.5f};
872
873 for (auto precision : env->GetSupportedPrecisions()) {
874 auto data_type = DeduceDataTypeFromPrecision(precision);
875 for (auto storage : env->GetSupportedStorages(data_type)) {
876 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
877 OperationDef op_def;
878 op_def.precision = precision;
879 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
880 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
881 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
882 TensorFloat32 dst_tensor;
883 GPUOperation operation = CreateElementwiseTwoInput(
884 op_def, OperationType::MUL, src_tensor_1.shape);
885 RETURN_IF_ERROR(env->ExecuteGPUOperation(
886 {src_tensor_0, src_tensor_1},
887 std::make_unique<GPUOperation>(std::move(operation)),
888 BHWC(1, 2, 1, 2), &dst_tensor));
889 RETURN_IF_ERROR(
890 PointWiseNear({0.5f, 2.0f, 9.0f, 6.75f}, dst_tensor.data, eps));
891 }
892 }
893 return absl::OkStatus();
894 }
895
MulBroadcastHWTest(TestExecutionEnvironment * env)896 absl::Status MulBroadcastHWTest(TestExecutionEnvironment* env) {
897 TensorFloat32 src_tensor_0, src_tensor_1;
898 src_tensor_0.shape = BHWC(1, 2, 1, 2);
899 src_tensor_1.shape = BHWC(1, 1, 1, 2);
900 src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
901 src_tensor_1.data = {0.5f, 3.0f};
902
903 for (auto precision : env->GetSupportedPrecisions()) {
904 auto data_type = DeduceDataTypeFromPrecision(precision);
905 for (auto storage : env->GetSupportedStorages(data_type)) {
906 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
907 OperationDef op_def;
908 op_def.precision = precision;
909 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
910 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
911 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
912 TensorFloat32 dst_tensor;
913 GPUOperation operation = CreateElementwiseTwoInput(
914 op_def, OperationType::MUL, src_tensor_1.shape);
915 RETURN_IF_ERROR(env->ExecuteGPUOperation(
916 {src_tensor_0, src_tensor_1},
917 std::make_unique<GPUOperation>(std::move(operation)),
918 BHWC(1, 2, 1, 2), &dst_tensor));
919 RETURN_IF_ERROR(
920 PointWiseNear({0.5f, 6.0f, 1.5f, 13.5f}, dst_tensor.data, eps));
921 }
922 }
923 return absl::OkStatus();
924 }
925
MulBroadcastChannelsTest(TestExecutionEnvironment * env)926 absl::Status MulBroadcastChannelsTest(TestExecutionEnvironment* env) {
927 TensorFloat32 src_tensor_0, src_tensor_1;
928 src_tensor_0.shape = BHWC(1, 2, 1, 2);
929 src_tensor_1.shape = BHWC(1, 2, 1, 1);
930 src_tensor_0.data = {1.0f, 2.0f, 3.0f, 4.5f};
931 src_tensor_1.data = {0.5f, 3.0f};
932
933 for (auto precision : env->GetSupportedPrecisions()) {
934 auto data_type = DeduceDataTypeFromPrecision(precision);
935 for (auto storage : env->GetSupportedStorages(data_type)) {
936 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
937 OperationDef op_def;
938 op_def.precision = precision;
939 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
940 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
941 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
942 TensorFloat32 dst_tensor;
943 GPUOperation operation = CreateElementwiseTwoInput(
944 op_def, OperationType::MUL, src_tensor_1.shape);
945 RETURN_IF_ERROR(env->ExecuteGPUOperation(
946 {src_tensor_0, src_tensor_1},
947 std::make_unique<GPUOperation>(std::move(operation)),
948 BHWC(1, 2, 1, 2), &dst_tensor));
949 RETURN_IF_ERROR(
950 PointWiseNear({0.5f, 1.0f, 9.0f, 13.5f}, dst_tensor.data, eps));
951 }
952 }
953 return absl::OkStatus();
954 }
955
SubWithScalarAtFirstPositionTest(TestExecutionEnvironment * env)956 absl::Status SubWithScalarAtFirstPositionTest(TestExecutionEnvironment* env) {
957 TensorFloat32 src_tensor_0;
958 src_tensor_0.shape = BHWC(1, 4, 1, 1);
959 src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f};
960
961 ElementwiseAttributes attr;
962 attr.param = 4.0f;
963 attr.runtime_tensor_is_second = true;
964
965 for (auto precision : env->GetSupportedPrecisions()) {
966 auto data_type = DeduceDataTypeFromPrecision(precision);
967 for (auto storage : env->GetSupportedStorages(data_type)) {
968 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
969 OperationDef op_def;
970 op_def.precision = precision;
971 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
972 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
973 TensorFloat32 dst_tensor;
974 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
975 OperationType::SUB, attr);
976 RETURN_IF_ERROR(env->ExecuteGPUOperation(
977 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
978 BHWC(1, 4, 1, 1), &dst_tensor));
979 RETURN_IF_ERROR(
980 PointWiseNear({4.0f, 10.2f, 2.0f, 7.0f}, dst_tensor.data, eps));
981 }
982 }
983 return absl::OkStatus();
984 }
985
LessTest(TestExecutionEnvironment * env)986 absl::Status LessTest(TestExecutionEnvironment* env) {
987 TensorFloat32 src_tensor_0, src_tensor_1;
988 src_tensor_0.shape = BHWC(1, 2, 1, 2);
989 src_tensor_1.shape = BHWC(1, 2, 1, 2);
990 src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
991 src_tensor_1.data = {1.0f, 0.0f, 2.0f, -4.0f};
992
993 tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
994 ref_tensor.shape = BHWC(1, 2, 1, 2);
995 ref_tensor.data = {true, false, false, false};
996
997 for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
998 for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
999 OperationDef op_def;
1000 op_def.precision = CalculationsPrecision::F32;
1001 op_def.src_tensors.push_back(
1002 {DataType::FLOAT32, src_storage, Layout::HWC});
1003 op_def.src_tensors.push_back(
1004 {DataType::FLOAT32, src_storage, Layout::HWC});
1005 op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1006
1007 TensorDescriptor src_desc0, src_desc1, dst_desc;
1008 src_desc0 = op_def.src_tensors[0];
1009 src_desc0.UploadData(src_tensor_0);
1010 src_desc1 = op_def.src_tensors[1];
1011 src_desc1.UploadData(src_tensor_1);
1012 dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1013 GPUOperation operation = CreateElementwiseTwoInput(
1014 op_def, OperationType::LESS, src_tensor_1.shape);
1015 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1016 {&src_desc0, &src_desc1}, {&dst_desc},
1017 std::make_unique<GPUOperation>(std::move(operation))));
1018
1019 tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1020 dst_desc.DownloadData(&dst_tensor);
1021 if (dst_tensor.data != ref_tensor.data) {
1022 return absl::InternalError("not equal");
1023 }
1024 }
1025 }
1026 return absl::OkStatus();
1027 }
1028
LessEqualTest(TestExecutionEnvironment * env)1029 absl::Status LessEqualTest(TestExecutionEnvironment* env) {
1030 TensorFloat32 src_tensor_0;
1031 src_tensor_0.shape = BHWC(1, 2, 1, 2);
1032 src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1033
1034 tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1035 ref_tensor.shape = BHWC(1, 2, 1, 2);
1036 ref_tensor.data = {true, true, true, false};
1037
1038 ElementwiseAttributes attr;
1039 attr.param = 2.0f;
1040
1041 for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1042 for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1043 OperationDef op_def;
1044 op_def.precision = CalculationsPrecision::F32;
1045 op_def.src_tensors.push_back(
1046 {DataType::FLOAT32, src_storage, Layout::HWC});
1047 op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1048 TensorDescriptor src_desc, dst_desc;
1049 src_desc = op_def.src_tensors[0];
1050 src_desc.UploadData(src_tensor_0);
1051 dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1052 GPUOperation operation = CreateElementwise(
1053 env->GetGpuInfo(), op_def, OperationType::LESS_EQUAL, attr);
1054 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1055 {&src_desc}, {&dst_desc},
1056 std::make_unique<GPUOperation>(std::move(operation))));
1057
1058 tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1059 dst_desc.DownloadData(&dst_tensor);
1060 if (dst_tensor.data != ref_tensor.data) {
1061 return absl::InternalError("not equal");
1062 }
1063 }
1064 }
1065 return absl::OkStatus();
1066 }
1067
GreaterTest(TestExecutionEnvironment * env)1068 absl::Status GreaterTest(TestExecutionEnvironment* env) {
1069 TensorFloat32 src_tensor_0;
1070 src_tensor_0.shape = BHWC(1, 2, 1, 2);
1071 src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1072
1073 tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1074 ref_tensor.shape = BHWC(1, 2, 1, 2);
1075 ref_tensor.data = {false, false, false, true};
1076
1077 ElementwiseAttributes attr;
1078 attr.param = 2.0f;
1079
1080 for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1081 for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1082 OperationDef op_def;
1083 op_def.precision = CalculationsPrecision::F32;
1084 op_def.src_tensors.push_back(
1085 {DataType::FLOAT32, src_storage, Layout::HWC});
1086 op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1087 TensorDescriptor src_desc, dst_desc;
1088 src_desc = op_def.src_tensors[0];
1089 src_desc.UploadData(src_tensor_0);
1090 dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1091 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
1092 OperationType::GREATER, attr);
1093 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1094 {&src_desc}, {&dst_desc},
1095 std::make_unique<GPUOperation>(std::move(operation))));
1096
1097 tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1098 dst_desc.DownloadData(&dst_tensor);
1099 if (dst_tensor.data != ref_tensor.data) {
1100 return absl::InternalError("not equal");
1101 }
1102 }
1103 }
1104 return absl::OkStatus();
1105 }
1106
GreaterEqualTest(TestExecutionEnvironment * env)1107 absl::Status GreaterEqualTest(TestExecutionEnvironment* env) {
1108 TensorFloat32 src_tensor_0;
1109 src_tensor_0.shape = BHWC(1, 2, 1, 2);
1110 src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1111
1112 tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1113 ref_tensor.shape = BHWC(1, 2, 1, 2);
1114 ref_tensor.data = {false, false, true, true};
1115
1116 ElementwiseAttributes attr;
1117 attr.param = 2.0f;
1118
1119 for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1120 for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1121 OperationDef op_def;
1122 op_def.precision = CalculationsPrecision::F32;
1123 op_def.src_tensors.push_back(
1124 {DataType::FLOAT32, src_storage, Layout::HWC});
1125 op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1126 TensorDescriptor src_desc, dst_desc;
1127 src_desc = op_def.src_tensors[0];
1128 src_desc.UploadData(src_tensor_0);
1129 dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1130 GPUOperation operation = CreateElementwise(
1131 env->GetGpuInfo(), op_def, OperationType::GREATER_EQUAL, attr);
1132 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1133 {&src_desc}, {&dst_desc},
1134 std::make_unique<GPUOperation>(std::move(operation))));
1135
1136 tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1137 dst_desc.DownloadData(&dst_tensor);
1138 if (dst_tensor.data != ref_tensor.data) {
1139 return absl::InternalError("not equal");
1140 }
1141 }
1142 }
1143 return absl::OkStatus();
1144 }
1145
EqualTest(TestExecutionEnvironment * env)1146 absl::Status EqualTest(TestExecutionEnvironment* env) {
1147 TensorFloat32 src_tensor_0;
1148 src_tensor_0.shape = BHWC(1, 2, 1, 2);
1149 src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1150
1151 tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1152 ref_tensor.shape = BHWC(1, 2, 1, 2);
1153 ref_tensor.data = {false, false, true, false};
1154
1155 ElementwiseAttributes attr;
1156 attr.param = 2.0f;
1157
1158 for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1159 for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1160 OperationDef op_def;
1161 op_def.precision = CalculationsPrecision::F32;
1162 op_def.src_tensors.push_back(
1163 {DataType::FLOAT32, src_storage, Layout::HWC});
1164 op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1165 TensorDescriptor src_desc, dst_desc;
1166 src_desc = op_def.src_tensors[0];
1167 src_desc.UploadData(src_tensor_0);
1168 dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1169 GPUOperation operation = CreateElementwise(env->GetGpuInfo(), op_def,
1170 OperationType::EQUAL, attr);
1171 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1172 {&src_desc}, {&dst_desc},
1173 std::make_unique<GPUOperation>(std::move(operation))));
1174
1175 tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1176 dst_desc.DownloadData(&dst_tensor);
1177 if (dst_tensor.data != ref_tensor.data) {
1178 return absl::InternalError("not equal");
1179 }
1180 }
1181 }
1182 return absl::OkStatus();
1183 }
1184
NotEqualTest(TestExecutionEnvironment * env)1185 absl::Status NotEqualTest(TestExecutionEnvironment* env) {
1186 TensorFloat32 src_tensor_0;
1187 src_tensor_0.shape = BHWC(1, 2, 1, 2);
1188 src_tensor_0.data = {0.0f, 1.0f, 2.0f, 3.0f};
1189
1190 tflite::gpu::Tensor<BHWC, DataType::BOOL> ref_tensor;
1191 ref_tensor.shape = BHWC(1, 2, 1, 2);
1192 ref_tensor.data = {true, true, false, true};
1193
1194 ElementwiseAttributes attr;
1195 attr.param = 2.0f;
1196
1197 for (auto src_storage : env->GetSupportedStorages(DataType::FLOAT32)) {
1198 for (auto dst_storage : env->GetSupportedStorages(DataType::BOOL)) {
1199 OperationDef op_def;
1200 op_def.precision = CalculationsPrecision::F32;
1201 op_def.src_tensors.push_back(
1202 {DataType::FLOAT32, src_storage, Layout::HWC});
1203 op_def.dst_tensors.push_back({DataType::BOOL, dst_storage, Layout::HWC});
1204 TensorDescriptor src_desc, dst_desc;
1205 src_desc = op_def.src_tensors[0];
1206 src_desc.UploadData(src_tensor_0);
1207 dst_desc.SetBHWCShape(BHWC(1, 2, 1, 2));
1208 GPUOperation operation = CreateElementwise(
1209 env->GetGpuInfo(), op_def, OperationType::NOT_EQUAL, attr);
1210 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1211 {&src_desc}, {&dst_desc},
1212 std::make_unique<GPUOperation>(std::move(operation))));
1213
1214 tflite::gpu::Tensor<BHWC, DataType::BOOL> dst_tensor;
1215 dst_desc.DownloadData(&dst_tensor);
1216 if (dst_tensor.data != ref_tensor.data) {
1217 return absl::InternalError("not equal");
1218 }
1219 }
1220 }
1221 return absl::OkStatus();
1222 }
1223
CosBroadcastTest(TestExecutionEnvironment * env)1224 absl::Status CosBroadcastTest(TestExecutionEnvironment* env) {
1225 TensorFloat32 src_tensor;
1226 src_tensor.shape = BHWC(1, 2, 1, 1);
1227 src_tensor.data = {0.7f, -1.5f};
1228
1229 for (auto precision : env->GetSupportedPrecisions()) {
1230 auto data_type = DeduceDataTypeFromPrecision(precision);
1231 for (auto storage : env->GetSupportedStorages(data_type)) {
1232 const float eps = precision == CalculationsPrecision::F32 ? 5e-5f : 1e-3f;
1233 OperationDef op_def;
1234 op_def.precision = precision;
1235 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1236 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1237 TensorFloat32 dst_tensor;
1238 BHWC output_shape(1, 2, 1, 2);
1239 GPUOperation operation = CreateElementwiseOneInputWithBroadcast(
1240 env->GetGpuInfo(), op_def, OperationType::COS, src_tensor.shape,
1241 output_shape);
1242 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1243 src_tensor, std::make_unique<GPUOperation>(std::move(operation)),
1244 output_shape, &dst_tensor));
1245 RETURN_IF_ERROR(PointWiseNear(
1246 {std::cos(0.7f), std::cos(0.7f), std::cos(-1.5f), std::cos(-1.5f)},
1247 dst_tensor.data, eps));
1248 }
1249 }
1250 return absl::OkStatus();
1251 }
1252
MaximumScalarBroadcastInputTest(TestExecutionEnvironment * env)1253 absl::Status MaximumScalarBroadcastInputTest(TestExecutionEnvironment* env) {
1254 TensorFloat32 src_tensor_0;
1255 src_tensor_0.shape = BHWC(1, 2, 1, 1);
1256 src_tensor_0.data = {2.0f, -3.0f};
1257
1258 ElementwiseAttributes attr;
1259 attr.param = -2.0f;
1260
1261 for (auto precision : env->GetSupportedPrecisions()) {
1262 auto data_type = DeduceDataTypeFromPrecision(precision);
1263 for (auto storage : env->GetSupportedStorages(data_type)) {
1264 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
1265 OperationDef op_def;
1266 op_def.precision = precision;
1267 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1268 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1269 TensorFloat32 dst_tensor;
1270 BHWC output_shape(1, 2, 1, 2);
1271 GPUOperation operation = CreateElementwiseWithBroadcast(
1272 env->GetGpuInfo(), op_def, OperationType::MAXIMUM, attr,
1273 src_tensor_0.shape, output_shape);
1274 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1275 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
1276 output_shape, &dst_tensor));
1277 RETURN_IF_ERROR(
1278 PointWiseNear({2.0f, 2.0f, -2.0f, -2.0f}, dst_tensor.data, eps));
1279 }
1280 }
1281 return absl::OkStatus();
1282 }
1283
MulLinearBroadcastInputTest(TestExecutionEnvironment * env)1284 absl::Status MulLinearBroadcastInputTest(TestExecutionEnvironment* env) {
1285 TensorFloat32 src_tensor_0;
1286 src_tensor_0.shape = BHWC(1, 2, 1, 1);
1287 src_tensor_0.data = {2.0f, -3.0f};
1288
1289 ::tflite::gpu::Tensor<Linear, DataType::FLOAT32> linear_tensor;
1290 linear_tensor.shape = Linear(2);
1291 linear_tensor.data = {0.5f, 2.0f};
1292 ElementwiseAttributes attr;
1293 attr.param = linear_tensor;
1294
1295 for (auto precision : env->GetSupportedPrecisions()) {
1296 auto data_type = DeduceDataTypeFromPrecision(precision);
1297 for (auto storage : env->GetSupportedStorages(data_type)) {
1298 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
1299 OperationDef op_def;
1300 op_def.precision = precision;
1301 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1302 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1303 TensorFloat32 dst_tensor;
1304 BHWC output_shape(1, 2, 1, 2);
1305 GPUOperation operation = CreateElementwiseWithBroadcast(
1306 env->GetGpuInfo(), op_def, OperationType::MUL, attr,
1307 src_tensor_0.shape, output_shape);
1308 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1309 src_tensor_0, std::make_unique<GPUOperation>(std::move(operation)),
1310 output_shape, &dst_tensor));
1311 RETURN_IF_ERROR(
1312 PointWiseNear({1.0f, 4.0f, -1.5f, -6.0f}, dst_tensor.data, eps));
1313 }
1314 }
1315 return absl::OkStatus();
1316 }
1317
MulBroadcastBothInputsTest(TestExecutionEnvironment * env)1318 absl::Status MulBroadcastBothInputsTest(TestExecutionEnvironment* env) {
1319 TensorFloat32 src_tensor_0, src_tensor_1;
1320 src_tensor_0.shape = BHWC(1, 1, 2, 1);
1321 src_tensor_1.shape = BHWC(1, 1, 1, 2);
1322 src_tensor_0.data = {1.0f, 2.0f};
1323 src_tensor_1.data = {3.0f, 4.0f};
1324
1325 for (auto precision : env->GetSupportedPrecisions()) {
1326 auto data_type = DeduceDataTypeFromPrecision(precision);
1327 for (auto storage : env->GetSupportedStorages(data_type)) {
1328 const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
1329 OperationDef op_def;
1330 op_def.precision = precision;
1331 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1332 op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
1333 op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
1334 TensorFloat32 dst_tensor;
1335 BHWC output_shape(1, 1, 2, 2);
1336 GPUOperation operation = CreateElementwiseTwoInputWithBroadcast(
1337 op_def, OperationType::MUL, src_tensor_0.shape, src_tensor_1.shape,
1338 output_shape);
1339 RETURN_IF_ERROR(env->ExecuteGPUOperation(
1340 {src_tensor_0, src_tensor_1},
1341 std::make_unique<GPUOperation>(std::move(operation)), output_shape,
1342 &dst_tensor));
1343 RETURN_IF_ERROR(
1344 PointWiseNear({3.0f, 4.0f, 6.0f, 8.0f}, dst_tensor.data, eps));
1345 }
1346 }
1347 return absl::OkStatus();
1348 }
1349
1350 } // namespace gpu
1351 } // namespace tflite
1352