1 /*
2  * Copyright (c) 2017-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
25 #define ARM_COMPUTE_TEST_GEMM_FIXTURE
26 
27 #include "arm_compute/core/KernelDescriptors.h"
28 #include "arm_compute/core/TensorShape.h"
29 #include "arm_compute/core/Types.h"
30 #include "arm_compute/core/experimental/IPostOp.h"
31 #include "src/core/experimental/PostOpUtils.h"
32 #include "tests/AssetsLibrary.h"
33 #include "tests/Globals.h"
34 #include "tests/IAccessor.h"
35 #include "tests/framework/Asserts.h"
36 #include "tests/framework/Fixture.h"
37 #include "tests/validation/Helpers.h"
38 #include "tests/validation/reference/ActivationLayer.h"
39 #include "tests/validation/reference/ElementwiseOperations.h"
40 #include "tests/validation/reference/GEMM.h"
41 #include "tests/validation/reference/PostOps.h"
42 
43 #include <random>
44 
45 namespace arm_compute
46 {
47 namespace test
48 {
49 namespace validation
50 {
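/** Generic GEMM validation fixture.
 *
 * Computes dst = alpha * A * B + beta * C with the function under test and compares the result
 * against the reference implementation. The template parameters mirror the paths exercised by the
 * tests: disable_c drops the C/bias accumulation, reinterpret_input_as_3d and
 * reinterpret_output_as_3d exercise the GEMM3D paths, pretranspose_a and pretranspose_b expect
 * pre-transposed inputs, and run_twice re-runs the configured function with freshly filled inputs.
 */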
51 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
52 class GEMMValidationFixture : public framework::Fixture
53 {
54 public:
55     template <typename...>
56     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
57     {
58         ARM_COMPUTE_UNUSED(pretranspose);
59         _target    = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
60         _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
61     }
62 
63 protected:
64     template <typename U>
65     void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f)
66     {
67         switch(tensor.data_type())
68         {
69             case DataType::F16:
70             {
71                 arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ float(lo), float(hi) };
72                 library->fill(tensor, distribution, i);
73                 break;
74             }
75             case DataType::F32:
76             {
77                 std::uniform_real_distribution<float> distribution(lo, hi);
78                 library->fill(tensor, distribution, i);
79                 break;
80             }
81             default:
82                 library->fill_tensor_uniform(tensor, i);
83         }
84     }
85 
86     TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
87                               DataType data_type)
88     {
89         // Create tensors
90         TensorType a   = create_tensor<TensorType>(shape_a, data_type, 1);
91         TensorType b   = create_tensor<TensorType>(shape_b, data_type, 1);
92         TensorType c   = create_tensor<TensorType>(shape_c, data_type, 1);
93         TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1);
94 
95         // Create and configure function
96         FunctionType gemm;
97         // The GEMMInfo includes the depth of the output in case it is reinterpreted as 3D.
98         // If the output shape has the same number of dimensions as the input, a plain 2D matrix multiplication is performed (depth_output_reinterpreted_as_3D = 0);
99         // otherwise the reinterpreted (3D) version of GEMM is used (depth_output_reinterpreted_as_3D = depth of the 3D output).
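        // For example, when reinterpret_output_as_3d is true, the depth passed to GEMMInfo below is
        // output_shape[2], i.e. the third dimension of the 4D output shape.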
100         gemm.configure(&a,
101                        &b,
102                        (disable_c) ? nullptr : &c,
103                        &dst,
104                        alpha, beta,
105                        GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
106                                 || reinterpret_output_as_3d)));
107         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
108         ARM_COMPUTE_ASSERT(b.info()->is_resizable());
109         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
110         ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
111 
112         add_padding_x({ &a, &b, &c, &dst });
113 
114         // Allocate tensors
115         a.allocator()->allocate();
116         b.allocator()->allocate();
117         c.allocator()->allocate();
118         dst.allocator()->allocate();
119 
120         ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
121         ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
122         ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
123         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
124 
125         // Fill tensors
126         fill(AccessorType(a), 0);
127         fill(AccessorType(b), 1);
128         if(!disable_c)
129         {
130             fill(AccessorType(c), 2);
131         }
132 
133         // If run_twice is enabled, run once and then refill the inputs so the function is exercised with variable inputs.
134         if(run_twice)
135         {
136             gemm.run();
137             fill(AccessorType(a), 3); // Fill tensors with new seed after run
138             fill(AccessorType(b), 4);
139             if(!disable_c)
140             {
141                 fill(AccessorType(c), 5);
142             }
143         }
144 
145         // Compute GEMM function
146         gemm.run();
147 
148         return dst;
149     }
150 
151     SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
152                                       DataType data_type)
153     {
154         TensorShape shape_a_to_use = shape_a;
155         if(reinterpret_input_as_3d)
156         {
157             // Collapse the second and third dimension if the input is 3D
158             shape_a_to_use.collapse(2U, 1U);
159         }
160 
161         // Create reference
162         SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
163         SimpleTensor<T> b{ shape_b, data_type, 1 };
164         SimpleTensor<T> c{ output_shape, data_type, 1 };
165 
166         // Fill reference
167         fill(a, 0);
168         fill(b, 1);
169         fill(c, 2);
170 
171         if(reinterpret_input_as_3d || reinterpret_output_as_3d)
172         {
173             const int n          = shape_b[0];
174             const int m          = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
175             const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];
176 
177             // In case of broadcast, replicate the first row of C into the remaining m * batch_size - 1 rows
178             for(int i = 1; i < m * batch_size; i++)
179             {
180                 memcpy(c.data() + i * n, c.data(), n * sizeof(T));
181             }
182         }
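        // Note: the replication above assumes rows of length n are stored contiguously in the
        // reference tensor buffer, which is the case for SimpleTensor.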
183 
184         /* Note: Assuming the usual batch matmul dimensions A = (B x M x K) and B = (B x K x N): if pretranspose_a is set to true, A is expected to be (B x K x M),
185            i.e. it must be pre-transposed before being passed to the fixture. The fixture then transposes A back to (B x M x K)
186            so that the reference implementation, which expects (B x M x K) input, can be called.
187            Similarly, if pretranspose_b is set to true, B is expected to be (B x N x K) and must be pre-transposed before being passed to the fixture. */
188 
189         // Define transposed shapes
190         TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
191         TensorShape b_transposed_shape(b.shape().y(), b.shape().x());
192 
193         // Define transposed tensors
194         SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
195         SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };
196 
197         // pretranspose a if necessary
198         if(pretranspose_a)
199         {
200             transpose_matrix<T>(a, a_transposed);
201         }
202 
203         // pretranspose b if necessary
204         if(pretranspose_b)
205         {
206             transpose_matrix<T>(b, b_transposed);
207         }
208 
209         // If run_twice is enabled, run the reference once and refill the inputs to mirror the variable-input run of the target.
210         if(run_twice)
211         {
212             reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
213             fill((pretranspose_a) ? a_transposed : a, 3);
214             fill((pretranspose_b) ? b_transposed : b, 4);
215             fill(c, 5);
216         }
217 
218         // Setting beta to 0 will effectively disable C for the
219         // computation of the reference: alpha * A * B + 0 * C
220         // Use the transposed tensors when the corresponding pretranspose flag is enabled, otherwise use the original tensors
221         auto r = reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
222         return r;
223     }
224 
225     TensorType      _target{};
226     SimpleTensor<T> _reference{};
227 };
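/* Minimal usage sketch (illustrative only: the CL types, dataset and tolerance below are
   assumptions and not part of this file):

   using CLGEMMFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, float>;
   FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixture, framework::DatasetMode::ALL, ...)
   {
       // _target and _reference are populated by setup(); compare them with a suitable tolerance
       validate(CLAccessor(_target), _reference, RelativeTolerance<float>(0.001f));
   }
*/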
228 
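/** Fixture for the (non-reshaped) GEMM matrix-multiply operator.
 *
 * Builds LHS (K x M x batches), RHS (N x K x batches) and bias tensors, runs the operator through
 * an ITensorPack and compares against reference::gemm followed by the requested activation layer.
 * The bias can be broadcast along the M and batch dimensions.
 */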
229 template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
230 class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
231 {
232 public:
233     template <typename...>
234     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info,
235                DataType data_type, GPUTarget gpu_arch)
236     {
237         // Set the tensor shapes for LHS and RHS matrices
238         const TensorShape lhs_shape(k, m, batch_size);
239         const TensorShape rhs_shape(n, k, batch_size);
240         const TensorShape bias_shape(n,
241                                      broadcast_bias ? 1 : m,
242                                      broadcast_bias ? 1 : batch_size);
243 
244         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch);
245         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
246     }
247 
248 protected:
249     template <typename U>
250     void fill(U &&tensor, int i)
251     {
252         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
253         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
254 
255         DistributionType distribution{ T(-1.0f), T(1.0f) };
256         library->fill(tensor, distribution, i);
257 
258         // Fill the tensor borders with infinity so that any out-of-bounds access shows up as NaN in the output (i.e. inf * 0)
259         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
260         library->fill_borders_with_garbage(tensor, distribution_inf, i);
261     }
262 
263     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
264                               bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
265     {
266         // Create tensors
267         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
268         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
269         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
270         TensorType dst;
271 
272         const unsigned int m = lhs_shape[1];
273         const unsigned int n = rhs_shape[0];
274         const unsigned int k = lhs_shape[0];
275         GEMMReshapeInfo    reshape_info(m, n, k, 1, 1, 0, false, broadcast_bias);
276 
277         // The output tensor will be auto-initialized within the function
278 
279         // Create and configure function
280         GEMMOperatorType gemm;
281         gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
282 
283         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
284         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
285         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
286 
287         add_padding_x({ &lhs, &rhs, &bias, &dst });
288 
289         // Allocate tensors
290         lhs.allocator()->allocate();
291         rhs.allocator()->allocate();
292         bias.allocator()->allocate();
293         dst.allocator()->allocate();
294 
295         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
296         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
297         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
298         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
299 
300         // Fill tensors
301         fill(AccessorType(lhs), 0);
302         fill(AccessorType(rhs), 1);
303         fill(AccessorType(bias), 2);
304 
305         // Compute GEMM
306         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
307             { ACL_SRC_1, &rhs },
308             { ACL_SRC_2, &bias },
309             { ACL_DST, &dst }
310         });
311         gemm.run(gemm_pack);
312 
313         return dst;
314     }
315 
316     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
317                                       const ActivationLayerInfo &act_info)
318     {
319         TensorShape dst_shape = lhs_shape;
320         dst_shape[0]          = rhs_shape[0];
321         dst_shape[1]          = lhs_shape[1];
322 
323         // Create reference
324         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
325         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
326         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
327 
328         const int n          = rhs_shape[0];
329         const int m          = lhs_shape[1];
330         const int batch_size = lhs_shape[2];
331 
332         // Fill reference
333         fill(lhs, 0);
334         fill(rhs, 1);
335         fill(bias, 2);
336 
337         if(broadcast_bias)
338         {
339             // In case of broadcast, replicate the first row of the bias into the remaining m * batch_size - 1 rows
340             for(int i = 1; i < m * batch_size; i++)
341             {
342                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
343             }
344         }
345 
346         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
347     }
348 
349     TensorType      _target{};
350     SimpleTensor<T> _reference{};
351 };
352 
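/** Fixture for the GEMM matrix-multiply operator with the output reinterpreted as 3D (GEMM3D).
 *
 * Here m is the product m_w * m_h, the bias is a single row of size n that is always broadcast,
 * and the reference output is reshaped to (n, m_w, m_h, batches) to match the target.
 */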
353 template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
354 class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
355 {
356 public:
357     template <typename...>
358     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
359                const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
360     {
361         ARM_COMPUTE_UNUSED(broadcast_bias);
362 
363         // In case of GEMM3D, m is the product between m_w and m_h
364         const unsigned int m = m_w * m_h;
365 
366         // Set the tensor shapes for LHS and RHS matrices
367         const TensorShape lhs_shape(k, m, batch_size);
368         const TensorShape rhs_shape(n, k, batch_size);
369         const TensorShape bias_shape(n, 1, 1);
370 
371         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch);
372         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
373     }
374 
375 protected:
376     template <typename U>
377     void fill(U &&tensor, int i)
378     {
379         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
380         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
381 
382         DistributionType distribution{ T(-1.0f), T(1.0f) };
383         library->fill(tensor, distribution, i);
384     }
385 
386     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
387                               bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
388     {
389         // Create tensors
390         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
391         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
392         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
393         TensorType dst;
394 
395         const unsigned int m = lhs_shape[1];
396         const unsigned int n = rhs_shape[0];
397         const unsigned int k = lhs_shape[0];
398         GEMMReshapeInfo    reshape_info(m, n, k, 1, 1, m_h, false, true);
399 
400         // The output tensor will be auto-initialized within the function
401 
402         // Create and configure function
403         GEMMOperatorType gemm;
404         gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
405 
406         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
407         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
408         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
409 
410         add_padding_x({ &lhs, &rhs, &bias, &dst });
411 
412         // Allocate tensors
413         lhs.allocator()->allocate();
414         rhs.allocator()->allocate();
415         bias.allocator()->allocate();
416         dst.allocator()->allocate();
417 
418         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
419         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
420         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
421         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
422 
423         // Fill tensors
424         fill(AccessorType(lhs), 0);
425         fill(AccessorType(rhs), 1);
426         fill(AccessorType(bias), 2);
427 
428         // Compute GEMM
429         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
430             { ACL_SRC_1, &rhs },
431             { ACL_SRC_2, &bias },
432             { ACL_DST, &dst }
433         });
434         gemm.run(gemm_pack);
435 
436         return dst;
437     }
438 
439     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
440                                       const ActivationLayerInfo &act_info)
441     {
442         TensorShape dst_shape = lhs_shape;
443         dst_shape.set(0, rhs_shape[0]);
444         dst_shape.set(1, lhs_shape[1] / m_h);
445         dst_shape.set(2, m_h);
446         dst_shape.set(3, lhs_shape[2]);
447 
448         // Create reference
449         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
450         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
451         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
452 
453         const int n          = rhs_shape[0];
454         const int m          = lhs_shape[1];
455         const int batch_size = lhs_shape[2];
456 
457         // Fill reference
458         fill(lhs, 0);
459         fill(rhs, 1);
460         fill(bias, 2);
461 
462         // The bias is broadcast: replicate its first row into the remaining m * batch_size - 1 rows
463         for(int i = 1; i < m * batch_size; i++)
464         {
465             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
466         }
467 
468         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
469     }
470 
471     TensorType      _target{};
472     SimpleTensor<T> _reference{};
473 };
474 
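/** Fixture for the GEMM matrix-multiply operator with interleaved/transposed reshaping of the inputs.
 *
 * The LHS is reshaped with m0 = 4, k0 = 4, interleave and transpose enabled (v0 given by the test),
 * the RHS with n0 = 16 / sizeof(T), k0 = 1 (h0 given by the test), before the reshaped GEMM operator
 * is run. The reference is the plain GEMM followed by the requested activation layer.
 */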
475 template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
476 class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
477 {
478 public:
479     template <typename...>
480     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
481                const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
482     {
483         GEMMLHSMatrixInfo lhs_info;
484         lhs_info.m0         = 4;
485         lhs_info.k0         = 4;
486         lhs_info.v0         = v0;
487         lhs_info.interleave = true;
488         lhs_info.transpose  = true;
489 
490         GEMMRHSMatrixInfo rhs_info;
491         rhs_info.n0         = 16 / sizeof(T);
492         rhs_info.k0         = 1;
493         rhs_info.h0         = h0;
494         rhs_info.interleave = false;
495         rhs_info.transpose  = false;
496 
497         // Set the tensor shapes for LHS and RHS matrices
498         const TensorShape lhs_shape(k, m, batch_size);
499         const TensorShape rhs_shape(n, k, batch_size);
500         const TensorShape bias_shape(n,
501                                      broadcast_bias ? 1 : m,
502                                      broadcast_bias ? 1 : batch_size);
503 
504         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch);
505         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
506     }
507 
508 protected:
509     template <typename U>
510     void fill(U &&tensor, int i)
511     {
512         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
513         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
514 
515         DistributionType distribution{ T(-1.0f), T(1.0f) };
516         library->fill(tensor, distribution, i);
517 
518         // Fill the tensor borders with infinity so that any out-of-bounds access shows up as NaN in the output (i.e. inf * 0)
519         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
520         library->fill_borders_with_garbage(tensor, distribution_inf, i);
521     }
522 
523     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
524                               DataType data_type, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
525     {
526         // Create tensors
527         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
528         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
529         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
530         TensorType lhs_reshaped;
531         TensorType rhs_reshaped;
532         TensorType dst;
533 
534         const unsigned int m = lhs_shape[1];
535         const unsigned int n = rhs_shape[0];
536         const unsigned int k = lhs_shape[0];
537         GEMMReshapeInfo    reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, 0, false, broadcast_bias);
538 
539         // The output tensor will be auto-initialized within the function
540 
541         // Create and configure function
542         ReshapeLHSOperatorType reshape_lhs;
543         ReshapeRHSOperatorType reshape_rhs;
544         GEMMOperatorType       gemm;
545         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
546         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
547         gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
548 
549         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
550         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
551         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
552 
553         // We do not pad when exporting to a CL image, as the tensor must comply with strict pitch alignment restrictions
554         if(!rhs_info.export_to_cl_image)
555         {
556             add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
557         }
558 
559         // Allocate tensors
560         lhs.allocator()->allocate();
561         rhs.allocator()->allocate();
562         lhs_reshaped.allocator()->allocate();
563         rhs_reshaped.allocator()->allocate();
564         bias.allocator()->allocate();
565         dst.allocator()->allocate();
566 
567         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
568         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
569         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
570         ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
571         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
572         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
573 
574         // Fill tensors
575         fill(AccessorType(lhs), 0);
576         fill(AccessorType(rhs), 1);
577         fill(AccessorType(bias), 2);
578 
579         // Compute GEMM
580         ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
581         reshape_lhs.run(reshape_lhs_pack);
582         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
583         reshape_rhs.run(reshape_rhs_pack);
584         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
585             { ACL_SRC_1, &rhs_reshaped },
586             { ACL_SRC_2, &bias },
587             { ACL_DST, &dst }
588         });
589         gemm.run(gemm_pack);
590 
591         return dst;
592     }
593 
594     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
595                                       const ActivationLayerInfo &act_info)
596     {
597         TensorShape dst_shape = lhs_shape;
598         dst_shape[0]          = rhs_shape[0];
599         dst_shape[1]          = lhs_shape[1];
600 
601         // Create reference
602         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
603         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
604         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
605 
606         const int n          = rhs_shape[0];
607         const int m          = lhs_shape[1];
608         const int batch_size = lhs_shape[2];
609 
610         // Fill reference
611         fill(lhs, 0);
612         fill(rhs, 1);
613         fill(bias, 2);
614 
615         if(broadcast_bias)
616         {
617             // In case of broadcast, replicate the first row of the bias into the remaining m * batch_size - 1 rows
618             for(int i = 1; i < m * batch_size; i++)
619             {
620                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
621             }
622         }
623 
624         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
625     }
626 
627     TensorType      _target{};
628     SimpleTensor<T> _reference{};
629 };
630 
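/** 3D variant of the interleaved/transposed fixture above: the LHS/RHS reshaping is the same,
 *  m is the product m_w * m_h and the output is reinterpreted as 3D with depth m_h.
 */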
631 template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
632 class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
633 {
634 public:
635     template <typename...>
636     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
637                bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
638     {
639         ARM_COMPUTE_UNUSED(broadcast_bias);
640 
641         GEMMLHSMatrixInfo lhs_info;
642         lhs_info.m0         = 4;
643         lhs_info.k0         = 4;
644         lhs_info.v0         = v0;
645         lhs_info.interleave = true;
646         lhs_info.transpose  = true;
647 
648         GEMMRHSMatrixInfo rhs_info;
649         rhs_info.n0         = 16 / sizeof(T);
650         rhs_info.k0         = 1;
651         rhs_info.h0         = h0;
652         rhs_info.interleave = false;
653         rhs_info.transpose  = false;
654 
655         // In case of GEMM3D, m is the product between m_w and m_h
656         const unsigned int m = m_w * m_h;
657 
658         // Set the tensor shapes for LHS and RHS matrices
659         const TensorShape lhs_shape(k, m, batch_size);
660         const TensorShape rhs_shape(n, k, batch_size);
661         const TensorShape bias_shape(n, 1, 1);
662 
663         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch);
664         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
665     }
666 
667 protected:
668     template <typename U>
669     void fill(U &&tensor, int i)
670     {
671         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
672         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
673 
674         DistributionType distribution{ T(-1.0f), T(1.0f) };
675         library->fill(tensor, distribution, i);
676     }
677 
678     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
679                               DataType data_type, float alpha, float beta, unsigned int m_h, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
680     {
681         // Create tensors
682         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
683         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
684         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
685         TensorType lhs_reshaped;
686         TensorType rhs_reshaped;
687         TensorType dst;
688 
689         const unsigned int m = lhs_shape[1];
690         const unsigned int n = rhs_shape[0];
691         const unsigned int k = lhs_shape[0];
692         GEMMReshapeInfo    reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, m_h, false, true);
693 
694         // The output tensor will be auto-initialized within the function
695 
696         // Create and configure function
697         ReshapeLHSOperatorType reshape_lhs;
698         ReshapeRHSOperatorType reshape_rhs;
699         GEMMOperatorType       gemm;
700         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
701         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
702         gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
703 
704         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
705         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
706         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
707 
708         // We do not pad when exporting to a CL image, as the tensor must comply with strict pitch alignment restrictions
709         if(!rhs_info.export_to_cl_image)
710         {
711             add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
712         }
713 
714         // Allocate tensors
715         lhs.allocator()->allocate();
716         rhs.allocator()->allocate();
717         lhs_reshaped.allocator()->allocate();
718         rhs_reshaped.allocator()->allocate();
719         bias.allocator()->allocate();
720         dst.allocator()->allocate();
721 
722         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
723         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
724         ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
725         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
726         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
727         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
728 
729         // Fill tensors
730         fill(AccessorType(lhs), 0);
731         fill(AccessorType(rhs), 1);
732         fill(AccessorType(bias), 2);
733 
734         // Compute GEMM
735         ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
736         reshape_lhs.run(reshape_lhs_pack);
737         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
738         reshape_rhs.run(reshape_rhs_pack);
739         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
740             { ACL_SRC_1, &rhs_reshaped },
741             { ACL_SRC_2, &bias },
742             { ACL_DST, &dst }
743         });
744         gemm.run(gemm_pack);
745 
746         return dst;
747     }
748 
749     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
750                                       const ActivationLayerInfo &act_info)
751     {
752         TensorShape dst_shape = lhs_shape;
753         dst_shape.set(0, rhs_shape[0]);
754         dst_shape.set(1, lhs_shape[1] / m_h);
755         dst_shape.set(2, m_h);
756         dst_shape.set(3, lhs_shape[2]);
757 
758         // Create reference
759         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
760         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
761         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
762 
763         const int n          = rhs_shape[0];
764         const int m          = lhs_shape[1];
765         const int batch_size = lhs_shape[2];
766 
767         // Fill reference
768         fill(lhs, 0);
769         fill(rhs, 1);
770         fill(bias, 2);
771 
772         // The bias is broadcast: replicate its first row into the remaining m * batch_size - 1 rows
773         for(int i = 1; i < m * batch_size; i++)
774         {
775             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
776         }
777 
778         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
779     }
780 
781     TensorType      _target{};
782     SimpleTensor<T> _reference{};
783 };
784 
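/** Fixture for the fully parameterised reshaped GEMM (both LHS and RHS reshaped).
 *
 * The block sizes (m0, n0, k0), interleaving factors (v0, h0), interleave/transpose flags,
 * export_to_cl_image and fp_mixed_precision are all driven by the test. When the RHS reshape
 * cannot be configured for cl_image export, validate_result is cleared and the reference
 * computation is skipped.
 */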
785 template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
786 class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
787 {
788 public:
789     template <typename...>
790     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
791                bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
792     {
793         GEMMLHSMatrixInfo lhs_info;
794         lhs_info.m0         = m0;
795         lhs_info.k0         = k0;
796         lhs_info.v0         = v0;
797         lhs_info.interleave = interleave_lhs;
798         lhs_info.transpose  = lhs_transpose;
799 
800         GEMMRHSMatrixInfo rhs_info;
801         rhs_info.n0                 = n0;
802         rhs_info.k0                 = k0;
803         rhs_info.h0                 = h0;
804         rhs_info.interleave         = interleave_rhs;
805         rhs_info.transpose          = !lhs_transpose;
806         rhs_info.export_to_cl_image = export_to_cl_image;
807 
808         // Set the tensor shapes for LHS and RHS matrices
809         const TensorShape lhs_shape(k, m, batch_size);
810         const TensorShape rhs_shape(n, k, batch_size);
811         const TensorShape bias_shape(n,
812                                      broadcast_bias ? 1 : m,
813                                      broadcast_bias ? 1 : batch_size);
814 
815         _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
816         if(validate_result)
817         {
818             _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
819         }
820     }
821 
822 protected:
823     template <typename U>
824     void fill(U &&tensor, int i)
825     {
826         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
827         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
828 
829         DistributionType distribution{ T(-1.0f), T(1.0f) };
830         library->fill(tensor, distribution, i);
831 
832         // Fill the tensor borders with infinity so that any out-of-bounds access shows up as NaN in the output (i.e. inf * 0)
833         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
834         library->fill_borders_with_garbage(tensor, distribution_inf, i);
835     }
836 
837     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
838                               DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
839     {
840         // Create tensors
841         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
842         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
843         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
844         TensorType lhs_reshaped;
845         TensorType rhs_reshaped;
846         TensorType dst;
847 
848         const unsigned int M = lhs_shape[1];
849         const unsigned int N = rhs_shape[0];
850         const unsigned int K = lhs_shape[0];
851         GEMMKernelInfo     kernel_info;
852         kernel_info.m                       = M;
853         kernel_info.n                       = N;
854         kernel_info.k                       = K;
855         kernel_info.depth_output_gemm3d     = 0;
856         kernel_info.reinterpret_input_as_3d = false;
857         kernel_info.broadcast_bias          = broadcast_bias;
858         kernel_info.activation_info         = act_info;
859         kernel_info.fp_mixed_precision      = fp_mixed_precision;
860 
861         // The output tensor will be auto-initialized within the function
862 
863         // Create and configure function
864         ReshapeLHSOperatorType reshape_lhs;
865         ReshapeRHSOperatorType reshape_rhs;
866         GEMMOperatorType       gemm;
867 
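        // Skip the test when the RHS reshape rejects the configuration and export_to_cl_image was
        // requested (e.g. the target does not support exporting to a CL image): no target is
        // computed and, in setup(), the reference is not validated.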
868         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
869         validate_result = validate_result || !rhs_info.export_to_cl_image;
870         if(!validate_result)
871         {
872             return nullptr;
873         }
874 
875         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
876         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
877         gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
878 
879         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
880         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
881         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
882 
883         // We do not pad when exporting to a CL image, as the tensor must comply with strict pitch alignment restrictions
884         if(!rhs_info.export_to_cl_image)
885         {
886             add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
887         }
888 
889         // Allocate tensors
890         lhs.allocator()->allocate();
891         rhs.allocator()->allocate();
892         lhs_reshaped.allocator()->allocate();
893         rhs_reshaped.allocator()->allocate();
894         bias.allocator()->allocate();
895         dst.allocator()->allocate();
896 
897         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
898         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
899         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
900         ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
901         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
902         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
903 
904         // Fill tensors
905         fill(AccessorType(lhs), 0);
906         fill(AccessorType(rhs), 1);
907         fill(AccessorType(bias), 2);
908 
909         // Compute GEMM
910         ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
911         reshape_lhs.run(reshape_lhs_pack);
912         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
913         reshape_rhs.run(reshape_rhs_pack);
914         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
915             { ACL_SRC_1, &rhs_reshaped },
916             { ACL_SRC_2, &bias },
917             { ACL_DST, &dst }
918         });
919         gemm.run(gemm_pack);
920 
921         return dst;
922     }
923 
924     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
925                                       const ActivationLayerInfo &act_info)
926     {
927         TensorShape dst_shape = lhs_shape;
928         dst_shape[0]          = rhs_shape[0];
929         dst_shape[1]          = lhs_shape[1];
930 
931         // Create reference
932         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
933         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
934         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
935 
936         const int n          = rhs_shape[0];
937         const int m          = lhs_shape[1];
938         const int batch_size = lhs_shape[2];
939 
940         // Fill reference
941         fill(lhs, 0);
942         fill(rhs, 1);
943         fill(bias, 2);
944 
945         if(broadcast_bias)
946         {
947             // In case of broadcast, replicate the first row of the bias into the remaining m * batch_size - 1 rows
948             for(int i = 1; i < m * batch_size; i++)
949             {
950                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
951             }
952         }
953 
954         if(fp_mixed_precision)
955         {
956             return reference::activation_layer(reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta), act_info);
957         }
958         else
959         {
960             return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
961         }
962     }
963 
964     bool            validate_result = true;
965     TensorType      _target{};
966     SimpleTensor<T> _reference{};
967 };
968 
969 /** (EXPERIMENTAL_POST_OPS)*/
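/** Same as GEMMMatrixMultiplyReshapedValidationFixture, with an experimental::PostOpList appended
 *  to the kernel. Each PostOpArgBroadcast entry selects, per post-op argument, whether dimension
 *  0, 1 or 2 is broadcast (size 1) instead of taking the full (n, m, batch_size) extent.
 */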
970 template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
971 class GEMMMatrixMultiplyReshapedWithPostOpsValidationFixture : public framework::Fixture
972 {
973 public:
974     using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instructs the fixture whether broadcasting is needed in dimension 0, 1, 2 of each PostOp argument
975 public:
976     template <typename...>
977     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
978                bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info,
979                const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
980     {
981         GEMMLHSMatrixInfo lhs_info;
982         lhs_info.m0         = m0;
983         lhs_info.k0         = k0;
984         lhs_info.v0         = v0;
985         lhs_info.interleave = interleave_lhs;
986         lhs_info.transpose  = lhs_transpose;
987 
988         GEMMRHSMatrixInfo rhs_info;
989         rhs_info.n0                 = n0;
990         rhs_info.k0                 = k0;
991         rhs_info.h0                 = h0;
992         rhs_info.interleave         = interleave_rhs;
993         rhs_info.transpose          = !lhs_transpose;
994         rhs_info.export_to_cl_image = export_to_cl_image;
995 
996         // Set the tensor shapes for LHS and RHS matrices
997         const TensorShape lhs_shape(k, m, batch_size);
998         const TensorShape rhs_shape(n, k, batch_size);
999         const TensorShape bias_shape(n,
1000                                      broadcast_bias ? 1 : m,
1001                                      broadcast_bias ? 1 : batch_size);
1002         auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
1003                                                                                                                     [ = ](auto broadcast)
1004         {
1005             return TensorShape
1006             {
1007                 std::get<0>(broadcast) ? 1 : n,
1008                 std::get<1>(broadcast) ? 1 : m,
1009                 std::get<2>(broadcast) ? 1 : batch_size,
1010             };
1011         });
1012 
1013         _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
1014         if(validate_result)
1015         {
1016             _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
1017         }
1018     }
1019 
1020 protected:
1021     template <typename U>
1022     void fill(U &&tensor, int i)
1023     {
1024         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
1025         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
1026 
1027         DistributionType distribution{ T(-1.0f), T(1.0f) };
1028         library->fill(tensor, distribution, i);
1029 
1030         // Fill the tensor borders with infinity so that any out-of-bounds access shows up as NaN in the output (i.e. inf * 0)
1031         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
1032         library->fill_borders_with_garbage(tensor, distribution_inf, i);
1033     }
1034 
1035     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1036                               DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1037     {
1038         // Create tensors
1039         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
1040         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
1041         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1042 
1043         // Create post op tensors and populate post op with them
1044         std::vector<TensorType> post_op_tensors_holder{};
1045         auto                    populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
1046                                                                                                                                 [&post_op_tensors_holder, &data_type](auto shape)
1047         {
1048             auto t = create_tensor<TensorType>(shape, data_type, 1);
1049             post_op_tensors_holder.push_back(std::move(t));
1050             return post_op_tensors_holder.back().info();
1051         });
1052         TensorType lhs_reshaped;
1053         TensorType rhs_reshaped;
1054         TensorType dst;
1055 
1056         const unsigned int M = lhs_shape[1];
1057         const unsigned int N = rhs_shape[0];
1058         const unsigned int K = lhs_shape[0];
1059         GEMMKernelInfo     kernel_info;
1060         kernel_info.m                       = M;
1061         kernel_info.n                       = N;
1062         kernel_info.k                       = K;
1063         kernel_info.depth_output_gemm3d     = 0;
1064         kernel_info.reinterpret_input_as_3d = false;
1065         kernel_info.broadcast_bias          = broadcast_bias;
1066         kernel_info.activation_info         = act_info;
1067         kernel_info.fp_mixed_precision      = fp_mixed_precision;
1068         kernel_info.post_ops                = populated_post_ops;
1069 
1070         // The output tensor will be auto-initialized within the function
1071 
1072         // Create and configure function
1073         ReshapeLHSOperatorType reshape_lhs;
1074         ReshapeRHSOperatorType reshape_rhs;
1075         GEMMOperatorType       gemm;
1076 
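        // As in the fixture above: skip the test when the RHS reshape cannot be configured for
        // cl_image export.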
1077         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1078         validate_result = validate_result || !rhs_info.export_to_cl_image;
1079         if(!validate_result)
1080         {
1081             return nullptr;
1082         }
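        // Annotation: reshape_rhs.validate() is expected to fail when export_to_cl_image is requested but not
        // supported by the target; validate_result then stays false, the fixture returns early and the
        // surrounding setup() is expected to skip the reference computation for this configuration.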
1083 
1084         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1085         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1086         gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
1087 
1088         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1089         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1090         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1091         for(const auto &tensor : post_op_tensors_holder)
1092         {
1093             ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
1094         }
1095 
1096         // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
1097         if(!rhs_info.export_to_cl_image)
1098         {
1099             add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
1100             for(auto &tensor : post_op_tensors_holder)
1101             {
1102                 add_padding_x({ &tensor });
1103             }
1104         }
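        // Annotation: when the RHS is exported to a cl_image, the row pitch typically has to respect the
        // device's image pitch alignment, so no artificial x padding is added on that path (see the guard above).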
1105 
1106         // Allocate tensors
1107         lhs.allocator()->allocate();
1108         rhs.allocator()->allocate();
1109         lhs_reshaped.allocator()->allocate();
1110         rhs_reshaped.allocator()->allocate();
1111         bias.allocator()->allocate();
1112         dst.allocator()->allocate();
1113         for(auto &tensor : post_op_tensors_holder)
1114         {
1115             tensor.allocator()->allocate();
1116         }
1117 
1118         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1119         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1120         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1121         ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1122         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1123         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1124         for(const auto &tensor : post_op_tensors_holder)
1125         {
1126             ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
1127         }
1128 
1129         // Fill tensors
1130         fill(AccessorType(lhs), 0);
1131         fill(AccessorType(rhs), 1);
1132         fill(AccessorType(bias), 2);
1133         for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1134         {
1135             fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
1136         }
1137 
1138         // Compute GEMM
1139         ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1140         reshape_lhs.run(reshape_lhs_pack);
1141         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1142         reshape_rhs.run(reshape_rhs_pack);
1143         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
1144             { ACL_SRC_1, &rhs_reshaped },
1145             { ACL_SRC_2, &bias },
1146             { ACL_DST, &dst }
1147         });
1148         for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1149         {
1150             gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
1151         }
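        // Annotation: every extra post-op input is bound to the pack under the argument slot returned by
        // get_post_op_arg_type(i), matching the order in which the post-op tensors were collected above.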
1152         gemm.run(gemm_pack);
1153 
1154         return dst;
1155     }
1156 
1157     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
1158                                       const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1159     {
1160         TensorShape dst_shape = lhs_shape;
1161         dst_shape[0]          = rhs_shape[0];
1162         dst_shape[1]          = lhs_shape[1];
1163 
1164         // Create reference
1165         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1166         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1167         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1168         // Create post op tensors and populate post op with them
1169         auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
1170         {
1171             return SimpleTensor<T> { shape, data_type, 1 };
1172         });
1173 
1174         const int n          = rhs_shape[0];
1175         const int m          = lhs_shape[1];
1176         const int batch_size = lhs_shape[2];
1177 
1178         // Fill reference
1179         int tensor_idx = 0;
1180         fill(lhs, tensor_idx++);
1181         fill(rhs, tensor_idx++);
1182         fill(bias, tensor_idx++);
1183         for(auto &op : populated_post_ops.get_list())
1184         {
1185             for(auto tensor : op->arguments())
1186             {
1187                 fill(*tensor, tensor_idx++);
1188             }
1189         }
1190 
1191         if(broadcast_bias)
1192         {
1193             // In case of broadcast, copy the first bias row into the remaining M * batch_size - 1 rows
1194             for(int i = 1; i < m * batch_size; i++)
1195             {
1196                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1197             }
1198         }
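        // Annotation: the reference bias tensor has the full dst shape (N x M x batches), so broadcasting is
        // emulated by replicating the first filled row of N values into every remaining row of every batch
        // before the reference GEMM adds the bias element-wise.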
1199 
1200         SimpleTensor<T> out;
1201         if(fp_mixed_precision)
1202         {
1203             out = reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta);
1204         }
1205         else
1206         {
1207             out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
1208         }
1209         // Ignore activation info if post ops are used instead
1210         if(populated_post_ops.size() > 0)
1211         {
1212             out = reference::post_ops<T>(out, populated_post_ops);
1213         }
1214         else
1215         {
1216             out = reference::activation_layer(out, act_info);
1217         }
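        // Annotation: this mirrors the comment above - when post ops are present the activation info is ignored,
        // so the reference applies either the post-op chain or the activation, never both.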
1218         return out;
1219     }
1220 
1221     bool            validate_result = true;
1222     TensorType      _target{};
1223     SimpleTensor<T> _reference{};
1224 };
1225 
1226 template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
1227 class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
1228 {
1229 public:
1230     template <typename...>
1231     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
1232                bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
1233     {
1234         GEMMLHSMatrixInfo lhs_info;
1235         lhs_info.m0         = m0;
1236         lhs_info.k0         = k0;
1237         lhs_info.v0         = v0;
1238         lhs_info.interleave = interleave_lhs;
1239         lhs_info.transpose  = lhs_transpose;
1240 
1241         GEMMRHSMatrixInfo rhs_info;
1242         rhs_info.n0                 = n0;
1243         rhs_info.k0                 = k0;
1244         rhs_info.h0                 = h0;
1245         rhs_info.interleave         = interleave_rhs;
1246         rhs_info.transpose          = !lhs_transpose;
1247         rhs_info.export_to_cl_image = export_to_cl_image;
1248 
1249         // In case of GEMM3D, m is the product between m_w and m_h
1250         const unsigned int m = m_w * m_h;
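        // Annotation: the kernel still computes a plain 2D GEMM with M = m_w * m_h rows per batch; the output
        // is only reinterpreted as a 3D tensor afterwards (compare the dst_shape handling in compute_reference
        // below, which splits M back into m_w x m_h).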
1251 
1252         // Set the tensor shapes for LHS and RHS matrices
1253         const TensorShape lhs_shape(k, m, batch_size);
1254         const TensorShape rhs_shape(n, k, batch_size);
1255         const TensorShape bias_shape(n, 1, 1);
1256 
1257         _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
1258         if(validate_result)
1259         {
1260             _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
1261         }
1262     }
1263 
1264 protected:
1265     template <typename U>
1266     void fill(U &&tensor, int i)
1267     {
1268         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
1269         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
1270 
1271         DistributionType distribution{ T(-1.0f), T(1.0f) };
1272         library->fill(tensor, distribution, i);
1273     }
1274 
1275     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1276                               DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
1277     {
1278         // Create tensors
1279         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
1280         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
1281         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1282         TensorType lhs_reshaped;
1283         TensorType rhs_reshaped;
1284         TensorType dst;
1285 
1286         const unsigned int M = lhs_shape[1];
1287         const unsigned int N = rhs_shape[0];
1288         const unsigned int K = lhs_shape[0];
1289         GEMMKernelInfo     kernel_info;
1290         kernel_info.m                       = M;
1291         kernel_info.n                       = N;
1292         kernel_info.k                       = K;
1293         kernel_info.depth_output_gemm3d     = m_h;
1294         kernel_info.reinterpret_input_as_3d = false;
1295         kernel_info.broadcast_bias          = true;
1296         kernel_info.activation_info         = act_info;
1297         kernel_info.fp_mixed_precision      = fp_mixed_precision;
1298 
1299         // The output tensor will be auto-initialized within the function
1300 
1301         // Create and configure function
1302         ReshapeLHSOperatorType reshape_lhs;
1303         ReshapeRHSOperatorType reshape_rhs;
1304         GEMMOperatorType       gemm;
1305 
1306         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1307         validate_result = validate_result || !rhs_info.export_to_cl_image;
1308         if(!validate_result)
1309         {
1310             return nullptr;
1311         }
1312 
1313         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1314         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1315         gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
1316 
1317         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1318         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1319         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1320 
1321         // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
1322         if(!rhs_info.export_to_cl_image)
1323         {
1324             add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
1325         }
1326 
1327         // Allocate tensors
1328         lhs.allocator()->allocate();
1329         rhs.allocator()->allocate();
1330         lhs_reshaped.allocator()->allocate();
1331         rhs_reshaped.allocator()->allocate();
1332         bias.allocator()->allocate();
1333         dst.allocator()->allocate();
1334 
1335         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1336         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1337         ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1338         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1339         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1340         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1341 
1342         // Fill tensors
1343         fill(AccessorType(lhs), 0);
1344         fill(AccessorType(rhs), 1);
1345         fill(AccessorType(bias), 2);
1346 
1347         // Compute GEMM
1348         ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1349         reshape_lhs.run(reshape_lhs_pack);
1350         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1351         reshape_rhs.run(reshape_rhs_pack);
1352         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
1353             { ACL_SRC_1, &rhs_reshaped },
1354             { ACL_SRC_2, &bias },
1355             { ACL_DST, &dst }
1356         });
1357         gemm.run(gemm_pack);
1358 
1359         return dst;
1360     }
1361 
1362     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
1363                                       const ActivationLayerInfo &act_info)
1364     {
1365         TensorShape dst_shape = lhs_shape;
1366         dst_shape.set(0, rhs_shape[0]);
1367         dst_shape.set(1, lhs_shape[1] / m_h);
1368         dst_shape.set(2, m_h);
1369         dst_shape.set(3, lhs_shape[2]);
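        // Annotation: e.g. with m_w = 4, m_h = 3 and batch_size = 2 the flat M = 12 rows per batch are viewed
        // as a (rhs_shape[0] x 4 x 3 x 2) output, which is what depth_output_gemm3d = m_h produces on the target side.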
1370 
1371         // Create reference
1372         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1373         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1374         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1375 
1376         const int n          = rhs_shape[0];
1377         const int m          = lhs_shape[1];
1378         const int batch_size = lhs_shape[2];
1379 
1380         // Fill reference
1381         fill(lhs, 0);
1382         fill(rhs, 1);
1383         fill(bias, 2);
1384 
1385         // Bias is always broadcast in the 3D case: copy the first bias row into the remaining M * batch_size - 1 rows
1386         for(int i = 1; i < m * batch_size; i++)
1387         {
1388             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1389         }
1390 
1391         if(fp_mixed_precision)
1392         {
1393             return reference::activation_layer(reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta), act_info);
1394         }
1395         else
1396         {
1397             return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
1398         }
1399     }
1400 
1401     bool            validate_result = true;
1402     TensorType      _target{};
1403     SimpleTensor<T> _reference{};
1404 };
1405 
1406 template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
1407 class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
1408 {
1409 public:
1410     template <typename...>
1411     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
1412                bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
1413     {
1414         GEMMLHSMatrixInfo lhs_info;
1415         lhs_info.m0 = m0;
1416         lhs_info.k0 = k0;
1417 
1418         GEMMRHSMatrixInfo rhs_info;
1419         rhs_info.n0                 = n0;
1420         rhs_info.k0                 = k0;
1421         rhs_info.h0                 = h0;
1422         rhs_info.interleave         = interleave_rhs;
1423         rhs_info.transpose          = transpose_rhs;
1424         rhs_info.export_to_cl_image = export_to_cl_image;
1425 
1426         // Set the tensor shapes for LHS and RHS matrices
1427         const TensorShape lhs_shape(k, m, batch_size);
1428         const TensorShape rhs_shape(n, k, batch_size);
1429         const TensorShape bias_shape(n,
1430                                      broadcast_bias ? 1 : m,
1431                                      broadcast_bias ? 1 : batch_size);
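        // Annotation: with broadcast_bias the bias collapses to a single row of N values shared by every output
        // row and batch; otherwise it matches the full output shape (N x M x batches).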
1432 
1433         _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
1434         if(validate_result)
1435         {
1436             _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
1437         }
1438     }
1439 
1440 protected:
1441     template <typename U>
1442     void fill(U &&tensor, int i)
1443     {
1444         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
1445         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
1446 
1447         DistributionType distribution{ T(-1.0f), T(1.0f) };
1448         library->fill(tensor, distribution, i);
1449 
1450         // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
1451         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
1452         library->fill_borders_with_garbage(tensor, distribution_inf, i);
1453     }
1454 
1455     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1456                               DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
1457     {
1458         // Create tensors
1459         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
1460         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
1461         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1462         TensorType rhs_reshaped;
1463         TensorType dst;
1464 
1465         const unsigned int M = lhs_shape[1];
1466         const unsigned int N = rhs_shape[0];
1467         const unsigned int K = lhs_shape[0];
1468         GEMMKernelInfo     kernel_info;
1469         kernel_info.m                       = M;
1470         kernel_info.n                       = N;
1471         kernel_info.k                       = K;
1472         kernel_info.depth_output_gemm3d     = 0;
1473         kernel_info.reinterpret_input_as_3d = false;
1474         kernel_info.broadcast_bias          = broadcast_bias;
1475         kernel_info.activation_info         = act_info;
1476 
1477         // The output tensor will be auto-initialized within the function
1478 
1479         // Create and configure function
1480         ReshapeRHSOperatorType reshape_rhs;
1481         GEMMOperatorType       gemm;
1482 
1483         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1484         validate_result = validate_result || !rhs_info.export_to_cl_image;
1485         if(!validate_result)
1486         {
1487             return nullptr;
1488         }
1489 
1490         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1491         gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
1492 
1493         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1494         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1495         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1496 
1497         // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
1498         if(!rhs_info.export_to_cl_image)
1499         {
1500             add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
1501         }
1502 
1503         // Allocate tensors
1504         lhs.allocator()->allocate();
1505         rhs.allocator()->allocate();
1506         rhs_reshaped.allocator()->allocate();
1507         bias.allocator()->allocate();
1508         dst.allocator()->allocate();
1509 
1510         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1511         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1512         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1513         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1514         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1515 
1516         // Fill tensors
1517         fill(AccessorType(lhs), 0);
1518         fill(AccessorType(rhs), 1);
1519         fill(AccessorType(bias), 2);
1520 
1521         // Compute GEMM
1522         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1523         reshape_rhs.run(reshape_rhs_pack);
1524         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
1525             { ACL_SRC_1, &rhs_reshaped },
1526             { ACL_SRC_2, &bias },
1527             { ACL_DST, &dst }
1528         });
1529         gemm.run(gemm_pack);
1530 
1531         return dst;
1532     }
1533 
1534     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
1535                                       const ActivationLayerInfo &act_info)
1536     {
1537         TensorShape dst_shape = lhs_shape;
1538         dst_shape[0]          = rhs_shape[0];
1539         dst_shape[1]          = lhs_shape[1];
1540 
1541         // Create reference
1542         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1543         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1544         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1545 
1546         const int n          = rhs_shape[0];
1547         const int m          = lhs_shape[1];
1548         const int batch_size = lhs_shape[2];
1549 
1550         // Fill reference
1551         fill(lhs, 0);
1552         fill(rhs, 1);
1553         fill(bias, 2);
1554 
1555         if(broadcast_bias)
1556         {
1557             // In case of broadcast, copy the first bias row into the remaining M * batch_size - 1 rows
1558             for(int i = 1; i < m * batch_size; i++)
1559             {
1560                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1561             }
1562         }
1563 
1564         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
1565     }
1566 
1567     bool            validate_result = true;
1568     TensorType      _target{};
1569     SimpleTensor<T> _reference{};
1570 };
1571 
1572 /** (EXPERIMENTAL_POST_OPS)*/
1573 template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
1574 class GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture : public framework::Fixture
1575 {
1576 public:
1577     using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instructs the fixture whether broadcasting is needed in dimensions 0, 1 and 2 of each PostOp argument
1578     template <typename...>
1579     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
1580                bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info,
1581                const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
1582     {
1583         GEMMLHSMatrixInfo lhs_info;
1584         lhs_info.m0 = m0;
1585         lhs_info.k0 = k0;
1586 
1587         GEMMRHSMatrixInfo rhs_info;
1588         rhs_info.n0                 = n0;
1589         rhs_info.k0                 = k0;
1590         rhs_info.h0                 = h0;
1591         rhs_info.interleave         = interleave_rhs;
1592         rhs_info.transpose          = transpose_rhs;
1593         rhs_info.export_to_cl_image = export_to_cl_image;
1594 
1595         // Set the tensor shapes for LHS and RHS matrices
1596         const TensorShape lhs_shape(k, m, batch_size);
1597         const TensorShape rhs_shape(n, k, batch_size);
1598         const TensorShape bias_shape(n,
1599                                      broadcast_bias ? 1 : m,
1600                                      broadcast_bias ? 1 : batch_size);
1601         auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
1602                                                                                                                     [ = ](auto broadcast)
1603         {
1604             return TensorShape
1605             {
1606                 std::get<0>(broadcast) ? 1 : n,
1607                 std::get<1>(broadcast) ? 1 : m,
1608                 std::get<2>(broadcast) ? 1 : batch_size,
1609             };
1610         });
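        // Annotation: each PostOp argument shape is derived from (n, m, batch_size) with a 1 in every dimension
        // flagged for broadcast, e.g. {false, true, false} yields TensorShape(n, 1, batch_size).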
1611 
1612         _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
1613         if(validate_result)
1614         {
1615             _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
1616         }
1617     }
1618 
1619 protected:
1620     template <typename U>
1621     void fill(U &&tensor, int i)
1622     {
1623         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
1624         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
1625 
1626         DistributionType distribution{ T(-1.0f), T(1.0f) };
1627         library->fill(tensor, distribution, i);
1628 
1629         // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
1630         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
1631         library->fill_borders_with_garbage(tensor, distribution_inf, i);
1632     }
1633 
1634     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1635                               DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1636     {
1637         // Create tensors
1638         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
1639         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
1640         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1641         TensorType rhs_reshaped;
1642         TensorType dst;
1643         // Create post op tensors and populate post op with them
1644         std::vector<TensorType> post_op_tensors_holder{};
1645         auto                    populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
1646                                                                                                                                 [&post_op_tensors_holder, &data_type](auto shape)
1647         {
1648             auto t = create_tensor<TensorType>(shape, data_type, 1);
1649             post_op_tensors_holder.push_back(std::move(t));
1650             return post_op_tensors_holder.back().info();
1651         });
1652 
1653         const unsigned int M = lhs_shape[1];
1654         const unsigned int N = rhs_shape[0];
1655         const unsigned int K = lhs_shape[0];
1656         GEMMKernelInfo     kernel_info;
1657         kernel_info.m                       = M;
1658         kernel_info.n                       = N;
1659         kernel_info.k                       = K;
1660         kernel_info.depth_output_gemm3d     = 0;
1661         kernel_info.reinterpret_input_as_3d = false;
1662         kernel_info.broadcast_bias          = broadcast_bias;
1663         kernel_info.activation_info         = act_info;
1664         kernel_info.post_ops                = populated_post_ops;
1665 
1666         // The output tensor will be auto-initialized within the function
1667 
1668         // Create and configure function
1669         ReshapeRHSOperatorType reshape_rhs;
1670         GEMMOperatorType       gemm;
1671 
1672         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1673         validate_result = validate_result || !rhs_info.export_to_cl_image;
1674         if(!validate_result)
1675         {
1676             return nullptr;
1677         }
1678 
1679         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1680         gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
1681 
1682         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1683         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1684         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1685         for(const auto &tensor : post_op_tensors_holder)
1686         {
1687             ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
1688         }
1689 
1690         // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
1691         if(!rhs_info.export_to_cl_image)
1692         {
1693             add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
1694             for(auto &tensor : post_op_tensors_holder)
1695             {
1696                 add_padding_x({ &tensor });
1697             }
1698         }
1699 
1700         // Allocate tensors
1701         lhs.allocator()->allocate();
1702         rhs.allocator()->allocate();
1703         rhs_reshaped.allocator()->allocate();
1704         bias.allocator()->allocate();
1705         dst.allocator()->allocate();
1706         for(auto &tensor : post_op_tensors_holder)
1707         {
1708             tensor.allocator()->allocate();
1709         }
1710 
1711         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1712         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1713         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1714         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1715         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1716         for(const auto &tensor : post_op_tensors_holder)
1717         {
1718             ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
1719         }
1720 
1721         // Fill tensors
1722         fill(AccessorType(lhs), 0);
1723         fill(AccessorType(rhs), 1);
1724         fill(AccessorType(bias), 2);
1725         for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1726         {
1727             fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
1728         }
1729 
1730         // Compute GEMM
1731         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1732         reshape_rhs.run(reshape_rhs_pack);
1733         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
1734             { ACL_SRC_1, &rhs_reshaped },
1735             { ACL_SRC_2, &bias },
1736             { ACL_DST, &dst }
1737         });
1738         for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1739         {
1740             gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
1741         }
1742         gemm.run(gemm_pack);
1743 
1744         return dst;
1745     }
1746 
1747     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
1748                                       const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1749     {
1750         TensorShape dst_shape = lhs_shape;
1751         dst_shape[0]          = rhs_shape[0];
1752         dst_shape[1]          = lhs_shape[1];
1753 
1754         // Create reference
1755         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1756         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1757         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1758         // Create post op tensors and populate post op with them
1759         auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
1760         {
1761             return SimpleTensor<T> { shape, data_type, 1 };
1762         });
1763 
1764         const int n          = rhs_shape[0];
1765         const int m          = lhs_shape[1];
1766         const int batch_size = lhs_shape[2];
1767 
1768         // Fill reference
1769         int tensor_idx = 0;
1770         fill(lhs, tensor_idx++);
1771         fill(rhs, tensor_idx++);
1772         fill(bias, tensor_idx++);
1773         for(auto &op : populated_post_ops.get_list())
1774         {
1775             for(auto tensor : op->arguments())
1776             {
1777                 fill(*tensor, tensor_idx++);
1778             }
1779         }
1780 
1781         if(broadcast_bias)
1782         {
1783             // In case of broadcast, copy the first bias row into the remaining M * batch_size - 1 rows
1784             for(int i = 1; i < m * batch_size; i++)
1785             {
1786                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1787             }
1788         }
1789 
1790         SimpleTensor<T> out;
1791         out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
1792         // Ignore activation info if post ops are used instead
1793         if(populated_post_ops.size() > 0)
1794         {
1795             out = reference::post_ops<T>(out, populated_post_ops);
1796         }
1797         else
1798         {
1799             out = reference::activation_layer(out, act_info);
1800         }
1801         return out;
1802     }
1803 
1804     bool            validate_result = true;
1805     TensorType      _target{};
1806     SimpleTensor<T> _reference{};
1807 };
1808 
1809 template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
1810 class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
1811 {
1812 public:
1813     template <typename...>
1814     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
1815                bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
1816     {
1817         GEMMLHSMatrixInfo lhs_info;
1818         lhs_info.m0 = m0;
1819         lhs_info.k0 = k0;
1820 
1821         GEMMRHSMatrixInfo rhs_info;
1822         rhs_info.n0                 = n0;
1823         rhs_info.k0                 = k0;
1824         rhs_info.h0                 = h0;
1825         rhs_info.interleave         = interleave_rhs;
1826         rhs_info.transpose          = transpose_rhs;
1827         rhs_info.export_to_cl_image = export_to_cl_image;
1828 
1829         // In case of GEMM3D, m is the product between m_w and m_h
1830         const unsigned int m = m_w * m_h;
1831 
1832         // Set the tensor shapes for LHS and RHS matrices
1833         const TensorShape lhs_shape(k, m, batch_size);
1834         const TensorShape rhs_shape(n, k, batch_size);
1835         const TensorShape bias_shape(n, 1, 1);
1836 
1837         _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info, has_pad_y);
1838         if(validate_result)
1839         {
1840             _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
1841         }
1842     }
1843 
1844 protected:
1845     template <typename U>
1846     void fill(U &&tensor, int i)
1847     {
1848         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
1849         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
1850 
1851         DistributionType distribution{ T(-1.0f), T(1.0f) };
1852         library->fill(tensor, distribution, i);
1853     }
1854 
1855     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1856                               DataType data_type, float alpha, float beta,
1857                               unsigned int m_h, const ActivationLayerInfo &act_info, bool has_pad_y)
1858     {
1859         // Create tensors
1860         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
1861         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
1862         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1863         TensorType rhs_reshaped;
1864         TensorType dst;
1865 
1866         const unsigned int M = lhs_shape[1];
1867         const unsigned int N = rhs_shape[0];
1868         const unsigned int K = lhs_shape[0];
1869         GEMMKernelInfo     kernel_info;
1870         kernel_info.m                       = M;
1871         kernel_info.n                       = N;
1872         kernel_info.k                       = K;
1873         kernel_info.depth_output_gemm3d     = m_h;
1874         kernel_info.reinterpret_input_as_3d = false;
1875         kernel_info.broadcast_bias          = true;
1876         kernel_info.activation_info         = act_info;
1877         kernel_info.has_pad_y               = has_pad_y;
1878 
1879         // The output tensor will be auto-initialized within the function
1880         // Create and configure function
1881         ReshapeRHSOperatorType reshape_rhs;
1882         GEMMOperatorType       gemm;
1883 
1884         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1885         validate_result = validate_result || !rhs_info.export_to_cl_image;
1886         if(!validate_result)
1887         {
1888             return nullptr;
1889         }
1890 
1891         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1892         gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
1893 
1894         if(has_pad_y)
1895         {
1896             // Add dummy padding into lhs to validate has_pad_y path
1897             lhs.info()->extend_padding(PaddingSize(2, 0, 2, 0));
1898             dst.info()->extend_padding(PaddingSize(2, 0, 1, 0));
1899         }
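        // Annotation: extending the y padding of lhs and dst exercises the kernel's has_pad_y path, i.e. the
        // address computations that have to step over padded rows.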
1900 
1901         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1902         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1903         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1904 
1905         // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
1906         if(!rhs_info.export_to_cl_image)
1907         {
1908             add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
1909         }
1910 
1911         // Allocate tensors
1912         lhs.allocator()->allocate();
1913         rhs.allocator()->allocate();
1914         rhs_reshaped.allocator()->allocate();
1915         bias.allocator()->allocate();
1916         dst.allocator()->allocate();
1917 
1918         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1919         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1920         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1921         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1922         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1923 
1924         // Fill tensors
1925         fill(AccessorType(lhs), 0);
1926         fill(AccessorType(rhs), 1);
1927         fill(AccessorType(bias), 2);
1928 
1929         // Compute GEMM
1930         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1931         reshape_rhs.run(reshape_rhs_pack);
1932         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
1933             { ACL_SRC_1, &rhs_reshaped },
1934             { ACL_SRC_2, &bias },
1935             { ACL_DST, &dst }
1936         });
1937         gemm.run(gemm_pack);
1938 
1939         return dst;
1940     }
1941 
1942     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
1943                                       const ActivationLayerInfo &act_info)
1944     {
1945         TensorShape dst_shape = lhs_shape;
1946         dst_shape.set(0, rhs_shape[0]);
1947         dst_shape.set(1, lhs_shape[1] / m_h);
1948         dst_shape.set(2, m_h);
1949         dst_shape.set(3, lhs_shape[2]);
1950 
1951         // Create reference
1952         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1953         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1954         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1955 
1956         const int n          = rhs_shape[0];
1957         const int m          = lhs_shape[1];
1958         const int batch_size = lhs_shape[2];
1959 
1960         // Fill reference
1961         fill(lhs, 0);
1962         fill(rhs, 1);
1963         fill(bias, 2);
1964 
1965         // Bias is always broadcast in the 3D case: copy the first bias row into the remaining M * batch_size - 1 rows
1966         for(int i = 1; i < m * batch_size; i++)
1967         {
1968             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1969         }
1970 
1971         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
1972     }
1973 
1974     bool            validate_result = true;
1975     TensorType      _target{};
1976     SimpleTensor<T> _reference{};
1977 };
1978 
1979 template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
1980 class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
1981 {
1982 public:
1983     template <typename...>
1984     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
1985                const ActivationLayerInfo &act_info)
1986     {
1987         GEMMLHSMatrixInfo lhs_info;
1988         lhs_info.m0 = m0;
1989         lhs_info.k0 = k0;
1990 
1991         GEMMRHSMatrixInfo rhs_info;
1992         rhs_info.n0 = n0;
1993         rhs_info.k0 = k0;
1994 
1995         // Set the tensor shapes for LHS and RHS matrices
1996         const TensorShape lhs_shape(k, m, batch_size);
1997         const TensorShape rhs_shape(n, k, batch_size);
1998         const TensorShape bias_shape(n,
1999                                      broadcast_bias ? 1 : m,
2000                                      broadcast_bias ? 1 : batch_size);
2001 
2002         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
2003         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
2004     }
2005 
2006 protected:
2007     template <typename U>
2008     void fill(U &&tensor, int i)
2009     {
2010         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
2011         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
2012 
2013         DistributionType distribution{ T(-1.0f), T(1.0f) };
2014         library->fill(tensor, distribution, i);
2015 
2016         // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
2017         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
2018         library->fill_borders_with_garbage(tensor, distribution_inf, i);
2019     }
2020 
2021     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
2022                               DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
2023     {
2024         // Create tensors
2025         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
2026         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
2027         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
2028         TensorType dst;
2029 
2030         const unsigned int M = lhs_shape[1];
2031         const unsigned int N = rhs_shape[0];
2032         const unsigned int K = lhs_shape[0];
2033         GEMMKernelInfo     kernel_info;
2034         kernel_info.m                       = M;
2035         kernel_info.n                       = N;
2036         kernel_info.k                       = K;
2037         kernel_info.depth_output_gemm3d     = 0;
2038         kernel_info.reinterpret_input_as_3d = false;
2039         kernel_info.broadcast_bias          = broadcast_bias;
2040         kernel_info.activation_info         = act_info;
2041 
2042         // Create and configure function
2043         GEMMOperatorType gemm;
2044         gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
2045 
2046         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
2047         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
2048         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
2049 
2050         add_padding_x({ &lhs, &rhs, &bias, &dst });
2051 
2052         // Allocate tensors
2053         lhs.allocator()->allocate();
2054         rhs.allocator()->allocate();
2055         bias.allocator()->allocate();
2056         dst.allocator()->allocate();
2057 
2058         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
2059         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
2060         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
2061         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
2062 
2063         // Fill tensors
2064         fill(AccessorType(lhs), 0);
2065         fill(AccessorType(rhs), 1);
2066         fill(AccessorType(bias), 2);
2067 
2068         // Compute GEMM
2069         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
2070             { ACL_SRC_1, &rhs },
2071             { ACL_SRC_2, &bias },
2072             { ACL_DST, &dst }
2073         });
2074         gemm.run(gemm_pack);
2075 
2076         return dst;
2077     }
2078 
2079     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
2080                                       const ActivationLayerInfo &act_info)
2081     {
2082         TensorShape dst_shape = lhs_shape;
2083         dst_shape[0]          = rhs_shape[0];
2084         dst_shape[1]          = lhs_shape[1];
2085 
2086         // Create reference
2087         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
2088         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
2089         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
2090 
2091         const int n          = rhs_shape[0];
2092         const int m          = lhs_shape[1];
2093         const int batch_size = lhs_shape[2];
2094 
2095         // Fill reference
2096         fill(lhs, 0);
2097         fill(rhs, 1);
2098         fill(bias, 2);
2099 
2100         if(broadcast_bias)
2101         {
2102             // In case of broadcast, copy the first bias row into the remaining M * batch_size - 1 rows
2103             for(int i = 1; i < m * batch_size; i++)
2104             {
2105                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
2106             }
2107         }
2108 
2109         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
2110     }
2111 
2112     TensorType      _target{};
2113     SimpleTensor<T> _reference{};
2114 };
2115 
2116 template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
2117 class GEMMMatrixMultiplyNativeWithPostOpsValidationFixture : public framework::Fixture
2118 {
2119 public:
2120     using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instructs the fixture whether broadcasting is needed in dimensions 0, 1 and 2 of each PostOp argument
2121 public:
2122     template <typename...>
2123     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
2124                const ActivationLayerInfo &act_info, const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
2125     {
2126         GEMMLHSMatrixInfo lhs_info;
2127         lhs_info.m0 = m0;
2128         lhs_info.k0 = k0;
2129 
2130         GEMMRHSMatrixInfo rhs_info;
2131         rhs_info.n0 = n0;
2132         rhs_info.k0 = k0;
2133 
2134         // Set the tensor shapes for LHS and RHS matrices
2135         const TensorShape lhs_shape(k, m, batch_size);
2136         const TensorShape rhs_shape(n, k, batch_size);
2137         const TensorShape bias_shape(n,
2138                                      broadcast_bias ? 1 : m,
2139                                      broadcast_bias ? 1 : batch_size);
2140         const auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
2141                                                                                                                           [ = ](auto broadcast)
2142         {
2143             return TensorShape
2144             {
2145                 std::get<0>(broadcast) ? 1 : n,
2146                 std::get<1>(broadcast) ? 1 : m,
2147                 std::get<2>(broadcast) ? 1 : batch_size,
2148             };
2149         });
2150 
2151         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
2152         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
2153     }
2154 
2155 protected:
2156     template <typename U>
2157     void fill(U &&tensor, int i)
2158     {
2159         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
2160         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
2161 
2162         DistributionType distribution{ T(-1.0f), T(1.0f) };
2163         library->fill(tensor, distribution, i);
2164 
2165         // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
2166         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
2167         library->fill_borders_with_garbage(tensor, distribution_inf, i);
2168     }
2169 
2170     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
2171                               DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
2172     {
2173         // Create tensors
2174         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
2175         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
2176         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
2177         TensorType dst;
2178         // Create post op tensors and populate post op with them
2179         std::vector<TensorType> post_op_tensors_holder{};
2180         auto                    populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
2181                                                                                                                                 [&post_op_tensors_holder, &data_type](auto shape)
2182         {
2183             auto t = create_tensor<TensorType>(shape, data_type, 1);
2184             post_op_tensors_holder.push_back(std::move(t));
2185             return post_op_tensors_holder.back().info();
2186         });
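        // Note: post_op_tensors_holder owns the post-op tensors; populated_post_ops only stores the
        // ITensorInfo pointers that the operator needs at configuration time.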
2187 
2188         const unsigned int M = lhs_shape[1];
2189         const unsigned int N = rhs_shape[0];
2190         const unsigned int K = lhs_shape[0];
2191         GEMMKernelInfo     kernel_info;
2192         kernel_info.m                       = M;
2193         kernel_info.n                       = N;
2194         kernel_info.k                       = K;
2195         kernel_info.depth_output_gemm3d     = 0;
2196         kernel_info.reinterpret_input_as_3d = false;
2197         kernel_info.broadcast_bias          = broadcast_bias;
2198         kernel_info.activation_info         = act_info;
2199         kernel_info.post_ops                = populated_post_ops;
2200 
2201         // Create and configure function
2202         GEMMOperatorType gemm;
2203         gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
2204 
2205         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
2206         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
2207         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
2208         for(const auto &tensor : post_op_tensors_holder)
2209         {
2210             ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
2211         }
2212 
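        // Add padding along the X dimension so the kernels are also exercised with padded tensors.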
2213         add_padding_x({ &lhs, &rhs, &bias, &dst });
2214         for(auto &tensor : post_op_tensors_holder)
2215         {
2216             add_padding_x({ &tensor });
2217         }
2218 
2219         // Allocate tensors
2220         lhs.allocator()->allocate();
2221         rhs.allocator()->allocate();
2222         bias.allocator()->allocate();
2223         dst.allocator()->allocate();
2224         for(auto &tensor : post_op_tensors_holder)
2225         {
2226             tensor.allocator()->allocate();
2227         }
2228 
2229         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
2230         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
2231         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
2232         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
2233         for(const auto &tensor : post_op_tensors_holder)
2234         {
2235             ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
2236         }
2237 
2238         // Fill tensors
2239         fill(AccessorType(lhs), 0);
2240         fill(AccessorType(rhs), 1);
2241         fill(AccessorType(bias), 2);
2242         for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
2243         {
2244             fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
2245         }
2246 
2247         // Compute GEMM
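        // Tensors are bound to the operator at run time through an ITensorPack; each post-op tensor
        // gets its dedicated argument slot via get_post_op_arg_type().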
2248         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
2249             { ACL_SRC_1, &rhs },
2250             { ACL_SRC_2, &bias },
2251             { ACL_DST, &dst }
2252         });
2253         for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
2254         {
2255             gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
2256         }
2257         gemm.run(gemm_pack);
2258 
2259         return dst;
2260     }
2261 
2262     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
2263                                       const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
2264     {
2265         TensorShape dst_shape = lhs_shape;
2266         dst_shape[0]          = rhs_shape[0];
2267         dst_shape[1]          = lhs_shape[1];
2268 
2269         // Create reference
2270         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
2271         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
2272         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
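        // The reference bias is allocated at the full output shape so that a broadcast bias can simply be
        // replicated row by row before calling the GEMM reference.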
2273         // Create post op tensors and populate post op with them
2274         auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
2275         {
2276             return SimpleTensor<T> { shape, data_type, 1 };
2277         });
2278 
2279         const int n          = rhs_shape[0];
2280         const int m          = lhs_shape[1];
2281         const int batch_size = lhs_shape[2];
2282 
2283         // Fill reference
2284         int tensor_idx = 0;
2285         fill(lhs, tensor_idx++);
2286         fill(rhs, tensor_idx++);
2287         fill(bias, tensor_idx++);
2288         for(auto &op : populated_post_ops.get_list())
2289         {
2290             for(auto tensor : op->arguments())
2291             {
2292                 fill(*tensor, tensor_idx++);
2293             }
2294         }
2295 
2296         if(broadcast_bias)
2297         {
2298             // In case of a broadcast bias, replicate the first row of the bias across the remaining M * batch_size - 1 rows
2299             for(int i = 1; i < m * batch_size; i++)
2300             {
2301                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
2302             }
2303         }
2304 
2305         SimpleTensor<T> out;
2306         out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
2307         // Ignore activation info if post ops are used instead
2308         if(populated_post_ops.size() > 0)
2309         {
2310             out = reference::post_ops<T>(out, populated_post_ops);
2311         }
2312         else
2313         {
2314             out = reference::activation_layer(out, act_info);
2315         }
2316         return out;
2317     }
2318 
2319     TensorType      _target{};
2320     SimpleTensor<T> _reference{};
2321 };
2322 
2323 template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
2324 class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
2325 {
2326 public:
2327     template <typename...>
2328     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
2329                const ActivationLayerInfo &act_info)
2330     {
2331         GEMMLHSMatrixInfo lhs_info;
2332         lhs_info.m0 = m0;
2333         lhs_info.k0 = k0;
2334 
2335         GEMMRHSMatrixInfo rhs_info;
2336         rhs_info.n0 = n0;
2337         rhs_info.k0 = k0;
2338 
2339         // In case of GEMM3D, m is the product between m_w and m_h
2340         const unsigned int m = m_w * m_h;
2341 
2342         // Set the tensor shapes for LHS and RHS matrices
2343         const TensorShape lhs_shape(k, m, batch_size);
2344         const TensorShape rhs_shape(n, k, batch_size);
2345         const TensorShape bias_shape(n, 1, 1);
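        // The bias is a single row of length N, always broadcast across M and the batch dimension.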
2346 
2347         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
2348         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
2349     }
2350 
2351 protected:
2352     template <typename U>
2353     void fill(U &&tensor, int i)
2354     {
2355         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
2356         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
2357 
2358         DistributionType distribution{ T(-1.0f), T(1.0f) };
2359         library->fill(tensor, distribution, i);
2360     }
2361 
2362     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
2363                               DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
2364     {
2365         // Create tensors
2366         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
2367         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
2368         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
2369         TensorType dst;
2370 
2371         const unsigned int M = lhs_shape[1];
2372         const unsigned int N = rhs_shape[0];
2373         const unsigned int K = lhs_shape[0];
2374         GEMMKernelInfo     kernel_info;
2375         kernel_info.m                       = M;
2376         kernel_info.n                       = N;
2377         kernel_info.k                       = K;
2378         kernel_info.depth_output_gemm3d     = m_h;
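        // A non-zero depth_output_gemm3d makes the kernel write the output as a 3D tensor of depth m_h,
        // i.e. the M rows are reinterpreted as an m_w x m_h block per batch.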
2379         kernel_info.reinterpret_input_as_3d = false;
2380         kernel_info.broadcast_bias          = true;
2381         kernel_info.activation_info         = act_info;
2382 
2383         // The output tensor will be auto-initialized within the function
2384 
2385         // Create and configure function
2386         GEMMOperatorType gemm;
2387         gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
2388 
2389         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
2390         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
2391         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
2392 
2393         add_padding_x({ &lhs, &rhs, &bias, &dst });
2394 
2395         // Allocate tensors
2396         lhs.allocator()->allocate();
2397         rhs.allocator()->allocate();
2398         bias.allocator()->allocate();
2399         dst.allocator()->allocate();
2400 
2401         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
2402         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
2403         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
2404         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
2405 
2406         // Fill tensors
2407         fill(AccessorType(lhs), 0);
2408         fill(AccessorType(rhs), 1);
2409         fill(AccessorType(bias), 2);
2410 
2411         // Compute GEMM
2412         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
2413             { ACL_SRC_1, &rhs },
2414             { ACL_SRC_2, &bias },
2415             { ACL_DST, &dst }
2416         });
2417         gemm.run(gemm_pack);
2418 
2419         return dst;
2420     }
2421 
2422     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
2423                                       const ActivationLayerInfo &act_info)
2424     {
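        // The reference output is reshaped to (N, M / m_h, m_h, batch) to match the GEMM3D output layout
        // produced by the target.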
2425         TensorShape dst_shape = lhs_shape;
2426         dst_shape.set(0, rhs_shape[0]);
2427         dst_shape.set(1, lhs_shape[1] / m_h);
2428         dst_shape.set(2, m_h);
2429         dst_shape.set(3, lhs_shape[2]);
2430 
2431         // Create reference
2432         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
2433         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
2434         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
2435 
2436         const int n          = rhs_shape[0];
2437         const int m          = lhs_shape[1];
2438         const int batch_size = lhs_shape[2];
2439 
2440         // Fill reference
2441         fill(lhs, 0);
2442         fill(rhs, 1);
2443         fill(bias, 2);
2444 
2445         // The bias is always broadcast here: replicate its first row across the remaining M * batch_size - 1 rows
2446         for(int i = 1; i < m * batch_size; i++)
2447         {
2448             memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
2449         }
2450 
2451         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
2452     }
2453 
2454     TensorType      _target{};
2455     SimpleTensor<T> _reference{};
2456 };
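// A minimal usage sketch (illustrative only, not part of this header): a backend validation suite would
// typically alias the fixture with concrete tensor/accessor/operator types and bind it to a dataset, e.g.:
//
//   using MyGEMMNative3DFixture = GEMMMatrixMultiplyNative3DValidationFixture<CLTensor, CLAccessor, float, MyGemmOperator>;
//   FIXTURE_DATA_TEST_CASE(RunSmall, MyGEMMNative3DFixture, framework::DatasetMode::ALL, my_small_dataset)
//   {
//       validate(CLAccessor(_target), _reference, tolerance_f32);
//   }
//
// MyGemmOperator, my_small_dataset and tolerance_f32 are placeholders; the real aliases live in the
// backend-specific test files.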
2457 
2458 template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
2459 class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
2460 {
2461 public:
2462     template <typename...>
2463     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
2464                float beta, bool broadcast_bias,
2465                const ActivationLayerInfo &act_info)
2466     {
2467         GEMMLHSMatrixInfo lhs_info;
2468         lhs_info.m0 = m0;
2469         lhs_info.k0 = k0;
2470 
2471         GEMMRHSMatrixInfo rhs_info;
2472         rhs_info.n0                 = n0;
2473         rhs_info.k0                 = k0;
2474         rhs_info.interleave         = true;
2475         rhs_info.transpose          = false;
2476         rhs_info.h0                 = 4;
2477         rhs_info.export_to_cl_image = export_to_cl_image;
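        // Fixed RHS reshape configuration for the MMUL path: n0 x k0 blocks, interleaved with h0 = 4 and
        // not transposed; optionally the reshaped RHS is exported to an OpenCL image for texture reads.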
2478 
2479         // Set the tensor shapes for LHS and RHS matrices
2480         const TensorShape lhs_shape(k, m, batch_size);
2481         const TensorShape rhs_shape(n, k, batch_size);
2482         const TensorShape bias_shape(n,
2483                                      broadcast_bias ? 1 : m,
2484                                      broadcast_bias ? 1 : batch_size);
2485 
2486         _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
2487         _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
2488     }
2489 
2490 protected:
2491     template <typename U>
2492     void fill(U &&tensor, int i)
2493     {
2494         static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
2495         using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
2496 
2497         DistributionType distribution{ T(-1.0f), T(1.0f) };
2498         library->fill(tensor, distribution, i);
2499 
2500         // Fill the tensor borders with infinity so that any out-of-bounds read by the kernel surfaces as NaN in the output (e.g. inf * 0)
2501         DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
2502         library->fill_borders_with_garbage(tensor, distribution_inf, i);
2503     }
2504 
2505     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
2506                               DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
2507     {
2508         // Create tensors
2509         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
2510         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
2511         TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
2512         TensorType rhs_reshaped;
2513         TensorType dst;
2514 
2515         const unsigned int M = lhs_shape[1];
2516         const unsigned int N = rhs_shape[0];
2517         const unsigned int K = lhs_shape[0];
2518         GEMMKernelInfo     kernel_info;
2519         kernel_info.m                       = M;
2520         kernel_info.n                       = N;
2521         kernel_info.k                       = K;
2522         kernel_info.depth_output_gemm3d     = 0;
2523         kernel_info.reinterpret_input_as_3d = false;
2524         kernel_info.broadcast_bias          = broadcast_bias;
2525         kernel_info.activation_info         = act_info;
2526 
2527         // Create and configure function
2528         ReshapeRHSOperatorType reshape_rhs;
2529         GEMMOperatorType       gemm;
2530 
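        // Validate before configuring: the MMUL variant may not be supported on the current target, in which
        // case the fixture records the failure in validate_result and skips both the run and the reference.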
2531         validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
2532         if(!validate_result)
2533         {
2534             return nullptr;
2535         }
2536 
2537         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
2538 
2539         validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
2540         if(!validate_result)
2541         {
2542             return nullptr;
2543         }
2544 
2545         gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
2546 
2547         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
2548         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
2549         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
2550 
2551         // Allocate tensors
2552         lhs.allocator()->allocate();
2553         rhs.allocator()->allocate();
2554         rhs_reshaped.allocator()->allocate();
2555         bias.allocator()->allocate();
2556         dst.allocator()->allocate();
2557 
2558         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
2559         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
2560         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
2561         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
2562         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
2563 
2564         // Fill tensors
2565         fill(AccessorType(lhs), 0);
2566         fill(AccessorType(rhs), 1);
2567         fill(AccessorType(bias), 2);
2568 
2569         // Compute GEMM
2570         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
2571         reshape_rhs.run(reshape_rhs_pack);
2572         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
2573             { ACL_SRC_1, &rhs_reshaped },
2574             { ACL_SRC_2, &bias },
2575             { ACL_DST, &dst }
2576         });
2577         gemm.run(gemm_pack);
2578 
2579         return dst;
2580     }
2581 
2582     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
2583                                       const ActivationLayerInfo &act_info)
2584     {
2585         if(!validate_result)
2586             return SimpleTensor<T>();
2587 
2588         TensorShape dst_shape = lhs_shape;
2589         dst_shape[0]          = rhs_shape[0];
2590         dst_shape[1]          = lhs_shape[1];
2591 
2592         // Create reference
2593         SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
2594         SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
2595         SimpleTensor<T> bias{ dst_shape, data_type, 1 };
2596 
2597         const int n          = rhs_shape[0];
2598         const int m          = lhs_shape[1];
2599         const int batch_size = lhs_shape[2];
2600 
2601         // Fill reference
2602         fill(lhs, 0);
2603         fill(rhs, 1);
2604         fill(bias, 2);
2605 
2606         if(broadcast_bias)
2607         {
2608             // In case of a broadcast bias, replicate the first row of the bias across the remaining M * batch_size - 1 rows
2609             for(int i = 1; i < m * batch_size; i++)
2610             {
2611                 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
2612             }
2613         }
2614 
2615         return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
2616     }
2617 
2618     bool            validate_result = true;
2619     TensorType      _target{};
2620     SimpleTensor<T> _reference{};
2621 };
2622 
2623 } // namespace validation
2624 } // namespace test
2625 } // namespace arm_compute
2626 #endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */
2627