xref: /aosp_15_r20/external/ComputeLibrary/src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "8b_mla.hpp"
26 
generic_get_packed_size(const VLType vec_type,const unsigned int acc_depth,const unsigned int kernel_rows,const unsigned int kernel_cols,const unsigned int n_input_channels)27 size_t generic_get_packed_size(
28   const VLType vec_type,
29   const unsigned int acc_depth,
30   const unsigned int kernel_rows,
31   const unsigned int kernel_cols,
32   const unsigned int n_input_channels
33 )
34 {
35   const auto per_iter = acc_depth * arm_gemm::utils::get_vector_length<int32_t>(vec_type);
36   return arm_gemm::roundup((long unsigned int) n_input_channels, per_iter) * kernel_rows * kernel_cols * sizeof(int8_t);
37 }
38 
generic_pack(const VLType vec_type,const unsigned int acc_depth,const unsigned int kernel_rows,const unsigned int kernel_cols,const unsigned int n_channels,void * _outptr,const void * _weights,size_t ld_weight_col,size_t ld_weight_row)39 void generic_pack(
40   const VLType vec_type,
41   const unsigned int acc_depth,
42   const unsigned int kernel_rows,
43   const unsigned int kernel_cols,
44   const unsigned int n_channels,
45   void *_outptr,
46   const void *_weights,
47   size_t ld_weight_col,
48   size_t ld_weight_row
49 )
50 {
51   int8_t *outptr = reinterpret_cast<int8_t *>(_outptr);
52   const int8_t *weights = reinterpret_cast<const int8_t *>(_weights);
53 
54   // Get the strides
55   ld_weight_col = (ld_weight_col == 0) ? n_channels * sizeof(int8_t) : ld_weight_col;
56   ld_weight_row = (ld_weight_row == 0) ? kernel_cols * ld_weight_col : ld_weight_row;
57 
58   // Pack into per-iter chunks.
59   const auto per_iter = acc_depth * arm_gemm::utils::get_vector_length<int32_t>(vec_type);
60   for (unsigned int c = 0; c < n_channels; c += per_iter)
61   {
62     auto weight_row = weights + c;
63     const auto to_copy = std::min<unsigned int>(per_iter, n_channels - c);
64 
65     for (unsigned int i = 0; i < kernel_rows; i++)
66     {
67       auto weight_col = weight_row;
68 
69       for (unsigned int j = 0; j < kernel_cols; j++)
70       {
71         memcpy(outptr, weight_col, to_copy);
72         outptr += per_iter;
73         weight_col += ld_weight_col;
74       }
75 
76       weight_row += ld_weight_row;
77     }
78   }
79 }
80 
81 namespace arm_conv {
82 namespace depthwise {
83 
84 ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 3, 3)
85 ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 5, 5)
86 ADD_IMPLEMENTATION(a64, u8q, uint8_t, None, 2, 3, 3)
87 ADD_IMPLEMENTATION(a64, u8q, uint8_t, None, 2, 5, 5)
88 
89 }  // namespace depthwise
90 }  // namespace arm_conv
91