1 /*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "8b_mla.hpp"
26
generic_get_packed_size(const VLType vec_type,const unsigned int acc_depth,const unsigned int kernel_rows,const unsigned int kernel_cols,const unsigned int n_input_channels)27 size_t generic_get_packed_size(
28 const VLType vec_type,
29 const unsigned int acc_depth,
30 const unsigned int kernel_rows,
31 const unsigned int kernel_cols,
32 const unsigned int n_input_channels
33 )
34 {
35 const auto per_iter = acc_depth * arm_gemm::utils::get_vector_length<int32_t>(vec_type);
36 return arm_gemm::roundup((long unsigned int) n_input_channels, per_iter) * kernel_rows * kernel_cols * sizeof(int8_t);
37 }
38
generic_pack(const VLType vec_type,const unsigned int acc_depth,const unsigned int kernel_rows,const unsigned int kernel_cols,const unsigned int n_channels,void * _outptr,const void * _weights,size_t ld_weight_col,size_t ld_weight_row)39 void generic_pack(
40 const VLType vec_type,
41 const unsigned int acc_depth,
42 const unsigned int kernel_rows,
43 const unsigned int kernel_cols,
44 const unsigned int n_channels,
45 void *_outptr,
46 const void *_weights,
47 size_t ld_weight_col,
48 size_t ld_weight_row
49 )
50 {
51 int8_t *outptr = reinterpret_cast<int8_t *>(_outptr);
52 const int8_t *weights = reinterpret_cast<const int8_t *>(_weights);
53
54 // Get the strides
55 ld_weight_col = (ld_weight_col == 0) ? n_channels * sizeof(int8_t) : ld_weight_col;
56 ld_weight_row = (ld_weight_row == 0) ? kernel_cols * ld_weight_col : ld_weight_row;
57
58 // Pack into per-iter chunks.
59 const auto per_iter = acc_depth * arm_gemm::utils::get_vector_length<int32_t>(vec_type);
60 for (unsigned int c = 0; c < n_channels; c += per_iter)
61 {
62 auto weight_row = weights + c;
63 const auto to_copy = std::min<unsigned int>(per_iter, n_channels - c);
64
65 for (unsigned int i = 0; i < kernel_rows; i++)
66 {
67 auto weight_col = weight_row;
68
69 for (unsigned int j = 0; j < kernel_cols; j++)
70 {
71 memcpy(outptr, weight_col, to_copy);
72 outptr += per_iter;
73 weight_col += ld_weight_col;
74 }
75
76 weight_row += ld_weight_row;
77 }
78 }
79 }
80
81 namespace arm_conv {
82 namespace depthwise {
83
84 ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 3, 3)
85 ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 5, 5)
86 ADD_IMPLEMENTATION(a64, u8q, uint8_t, None, 2, 3, 3)
87 ADD_IMPLEMENTATION(a64, u8q, uint8_t, None, 2, 5, 5)
88
89 } // namespace depthwise
90 } // namespace arm_conv
91