1 /*
2  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Licensed under the Apache License, Version 2.0 (the License); you may
7  * not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #include "ref_functions.h"
20 
/**
 * @brief Plain-C reference q7 depthwise convolution on a non-square HWC image.
 *
 * Every output channel is computed from the input channel with the same
 * index: for output element (y, x, c) the kernel taps
 * wt[(ky * dim_kernel_x + kx) * ch_im_out + c] are multiplied with
 * Im_in[(row * dim_im_in_x + col) * ch_im_in + c] and accumulated in an int.
 * Taps that fall outside the input image are skipped, i.e. they contribute
 * nothing to the sum.  The accumulator starts at the bias scaled by
 * 2^bias_shift (plus a rounding offset unless ARM_NN_TRUNCATE is defined),
 * is shifted right by out_shift and saturated to 8 bits with __SSAT.
 *
 * @param[in]  Im_in         input image, indexed as [row][col][channel]
 * @param[in]  dim_im_in_x   input image dimension x (columns)
 * @param[in]  dim_im_in_y   input image dimension y (rows)
 * @param[in]  ch_im_in      number of input image channels (channel stride of Im_in)
 * @param[in]  wt            kernel weights, indexed as [ker_y][ker_x][channel]
 * @param[in]  ch_im_out     number of filters, i.e. output image channels
 * @param[in]  dim_kernel_x  filter kernel size x
 * @param[in]  dim_kernel_y  filter kernel size y
 * @param[in]  padding_x     padding size x
 * @param[in]  padding_y     padding size y
 * @param[in]  stride_x      stride x
 * @param[in]  stride_y      stride y
 * @param[in]  bias          bias, one value per output channel
 * @param[in]  bias_shift    amount of left-shift applied to the bias
 * @param[in]  out_shift     amount of right-shift applied to the accumulator (0 is valid)
 * @param[out] Im_out        output image, indexed as [row][col][channel]
 * @param[in]  dim_im_out_x  output image dimension x
 * @param[in]  dim_im_out_y  output image dimension y
 * @param[in]  bufferA       buffer space for input; not used by this implementation
 * @param[in]  bufferB       buffer space for output; not used by this implementation
 *
 * @note The input is read at channel index i_ch_out while ch_im_in is used as
 *       the channel stride, so the caller must provide ch_im_out <= ch_im_in
 *       (presumably they are equal for a depthwise layer -- confirm with callers).
 */
void arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(const q7_t * Im_in,
                                                       const uint16_t dim_im_in_x,
                                                       const uint16_t dim_im_in_y,
                                                       const uint16_t ch_im_in,
                                                       const q7_t * wt,
                                                       const uint16_t ch_im_out,
                                                       const uint16_t dim_kernel_x,
                                                       const uint16_t dim_kernel_y,
                                                       const uint16_t padding_x,
                                                       const uint16_t padding_y,
                                                       const uint16_t stride_x,
                                                       const uint16_t stride_y,
                                                       const q7_t * bias,
                                                       const uint16_t bias_shift,
                                                       const uint16_t out_shift,
                                                       q7_t * Im_out,
                                                       const uint16_t dim_im_out_x,
                                                       const uint16_t dim_im_out_y,
                                                       q15_t * bufferA,
                                                       q7_t * bufferB)
{
    int       i_out_y, i_out_x, i_ch_out;
    int       i_ker_y, i_ker_x;

    /* The scratch buffers are not needed here; silence unused-parameter warnings. */
    (void)bufferA;
    (void)bufferB;

#ifndef ARM_NN_TRUNCATE
    /*
     * Round-to-nearest offset: half of the output LSB.  Written as
     * (1 << out_shift) >> 1 on an unsigned value so that out_shift == 0
     * yields 0 instead of an undefined shift by -1.
     */
    const int round_offset = (int)((0x1u << out_shift) >> 1);
#else
    /* Truncating mode: no rounding offset is added. */
    const int round_offset = 0;
#endif

    for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
    {
        /* Input row that lines up with kernel row 0 (negative inside the top padding). */
        const int base_row = stride_y * i_out_y - padding_y;

        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* Input column that lines up with kernel column 0. */
            const int base_col = stride_x * i_out_x - padding_x;

            for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
            {
                /*
                 * Scale the bias by multiplication rather than '<<': the bias
                 * may be negative and left-shifting a negative value is
                 * undefined behaviour in C.
                 */
                int       conv_out = bias[i_ch_out] * (1 << bias_shift) + round_offset;

                for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++)
                {
                    const int in_row = base_row + i_ker_y;

                    for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++)
                    {
                        const int in_col = base_col + i_ker_x;

                        /* Only taps that land inside the input image contribute. */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] *
                                wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out];
                        }
                    }
                }

                /* Requantize and saturate the accumulator to the signed 8-bit range. */
                Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] =
                    (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
}
76