xref: /btstack/port/stm32-f4discovery-usb/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c (revision a8f7f3fcbcd51f8d2e92aca076b6a9f812db358c)
/*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 
/* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_relu_q7.c
 * Description:  Q7 version of ReLU
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
30 
31 #include "arm_math.h"
32 #include "arm_nnfunctions.h"
33 
34 /**
35  *  @ingroup groupNN
36  */
37 
38 /**
39  * @addtogroup Acti
40  * @{
41  */
42 
43   /**
44    * @brief Q7 RELU function
45    * @param[in,out]   data        pointer to input
46    * @param[in]       size        number of elements
47    * @return none.
48    *
49    * @details
50    *
51    * Optimized relu with QSUB instructions.
52    *
53    */
54 
arm_relu_q7(q7_t * data,uint16_t size)55 void arm_relu_q7(q7_t * data, uint16_t size)
56 {
57 
58 #if defined (ARM_MATH_DSP)
59     /* Run the following code for Cortex-M4 and Cortex-M7 */
60 
61     uint16_t  i = size >> 2;
62     q7_t     *pIn = data;
63     q7_t     *pOut = data;
64     q31_t     in;
65     q31_t     buf;
66     q31_t     mask;
67 
68     while (i)
69     {
70         in = *__SIMD32(pIn)++;
71 
72         /* extract the first bit */
73         buf = __ROR(in & 0x80808080, 7);
74 
75         /* if MSB=1, mask will be 0xFF, 0x0 otherwise */
76         mask = __QSUB8(0x00000000, buf);
77 
78         *__SIMD32(pOut)++ = in & (~mask);
79         i--;
80     }
81 
82     i = size & 0x3;
83     while (i)
84     {
85         if (*pIn < 0)
86         {
87             *pIn = 0;
88         }
89         pIn++;
90         i--;
91     }
92 
93 #else
94     /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
95 
96     uint16_t  i;
97 
98     for (i = 0; i < size; i++)
99     {
100         if (data[i] < 0)
101             data[i] = 0;
102     }
103 
104 #endif                          /* ARM_MATH_DSP */
105 
106 }
107 
/**
 * @} end of Acti group
 */
111