1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "ConvImpl.hpp"
7
8 #include <armnn/utility/Assert.hpp>
9
10 #include <cmath>
11 #include <limits>
12
13 namespace armnn
14 {
15
QuantizedMultiplierSmallerThanOne(float multiplier)16 QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
17 {
18 ARMNN_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
19 if (multiplier == 0.0f)
20 {
21 m_Multiplier = 0;
22 m_RightShift = 0;
23 }
24 else
25 {
26 const double q = std::frexp(multiplier, &m_RightShift);
27 m_RightShift = -m_RightShift;
28 int64_t qFixed = static_cast<int64_t>(::round(q * (1ll << 31)));
29 ARMNN_ASSERT(qFixed <= (1ll << 31));
30 if (qFixed == (1ll << 31))
31 {
32 qFixed /= 2;
33 --m_RightShift;
34 }
35 ARMNN_ASSERT(m_RightShift >= 0);
36 ARMNN_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
37 m_Multiplier = static_cast<int32_t>(qFixed);
38 }
39 }
40
operator *(int32_t rhs) const41 int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
42 {
43 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
44 return RoundingDivideByPOT(x, m_RightShift);
45 }
46
SaturatingRoundingDoublingHighMul(int32_t a,int32_t b)47 int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
48 {
49 // Check for overflow.
50 if (a == b && a == std::numeric_limits<int32_t>::min())
51 {
52 return std::numeric_limits<int32_t>::max();
53 }
54 int64_t a_64(a);
55 int64_t b_64(b);
56 int64_t ab_64 = a_64 * b_64;
57 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
58 int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
59 return ab_x2_high32;
60 }
61
RoundingDivideByPOT(int32_t x,int exponent)62 int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
63 {
64 ARMNN_ASSERT(exponent >= 0 && exponent <= 31);
65 int32_t mask = (1 << exponent) - 1;
66 int32_t remainder = x & mask;
67 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
68 return (x >> exponent) + (remainder > threshold ? 1 : 0);
69 }
70
Convolve(const TensorShape & rInputShape,Decoder<float> & rInputDecoder,const TensorShape & rOutputShape,Encoder<float> & rOutputEncoder,const TensorShape & rFilterShape,Decoder<float> & rFilterDecoder,bool biasEnabled,Decoder<float> * pBiasDecoder,DataLayout dataLayout,unsigned int paddingTop,unsigned int paddingLeft,unsigned int xStride,unsigned int yStride,unsigned int xDilation,unsigned int yDilation,bool depthwise)71 void Convolve(const TensorShape& rInputShape,
72 Decoder<float>& rInputDecoder,
73 const TensorShape& rOutputShape,
74 Encoder<float>& rOutputEncoder,
75 const TensorShape& rFilterShape,
76 Decoder<float>& rFilterDecoder,
77 bool biasEnabled,
78 Decoder<float>* pBiasDecoder,
79 DataLayout dataLayout,
80 unsigned int paddingTop,
81 unsigned int paddingLeft,
82 unsigned int xStride,
83 unsigned int yStride,
84 unsigned int xDilation,
85 unsigned int yDilation,
86 bool depthwise)
87 {
88 if (biasEnabled && !pBiasDecoder)
89 {
90 throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
91 }
92 const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
93
94 const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
95 const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
96 const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
97
98 // Weights layout:
99 // Conv2d: [O,H,W,I]
100 // Depthwise: [1,H,W,O]
101 const unsigned int inputChannels = rInputShape[channelsIndex];
102 const unsigned int outputChannels = rOutputShape[channelsIndex];
103 const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
104
105 const unsigned int batchSize = rOutputShape[0];
106 const unsigned int outputHeight = rOutputShape[heightIndex];
107 const unsigned int outputWidth = rOutputShape[widthIndex];
108 const unsigned int inputHeight = rInputShape[heightIndex];
109 const unsigned int inputWidth = rInputShape[widthIndex];
110
111 const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
112 const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
113
114 const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
115 const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
116
117 const TensorShape biasShape{outputChannels};
118 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
119
120 for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
121 {
122 for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
123 {
124 for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
125 {
126 for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
127 {
128 // This loop goes over each output element.
129 float sum = 0.0f;
130
131 // For depthwise, each output channel corresponds to exactly one input channel.
132 // For normal, must loop over each input channel.
133 for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
134 {
135 for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
136 {
137 for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
138 {
139 // This loop goes over each input element for each output element.
140 unsigned int filterIndex = 0;
141
142 // Since dimensionality of kernel depends on depthwiseness, so does index.
143 if (depthwise)
144 {
145 cInput = cOutput / depthMultiplier;
146 // filterDepth = outputChannels;
147 filterIndex = xFilter * outputChannels + cOutput +
148 yFilter * filterWidth * outputChannels;
149 }
150 else
151 {
152 // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
153 // performance regression.
154 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
155 {
156 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
157 yFilter * filterWidth * inputChannels +
158 xFilter * inputChannels +
159 cInput;
160 }
161 else
162 {
163 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
164 cInput * filterWidth * filterHeight +
165 yFilter * filterWidth +
166 xFilter;
167 }
168 }
169
170 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
171 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
172
173 float inputValue;
174
175 // Check if we're in the padding.
176 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
177 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
178 {
179 inputValue = 0.0f;
180 }
181 else
182 {
183 unsigned int inputIndex = 0;
184
185 // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
186 // performance regression.
187 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
188 {
189 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
190 (yInput - paddingTop) * inputWidth * inputChannels +
191 (xInput - paddingLeft) * inputChannels +
192 cInput;
193 }
194 else
195 {
196 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
197 inputWidth * inputHeight * cInput +
198 inputWidth * (yInput - paddingTop) +
199 xInput - paddingLeft;
200 }
201 inputValue = inputVec[inputIndex];
202 }
203
204 sum += filterVec[filterIndex] * inputValue;
205 }
206 }
207 }
208
209 if (biasEnabled)
210 {
211 sum += biasVec[cOutput];
212 }
213
214 unsigned int outIdx;
215 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
216 {
217 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
218 yOutput * outputWidth * outputChannels +
219 xOutput * outputChannels +
220 cOutput;
221 }
222 else
223 {
224 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
225 cOutput * outputHeight * outputWidth +
226 yOutput * outputWidth +
227 xOutput;
228 }
229
230 rOutputEncoder[outIdx];
231 rOutputEncoder.Set(sum);
232 }
233 }
234 }
235 }
236 }
237
238 } // namespace armnn
239