xref: /aosp_15_r20/external/armnn/tests/InferenceTestImage.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "InferenceTestImage.hpp"
6 
7 #include <armnn/utility/Assert.hpp>
8 #include <armnn/utility/IgnoreUnused.hpp>
9 #include <armnn/utility/NumericCast.hpp>
10 
11 #include <fmt/format.h>
12 
13 #include <array>
14 
15 #define STB_IMAGE_IMPLEMENTATION
16 #include <stb/stb_image.h>
17 
18 #define STB_IMAGE_RESIZE_IMPLEMENTATION
19 #include <stb/stb_image_resize.h>
20 
21 #define STB_IMAGE_WRITE_IMPLEMENTATION
22 #include <stb/stb_image_write.h>
23 
24 namespace
25 {
26 
GetImageChannelIndex(ImageChannelLayout channelLayout,ImageChannel channel)27 unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
28 {
29     switch (channelLayout)
30     {
31     case ImageChannelLayout::Rgb:
32         return static_cast<unsigned int>(channel);
33     case ImageChannelLayout::Bgr:
34         return 2u - static_cast<unsigned int>(channel);
35     default:
36         throw UnknownImageChannelLayout(fmt::format("Unknown layout {}", static_cast<int>(channelLayout)));
37     }
38 }
39 
// Linear interpolation between a and b with weight w: yields a for w == 0 and b for w == 1.
inline float Lerp(float a, float b, float w)
{
    const float weightOfA = 1.f - w;
    return w * b + weightOfA * a;
}
44 
// Writes value into channel c of pixel (x, y) of a row-major buffer that stores three
// interleaved floats per pixel, each row being width pixels wide.
// No bounds checking is performed; the caller must have sized data accordingly.
inline void PutData(std::vector<float> & data,
                    const unsigned int width,
                    const unsigned int x,
                    const unsigned int y,
                    const unsigned int c,
                    float value)
{
    const unsigned int pixelIndex = (y * width) + x;
    const unsigned int elementIndex = (3 * pixelIndex) + c;
    data[elementIndex] = value;
}
54 
ResizeBilinearAndNormalize(const InferenceTestImage & image,const unsigned int outputWidth,const unsigned int outputHeight,const float scale,const std::array<float,3> & mean,const std::array<float,3> & stddev)55 std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
56                                               const unsigned int outputWidth,
57                                               const unsigned int outputHeight,
58                                               const float scale,
59                                               const std::array<float, 3>& mean,
60                                               const std::array<float, 3>& stddev)
61 {
62     std::vector<float> out;
63     out.resize(outputWidth * outputHeight * 3);
64 
65     // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
66     // image is projected into the input image to figure out the interpolants and weights. Note that this
67     // will yield different results than if projecting the centre of output texels.
68 
69     const unsigned int inputWidth = image.GetWidth();
70     const unsigned int inputHeight = image.GetHeight();
71 
72     // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
73     // in the input image.
74     const float scaleY = armnn::numeric_cast<float>(inputHeight) / armnn::numeric_cast<float>(outputHeight);
75     const float scaleX = armnn::numeric_cast<float>(inputWidth) / armnn::numeric_cast<float>(outputWidth);
76 
77     uint8_t rgb_x0y0[3];
78     uint8_t rgb_x1y0[3];
79     uint8_t rgb_x0y1[3];
80     uint8_t rgb_x1y1[3];
81 
82     for (unsigned int y = 0; y < outputHeight; ++y)
83     {
84         // Corresponding real-valued height coordinate in input image.
85         const float iy = armnn::numeric_cast<float>(y) * scaleY;
86 
87         // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
88         const float fiy = floorf(iy);
89         const unsigned int y0 = armnn::numeric_cast<unsigned int>(fiy);
90 
91         // Interpolation weight (range [0,1])
92         const float yw = iy - fiy;
93 
94         for (unsigned int x = 0; x < outputWidth; ++x)
95         {
96             // Real-valued and discrete width coordinates in input image.
97             const float ix = armnn::numeric_cast<float>(x) * scaleX;
98             const float fix = floorf(ix);
99             const unsigned int x0 = armnn::numeric_cast<unsigned int>(fix);
100 
101             // Interpolation weight (range [0,1]).
102             const float xw = ix - fix;
103 
104             // Discrete width/height coordinates of texels below and to the right of (x0, y0).
105             const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
106             const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
107 
108             std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
109             std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
110             std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
111             std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
112 
113             for (unsigned c=0; c<3; ++c)
114             {
115                 const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
116                 const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
117                 const float l = Lerp(ly0, ly1, yw);
118                 PutData(out, outputWidth, x, y, c, ((l / scale) - mean[c]) / stddev[c]);
119             }
120         }
121     }
122     return out;
123 }
124 
125 } // namespace
126 
InferenceTestImage(char const * filePath)127 InferenceTestImage::InferenceTestImage(char const* filePath)
128  : m_Width(0u)
129  , m_Height(0u)
130  , m_NumChannels(0u)
131 {
132     int width;
133     int height;
134     int channels;
135 
136     using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
137     StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
138 
139     if (stbData == nullptr)
140     {
141         throw InferenceTestImageLoadFailed(fmt::format("Could not load the image at {}", filePath));
142     }
143 
144     if (width == 0 || height == 0)
145     {
146         throw InferenceTestImageLoadFailed(fmt::format("Could not load empty image at {}", filePath));
147     }
148 
149     m_Width = armnn::numeric_cast<unsigned int>(width);
150     m_Height = armnn::numeric_cast<unsigned int>(height);
151     m_NumChannels = armnn::numeric_cast<unsigned int>(channels);
152 
153     const unsigned int sizeInBytes = GetSizeInBytes();
154     m_Data.resize(sizeInBytes);
155     memcpy(m_Data.data(), stbData.get(), sizeInBytes);
156 }
157 
GetPixelAs3Channels(unsigned int x,unsigned int y) const158 std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
159 {
160     if (x >= m_Width || y >= m_Height)
161     {
162         throw InferenceTestImageOutOfBoundsAccess(fmt::format("Attempted out of bounds image access. "
163             "Requested ({0}, {1}). Maximum valid coordinates ({2}, {3}).", x, y, (m_Width - 1), (m_Height - 1)));
164     }
165 
166     const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
167     const uint8_t* const pixelData = m_Data.data() + pixelOffset;
168     ARMNN_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
169 
170     std::array<uint8_t, 3> outPixelData;
171     outPixelData.fill(0);
172 
173     const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
174     for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
175     {
176         outPixelData[c] = pixelData[c];
177     }
178 
179     return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
180 }
181 
182 
StbResize(InferenceTestImage & im,const unsigned int newWidth,const unsigned int newHeight)183 void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
184 {
185     std::vector<uint8_t> newData;
186     newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
187 
188     // armnn::numeric_cast<>() is used for user-provided data (protecting about overflows).
189     // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
190     // a armnn::numeric_cast<>() handled the conversion).
191     const int nW = armnn::numeric_cast<int>(newWidth);
192     const int nH = armnn::numeric_cast<int>(newHeight);
193 
194     const int w = static_cast<int>(im.GetWidth());
195     const int h = static_cast<int>(im.GetHeight());
196     const int numChannels = static_cast<int>(im.GetNumChannels());
197 
198     const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
199     if (res == 0)
200     {
201         throw InferenceTestImageResizeFailed("The resizing operation failed");
202     }
203 
204     im.m_Data.swap(newData);
205     im.m_Width = newWidth;
206     im.m_Height = newHeight;
207 }
208 
Resize(unsigned int newWidth,unsigned int newHeight,const armnn::CheckLocation & location,const ResizingMethods meth,const std::array<float,3> & mean,const std::array<float,3> & stddev,const float scale)209 std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
210                                               unsigned int newHeight,
211                                               const armnn::CheckLocation& location,
212                                               const ResizingMethods meth,
213                                               const std::array<float, 3>& mean,
214                                               const std::array<float, 3>& stddev,
215                                               const float scale)
216 {
217     std::vector<float> out;
218     if (newWidth == 0 || newHeight == 0)
219     {
220         throw InferenceTestImageResizeFailed(fmt::format("None of the dimensions passed to a resize "
221             "operation can be zero. Requested width: {0}. Requested height: {1}.", newWidth, newHeight));
222     }
223 
224     switch (meth) {
225         case ResizingMethods::STB:
226         {
227             StbResize(*this, newWidth, newHeight);
228             break;
229         }
230         case ResizingMethods::BilinearAndNormalized:
231         {
232             out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, scale, mean, stddev);
233             break;
234         }
235         default:
236             throw InferenceTestImageResizeFailed(fmt::format("Unknown resizing method asked ArmNN only"
237                                                              " supports {STB, BilinearAndNormalized} {}",
238                                                              location.AsString()));
239     }
240     return out;
241 }
242 
Write(WriteFormat format,const char * filePath) const243 void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
244 {
245     const int w = static_cast<int>(GetWidth());
246     const int h = static_cast<int>(GetHeight());
247     const int numChannels = static_cast<int>(GetNumChannels());
248     int res = 0;
249 
250     switch (format)
251     {
252     case WriteFormat::Png:
253         {
254             res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
255             break;
256         }
257     case WriteFormat::Bmp:
258         {
259             res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
260             break;
261         }
262     case WriteFormat::Tga:
263         {
264             res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
265             break;
266         }
267     default:
268         throw InferenceTestImageWriteFailed(fmt::format("Unknown format {}", static_cast<int>(format)));
269     }
270 
271     if (res == 0)
272     {
273         throw InferenceTestImageWriteFailed(fmt::format("An error occurred when writing to file {}",
274                                                         filePath));
275     }
276 }
277 
278 template <typename TProcessValueCallable>
GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,const InferenceTestImage & image,TProcessValueCallable processValue)279 std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
280     const InferenceTestImage& image,
281     TProcessValueCallable processValue)
282 {
283     const unsigned int h = image.GetHeight();
284     const unsigned int w = image.GetWidth();
285 
286     std::vector<float> imageData;
287     imageData.resize(h * w * 3);
288 
289     for (unsigned int j = 0; j < h; ++j)
290     {
291         for (unsigned int i = 0; i < w; ++i)
292         {
293             uint8_t r, g, b;
294             std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
295 
296             // ArmNN order: C, H, W
297             const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
298             const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
299             const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
300 
301             imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
302             imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
303             imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
304         }
305     }
306 
307     return imageData;
308 }
309 
GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,const InferenceTestImage & image)310 std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
311     const InferenceTestImage& image)
312 {
313     return GetImageDataInArmNnLayoutAsFloats(layout, image,
314         [](ImageChannel channel, float value)
315         {
316             armnn::IgnoreUnused(channel);
317             return value / 255.f;
318         });
319 }
320 
GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,const InferenceTestImage & image,const std::array<float,3> & mean)321 std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
322     const InferenceTestImage& image,
323     const std::array<float, 3>& mean)
324 {
325     return GetImageDataInArmNnLayoutAsFloats(layout, image,
326         [layout, &mean](ImageChannel channel, float value)
327         {
328             const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
329             return value - mean[channelIndex];
330         });
331 }
332 
GetImageDataAsNormalizedFloats(ImageChannelLayout layout,const InferenceTestImage & image)333 std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
334                                                   const InferenceTestImage& image)
335 {
336     std::vector<float> imageData;
337     const unsigned int h = image.GetHeight();
338     const unsigned int w = image.GetWidth();
339 
340     const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
341     const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
342     const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
343 
344     imageData.resize(h * w * 3);
345     unsigned int offset = 0;
346 
347     for (unsigned int j = 0; j < h; ++j)
348     {
349         for (unsigned int i = 0; i < w; ++i)
350         {
351             uint8_t r, g, b;
352             std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
353 
354             imageData[offset+rDstIndex] = float(r) / 255.0f;
355             imageData[offset+gDstIndex] = float(g) / 255.0f;
356             imageData[offset+bDstIndex] = float(b) / 255.0f;
357             offset += 3;
358         }
359     }
360 
361     return imageData;
362 }
363