xref: /aosp_15_r20/external/angle/src/image_util/loadimage.cpp (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright 2013 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // angle_loadimage.cpp: Defines image loading functions.
8 
9 #include "image_util/loadimage.h"
10 
11 #include "common/mathutil.h"
12 #include "common/platform.h"
13 #include "image_util/imageformats.h"
14 
15 #if defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
16 #    if defined(_MSC_VER)
17 #        include <intrin.h>
18 #        define ANGLE_LOADIMAGE_USE_SSE
19 #    elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
20 #        include <x86intrin.h>
21 #        if __SSE__
22 #            define ANGLE_LOADIMAGE_USE_SSE
23 #        endif
24 #    endif
25 #endif
26 
27 #if defined(ANGLE_LOADIMAGE_USE_SSE)
supportsSSE2()28 inline bool supportsSSE2()
29 {
30     static bool checked  = false;
31     static bool supports = false;
32 
33     if (checked)
34     {
35         return supports;
36     }
37 
38     int info[4];
39     __cpuid(info, 0);
40 
41     if (info[0] >= 1)
42     {
43         __cpuid(info, 1);
44 
45         supports = (info[3] >> 26) & 1;
46     }
47 
48     checked = true;
49     return supports;
50 }
51 #endif
52 
53 namespace angle
54 {
55 ImageLoadContext::ImageLoadContext()                              = default;
56 ImageLoadContext::~ImageLoadContext()                             = default;
57 ImageLoadContext::ImageLoadContext(const ImageLoadContext &other) = default;
58 
LoadA8ToRGBA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)59 void LoadA8ToRGBA8(const ImageLoadContext &context,
60                    size_t width,
61                    size_t height,
62                    size_t depth,
63                    const uint8_t *input,
64                    size_t inputRowPitch,
65                    size_t inputDepthPitch,
66                    uint8_t *output,
67                    size_t outputRowPitch,
68                    size_t outputDepthPitch)
69 {
70 #if defined(ANGLE_LOADIMAGE_USE_SSE)
71     if (supportsSSE2())
72     {
73         __m128i zeroWide = _mm_setzero_si128();
74 
75         for (size_t z = 0; z < depth; z++)
76         {
77             for (size_t y = 0; y < height; y++)
78             {
79                 const uint8_t *source =
80                     priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
81                 uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
82                                                                    outputDepthPitch);
83 
84                 size_t x = 0;
85 
86                 // Make output writes aligned
87                 for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++)
88                 {
89                     dest[x] = static_cast<uint32_t>(source[x]) << 24;
90                 }
91 
92                 for (; x + 7 < width; x += 8)
93                 {
94                     __m128i sourceData =
95                         _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&source[x]));
96                     // Interleave each byte to 16bit, make the lower byte to zero
97                     sourceData = _mm_unpacklo_epi8(zeroWide, sourceData);
98                     // Interleave each 16bit to 32bit, make the lower 16bit to zero
99                     __m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData);
100                     __m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData);
101 
102                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), lo);
103                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x + 4]), hi);
104                 }
105 
106                 // Handle the remainder
107                 for (; x < width; x++)
108                 {
109                     dest[x] = static_cast<uint32_t>(source[x]) << 24;
110                 }
111             }
112         }
113 
114         return;
115     }
116 #endif
117 
118     for (size_t z = 0; z < depth; z++)
119     {
120         for (size_t y = 0; y < height; y++)
121         {
122             const uint8_t *source =
123                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
124             uint32_t *dest =
125                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
126             for (size_t x = 0; x < width; x++)
127             {
128                 dest[x] = static_cast<uint32_t>(source[x]) << 24;
129             }
130         }
131     }
132 }
133 
LoadA8ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)134 void LoadA8ToBGRA8(const ImageLoadContext &context,
135                    size_t width,
136                    size_t height,
137                    size_t depth,
138                    const uint8_t *input,
139                    size_t inputRowPitch,
140                    size_t inputDepthPitch,
141                    uint8_t *output,
142                    size_t outputRowPitch,
143                    size_t outputDepthPitch)
144 {
145     // Same as loading to RGBA
146     LoadA8ToRGBA8(context, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
147                   outputRowPitch, outputDepthPitch);
148 }
149 
LoadA32FToRGBA32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)150 void LoadA32FToRGBA32F(const ImageLoadContext &context,
151                        size_t width,
152                        size_t height,
153                        size_t depth,
154                        const uint8_t *input,
155                        size_t inputRowPitch,
156                        size_t inputDepthPitch,
157                        uint8_t *output,
158                        size_t outputRowPitch,
159                        size_t outputDepthPitch)
160 {
161     for (size_t z = 0; z < depth; z++)
162     {
163         for (size_t y = 0; y < height; y++)
164         {
165             const float *source =
166                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
167             float *dest =
168                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
169             for (size_t x = 0; x < width; x++)
170             {
171                 dest[4 * x + 0] = 0.0f;
172                 dest[4 * x + 1] = 0.0f;
173                 dest[4 * x + 2] = 0.0f;
174                 dest[4 * x + 3] = source[x];
175             }
176         }
177     }
178 }
179 
LoadA16FToRGBA16F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)180 void LoadA16FToRGBA16F(const ImageLoadContext &context,
181                        size_t width,
182                        size_t height,
183                        size_t depth,
184                        const uint8_t *input,
185                        size_t inputRowPitch,
186                        size_t inputDepthPitch,
187                        uint8_t *output,
188                        size_t outputRowPitch,
189                        size_t outputDepthPitch)
190 {
191     for (size_t z = 0; z < depth; z++)
192     {
193         for (size_t y = 0; y < height; y++)
194         {
195             const uint16_t *source =
196                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
197             uint16_t *dest =
198                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
199             for (size_t x = 0; x < width; x++)
200             {
201                 dest[4 * x + 0] = 0;
202                 dest[4 * x + 1] = 0;
203                 dest[4 * x + 2] = 0;
204                 dest[4 * x + 3] = source[x];
205             }
206         }
207     }
208 }
209 
LoadL8ToRGBA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)210 void LoadL8ToRGBA8(const ImageLoadContext &context,
211                    size_t width,
212                    size_t height,
213                    size_t depth,
214                    const uint8_t *input,
215                    size_t inputRowPitch,
216                    size_t inputDepthPitch,
217                    uint8_t *output,
218                    size_t outputRowPitch,
219                    size_t outputDepthPitch)
220 {
221     for (size_t z = 0; z < depth; z++)
222     {
223         for (size_t y = 0; y < height; y++)
224         {
225             const uint8_t *source =
226                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
227             uint8_t *dest =
228                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
229             for (size_t x = 0; x < width; x++)
230             {
231                 uint8_t sourceVal = source[x];
232                 dest[4 * x + 0]   = sourceVal;
233                 dest[4 * x + 1]   = sourceVal;
234                 dest[4 * x + 2]   = sourceVal;
235                 dest[4 * x + 3]   = 0xFF;
236             }
237         }
238     }
239 }
240 
LoadL8ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)241 void LoadL8ToBGRA8(const ImageLoadContext &context,
242                    size_t width,
243                    size_t height,
244                    size_t depth,
245                    const uint8_t *input,
246                    size_t inputRowPitch,
247                    size_t inputDepthPitch,
248                    uint8_t *output,
249                    size_t outputRowPitch,
250                    size_t outputDepthPitch)
251 {
252     // Same as loading to RGBA
253     LoadL8ToRGBA8(context, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
254                   outputRowPitch, outputDepthPitch);
255 }
256 
LoadL32FToRGBA32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)257 void LoadL32FToRGBA32F(const ImageLoadContext &context,
258                        size_t width,
259                        size_t height,
260                        size_t depth,
261                        const uint8_t *input,
262                        size_t inputRowPitch,
263                        size_t inputDepthPitch,
264                        uint8_t *output,
265                        size_t outputRowPitch,
266                        size_t outputDepthPitch)
267 {
268     for (size_t z = 0; z < depth; z++)
269     {
270         for (size_t y = 0; y < height; y++)
271         {
272             const float *source =
273                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
274             float *dest =
275                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
276             for (size_t x = 0; x < width; x++)
277             {
278                 dest[4 * x + 0] = source[x];
279                 dest[4 * x + 1] = source[x];
280                 dest[4 * x + 2] = source[x];
281                 dest[4 * x + 3] = 1.0f;
282             }
283         }
284     }
285 }
286 
LoadL16FToRGBA16F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)287 void LoadL16FToRGBA16F(const ImageLoadContext &context,
288                        size_t width,
289                        size_t height,
290                        size_t depth,
291                        const uint8_t *input,
292                        size_t inputRowPitch,
293                        size_t inputDepthPitch,
294                        uint8_t *output,
295                        size_t outputRowPitch,
296                        size_t outputDepthPitch)
297 {
298     for (size_t z = 0; z < depth; z++)
299     {
300         for (size_t y = 0; y < height; y++)
301         {
302             const uint16_t *source =
303                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
304             uint16_t *dest =
305                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
306             for (size_t x = 0; x < width; x++)
307             {
308                 dest[4 * x + 0] = source[x];
309                 dest[4 * x + 1] = source[x];
310                 dest[4 * x + 2] = source[x];
311                 dest[4 * x + 3] = gl::Float16One;
312             }
313         }
314     }
315 }
316 
LoadLA8ToRGBA4(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)317 void LoadLA8ToRGBA4(const ImageLoadContext &context,
318                     size_t width,
319                     size_t height,
320                     size_t depth,
321                     const uint8_t *input,
322                     size_t inputRowPitch,
323                     size_t inputDepthPitch,
324                     uint8_t *output,
325                     size_t outputRowPitch,
326                     size_t outputDepthPitch)
327 {
328     for (size_t z = 0; z < depth; z++)
329     {
330         for (size_t y = 0; y < height; y++)
331         {
332             const uint8_t *source =
333                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
334             uint8_t *dest =
335                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
336             for (size_t x = 0; x < width; x++)
337             {
338                 uint8_t l       = source[2 * x + 0] >> 4;
339                 uint8_t a       = source[2 * x + 1] >> 4;
340                 dest[4 * x + 0] = l | l << 4;
341                 dest[4 * x + 1] = l | a << 4;
342             }
343         }
344     }
345 }
346 
LoadLA8ToRGBA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)347 void LoadLA8ToRGBA8(const ImageLoadContext &context,
348                     size_t width,
349                     size_t height,
350                     size_t depth,
351                     const uint8_t *input,
352                     size_t inputRowPitch,
353                     size_t inputDepthPitch,
354                     uint8_t *output,
355                     size_t outputRowPitch,
356                     size_t outputDepthPitch)
357 {
358     for (size_t z = 0; z < depth; z++)
359     {
360         for (size_t y = 0; y < height; y++)
361         {
362             const uint8_t *source =
363                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
364             uint8_t *dest =
365                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
366             for (size_t x = 0; x < width; x++)
367             {
368                 dest[4 * x + 0] = source[2 * x + 0];
369                 dest[4 * x + 1] = source[2 * x + 0];
370                 dest[4 * x + 2] = source[2 * x + 0];
371                 dest[4 * x + 3] = source[2 * x + 1];
372             }
373         }
374     }
375 }
376 
LoadLA8ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)377 void LoadLA8ToBGRA8(const ImageLoadContext &context,
378                     size_t width,
379                     size_t height,
380                     size_t depth,
381                     const uint8_t *input,
382                     size_t inputRowPitch,
383                     size_t inputDepthPitch,
384                     uint8_t *output,
385                     size_t outputRowPitch,
386                     size_t outputDepthPitch)
387 {
388     // Same as loading to RGBA
389     LoadLA8ToRGBA8(context, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
390                    outputRowPitch, outputDepthPitch);
391 }
392 
LoadLA32FToRGBA32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)393 void LoadLA32FToRGBA32F(const ImageLoadContext &context,
394                         size_t width,
395                         size_t height,
396                         size_t depth,
397                         const uint8_t *input,
398                         size_t inputRowPitch,
399                         size_t inputDepthPitch,
400                         uint8_t *output,
401                         size_t outputRowPitch,
402                         size_t outputDepthPitch)
403 {
404     for (size_t z = 0; z < depth; z++)
405     {
406         for (size_t y = 0; y < height; y++)
407         {
408             const float *source =
409                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
410             float *dest =
411                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
412             for (size_t x = 0; x < width; x++)
413             {
414                 dest[4 * x + 0] = source[2 * x + 0];
415                 dest[4 * x + 1] = source[2 * x + 0];
416                 dest[4 * x + 2] = source[2 * x + 0];
417                 dest[4 * x + 3] = source[2 * x + 1];
418             }
419         }
420     }
421 }
422 
LoadLA16FToRGBA16F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)423 void LoadLA16FToRGBA16F(const ImageLoadContext &context,
424                         size_t width,
425                         size_t height,
426                         size_t depth,
427                         const uint8_t *input,
428                         size_t inputRowPitch,
429                         size_t inputDepthPitch,
430                         uint8_t *output,
431                         size_t outputRowPitch,
432                         size_t outputDepthPitch)
433 {
434     for (size_t z = 0; z < depth; z++)
435     {
436         for (size_t y = 0; y < height; y++)
437         {
438             const uint16_t *source =
439                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
440             uint16_t *dest =
441                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
442             for (size_t x = 0; x < width; x++)
443             {
444                 dest[4 * x + 0] = source[2 * x + 0];
445                 dest[4 * x + 1] = source[2 * x + 0];
446                 dest[4 * x + 2] = source[2 * x + 0];
447                 dest[4 * x + 3] = source[2 * x + 1];
448             }
449         }
450     }
451 }
452 
LoadRGB8ToBGR565(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)453 void LoadRGB8ToBGR565(const ImageLoadContext &context,
454                       size_t width,
455                       size_t height,
456                       size_t depth,
457                       const uint8_t *input,
458                       size_t inputRowPitch,
459                       size_t inputDepthPitch,
460                       uint8_t *output,
461                       size_t outputRowPitch,
462                       size_t outputDepthPitch)
463 {
464     for (size_t z = 0; z < depth; z++)
465     {
466         for (size_t y = 0; y < height; y++)
467         {
468             const uint8_t *source =
469                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
470             uint16_t *dest =
471                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
472             for (size_t x = 0; x < width; x++)
473             {
474                 uint8_t r8 = source[x * 3 + 0];
475                 uint8_t g8 = source[x * 3 + 1];
476                 uint8_t b8 = source[x * 3 + 2];
477                 auto r5    = static_cast<uint16_t>(r8 >> 3);
478                 auto g6    = static_cast<uint16_t>(g8 >> 2);
479                 auto b5    = static_cast<uint16_t>(b8 >> 3);
480                 dest[x]    = (r5 << 11) | (g6 << 5) | b5;
481             }
482         }
483     }
484 }
485 
LoadRGB565ToBGR565(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)486 void LoadRGB565ToBGR565(const ImageLoadContext &context,
487                         size_t width,
488                         size_t height,
489                         size_t depth,
490                         const uint8_t *input,
491                         size_t inputRowPitch,
492                         size_t inputDepthPitch,
493                         uint8_t *output,
494                         size_t outputRowPitch,
495                         size_t outputDepthPitch)
496 {
497     for (size_t z = 0; z < depth; z++)
498     {
499         for (size_t y = 0; y < height; y++)
500         {
501             const uint16_t *source =
502                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
503             uint16_t *dest =
504                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
505             for (size_t x = 0; x < width; x++)
506             {
507                 // The GL type RGB is packed with with red in the MSB, while the D3D11 type BGR
508                 // is packed with red in the LSB
509                 auto rgb    = source[x];
510                 uint16_t r5 = gl::getShiftedData<5, 11>(rgb);
511                 uint16_t g6 = gl::getShiftedData<6, 5>(rgb);
512                 uint16_t b5 = gl::getShiftedData<5, 0>(rgb);
513                 dest[x]     = (r5 << 11) | (g6 << 5) | b5;
514             }
515         }
516     }
517 }
518 
LoadRGB8ToBGRX8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)519 void LoadRGB8ToBGRX8(const ImageLoadContext &context,
520                      size_t width,
521                      size_t height,
522                      size_t depth,
523                      const uint8_t *input,
524                      size_t inputRowPitch,
525                      size_t inputDepthPitch,
526                      uint8_t *output,
527                      size_t outputRowPitch,
528                      size_t outputDepthPitch)
529 {
530     for (size_t z = 0; z < depth; z++)
531     {
532         for (size_t y = 0; y < height; y++)
533         {
534             const uint8_t *source =
535                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
536             uint8_t *dest =
537                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
538             for (size_t x = 0; x < width; x++)
539             {
540                 dest[4 * x + 0] = source[x * 3 + 2];
541                 dest[4 * x + 1] = source[x * 3 + 1];
542                 dest[4 * x + 2] = source[x * 3 + 0];
543                 dest[4 * x + 3] = 0xFF;
544             }
545         }
546     }
547 }
548 
LoadRG8ToBGRX8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)549 void LoadRG8ToBGRX8(const ImageLoadContext &context,
550                     size_t width,
551                     size_t height,
552                     size_t depth,
553                     const uint8_t *input,
554                     size_t inputRowPitch,
555                     size_t inputDepthPitch,
556                     uint8_t *output,
557                     size_t outputRowPitch,
558                     size_t outputDepthPitch)
559 {
560     for (size_t z = 0; z < depth; z++)
561     {
562         for (size_t y = 0; y < height; y++)
563         {
564             const uint8_t *source =
565                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
566             uint8_t *dest =
567                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
568             for (size_t x = 0; x < width; x++)
569             {
570                 dest[4 * x + 0] = 0x00;
571                 dest[4 * x + 1] = source[x * 2 + 1];
572                 dest[4 * x + 2] = source[x * 2 + 0];
573                 dest[4 * x + 3] = 0xFF;
574             }
575         }
576     }
577 }
578 
LoadR8ToBGRX8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)579 void LoadR8ToBGRX8(const ImageLoadContext &context,
580                    size_t width,
581                    size_t height,
582                    size_t depth,
583                    const uint8_t *input,
584                    size_t inputRowPitch,
585                    size_t inputDepthPitch,
586                    uint8_t *output,
587                    size_t outputRowPitch,
588                    size_t outputDepthPitch)
589 {
590     for (size_t z = 0; z < depth; z++)
591     {
592         for (size_t y = 0; y < height; y++)
593         {
594             const uint8_t *source =
595                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
596             uint8_t *dest =
597                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
598             for (size_t x = 0; x < width; x++)
599             {
600                 dest[4 * x + 0] = 0x00;
601                 dest[4 * x + 1] = 0x00;
602                 dest[4 * x + 2] = source[x];
603                 dest[4 * x + 3] = 0xFF;
604             }
605         }
606     }
607 }
608 
LoadR5G6B5ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)609 void LoadR5G6B5ToBGRA8(const ImageLoadContext &context,
610                        size_t width,
611                        size_t height,
612                        size_t depth,
613                        const uint8_t *input,
614                        size_t inputRowPitch,
615                        size_t inputDepthPitch,
616                        uint8_t *output,
617                        size_t outputRowPitch,
618                        size_t outputDepthPitch)
619 {
620     for (size_t z = 0; z < depth; z++)
621     {
622         for (size_t y = 0; y < height; y++)
623         {
624             const uint16_t *source =
625                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
626             uint8_t *dest =
627                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
628             for (size_t x = 0; x < width; x++)
629             {
630                 uint16_t rgb = source[x];
631                 dest[4 * x + 0] =
632                     static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
633                 dest[4 * x + 1] =
634                     static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
635                 dest[4 * x + 2] =
636                     static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
637                 dest[4 * x + 3] = 0xFF;
638             }
639         }
640     }
641 }
642 
LoadR5G6B5ToRGBA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)643 void LoadR5G6B5ToRGBA8(const ImageLoadContext &context,
644                        size_t width,
645                        size_t height,
646                        size_t depth,
647                        const uint8_t *input,
648                        size_t inputRowPitch,
649                        size_t inputDepthPitch,
650                        uint8_t *output,
651                        size_t outputRowPitch,
652                        size_t outputDepthPitch)
653 {
654     for (size_t z = 0; z < depth; z++)
655     {
656         for (size_t y = 0; y < height; y++)
657         {
658             const uint16_t *source =
659                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
660             uint8_t *dest =
661                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
662             for (size_t x = 0; x < width; x++)
663             {
664                 uint16_t rgb = source[x];
665                 dest[4 * x + 0] =
666                     static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
667                 dest[4 * x + 1] =
668                     static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
669                 dest[4 * x + 2] =
670                     static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
671                 dest[4 * x + 3] = 0xFF;
672             }
673         }
674     }
675 }
676 
LoadRGBA8ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)677 void LoadRGBA8ToBGRA8(const ImageLoadContext &context,
678                       size_t width,
679                       size_t height,
680                       size_t depth,
681                       const uint8_t *input,
682                       size_t inputRowPitch,
683                       size_t inputDepthPitch,
684                       uint8_t *output,
685                       size_t outputRowPitch,
686                       size_t outputDepthPitch)
687 {
688 #if defined(ANGLE_LOADIMAGE_USE_SSE)
689     if (supportsSSE2())
690     {
691         __m128i brMask = _mm_set1_epi32(0x00ff00ff);
692 
693         for (size_t z = 0; z < depth; z++)
694         {
695             for (size_t y = 0; y < height; y++)
696             {
697                 const uint32_t *source =
698                     priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
699                 uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
700                                                                    outputDepthPitch);
701 
702                 size_t x = 0;
703 
704                 // Make output writes aligned
705                 for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++)
706                 {
707                     uint32_t rgba = source[x];
708                     dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
709                 }
710 
711                 for (; x + 3 < width; x += 4)
712                 {
713                     __m128i sourceData =
714                         _mm_loadu_si128(reinterpret_cast<const __m128i *>(&source[x]));
715                     // Mask out g and a, which don't change
716                     __m128i gaComponents = _mm_andnot_si128(brMask, sourceData);
717                     // Mask out b and r
718                     __m128i brComponents = _mm_and_si128(sourceData, brMask);
719                     // Swap b and r
720                     __m128i brSwapped = _mm_shufflehi_epi16(
721                         _mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)),
722                         _MM_SHUFFLE(2, 3, 0, 1));
723                     __m128i result = _mm_or_si128(gaComponents, brSwapped);
724                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), result);
725                 }
726 
727                 // Perform leftover writes
728                 for (; x < width; x++)
729                 {
730                     uint32_t rgba = source[x];
731                     dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
732                 }
733             }
734         }
735 
736         return;
737     }
738 #endif
739 
740     for (size_t z = 0; z < depth; z++)
741     {
742         for (size_t y = 0; y < height; y++)
743         {
744             const uint32_t *source =
745                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
746             uint32_t *dest =
747                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
748             for (size_t x = 0; x < width; x++)
749             {
750                 uint32_t rgba = source[x];
751                 dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
752             }
753         }
754     }
755 }
756 
LoadRGBA8ToBGRA4(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)757 void LoadRGBA8ToBGRA4(const ImageLoadContext &context,
758                       size_t width,
759                       size_t height,
760                       size_t depth,
761                       const uint8_t *input,
762                       size_t inputRowPitch,
763                       size_t inputDepthPitch,
764                       uint8_t *output,
765                       size_t outputRowPitch,
766                       size_t outputDepthPitch)
767 {
768     for (size_t z = 0; z < depth; z++)
769     {
770         for (size_t y = 0; y < height; y++)
771         {
772             const uint32_t *source =
773                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
774             uint16_t *dest =
775                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
776             for (size_t x = 0; x < width; x++)
777             {
778                 uint32_t rgba8 = source[x];
779                 auto r4        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 4);
780                 auto g4        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 12);
781                 auto b4        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 20);
782                 auto a4        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 28);
783                 dest[x]        = (a4 << 12) | (r4 << 8) | (g4 << 4) | b4;
784             }
785         }
786     }
787 }
788 
LoadRGBA8ToRGBA4(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)789 void LoadRGBA8ToRGBA4(const ImageLoadContext &context,
790                       size_t width,
791                       size_t height,
792                       size_t depth,
793                       const uint8_t *input,
794                       size_t inputRowPitch,
795                       size_t inputDepthPitch,
796                       uint8_t *output,
797                       size_t outputRowPitch,
798                       size_t outputDepthPitch)
799 {
800     for (size_t z = 0; z < depth; z++)
801     {
802         for (size_t y = 0; y < height; y++)
803         {
804             const uint32_t *source =
805                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
806             uint16_t *dest =
807                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
808             for (size_t x = 0; x < width; x++)
809             {
810                 uint32_t rgba8 = source[x];
811                 auto r4        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 4);
812                 auto g4        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 12);
813                 auto b4        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 20);
814                 auto a4        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 28);
815                 dest[x]        = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4;
816             }
817         }
818     }
819 }
820 
LoadRGBA4ToARGB4(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)821 void LoadRGBA4ToARGB4(const ImageLoadContext &context,
822                       size_t width,
823                       size_t height,
824                       size_t depth,
825                       const uint8_t *input,
826                       size_t inputRowPitch,
827                       size_t inputDepthPitch,
828                       uint8_t *output,
829                       size_t outputRowPitch,
830                       size_t outputDepthPitch)
831 {
832     for (size_t z = 0; z < depth; z++)
833     {
834         for (size_t y = 0; y < height; y++)
835         {
836             const uint16_t *source =
837                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
838             uint16_t *dest =
839                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
840             for (size_t x = 0; x < width; x++)
841             {
842                 dest[x] = ANGLE_ROTR16(source[x], 4);
843             }
844         }
845     }
846 }
847 
LoadRGBA4ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)848 void LoadRGBA4ToBGRA8(const ImageLoadContext &context,
849                       size_t width,
850                       size_t height,
851                       size_t depth,
852                       const uint8_t *input,
853                       size_t inputRowPitch,
854                       size_t inputDepthPitch,
855                       uint8_t *output,
856                       size_t outputRowPitch,
857                       size_t outputDepthPitch)
858 {
859     for (size_t z = 0; z < depth; z++)
860     {
861         for (size_t y = 0; y < height; y++)
862         {
863             const uint16_t *source =
864                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
865             uint8_t *dest =
866                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
867             for (size_t x = 0; x < width; x++)
868             {
869                 uint16_t rgba = source[x];
870                 dest[4 * x + 0] =
871                     static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
872                 dest[4 * x + 1] =
873                     static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
874                 dest[4 * x + 2] =
875                     static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
876                 dest[4 * x + 3] =
877                     static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
878             }
879         }
880     }
881 }
882 
LoadRGBA4ToRGBA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)883 void LoadRGBA4ToRGBA8(const ImageLoadContext &context,
884                       size_t width,
885                       size_t height,
886                       size_t depth,
887                       const uint8_t *input,
888                       size_t inputRowPitch,
889                       size_t inputDepthPitch,
890                       uint8_t *output,
891                       size_t outputRowPitch,
892                       size_t outputDepthPitch)
893 {
894     for (size_t z = 0; z < depth; z++)
895     {
896         for (size_t y = 0; y < height; y++)
897         {
898             const uint16_t *source =
899                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
900             uint8_t *dest =
901                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
902             for (size_t x = 0; x < width; x++)
903             {
904                 uint16_t rgba = source[x];
905                 dest[4 * x + 0] =
906                     static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
907                 dest[4 * x + 1] =
908                     static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
909                 dest[4 * x + 2] =
910                     static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
911                 dest[4 * x + 3] =
912                     static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
913             }
914         }
915     }
916 }
917 
LoadBGRA4ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)918 void LoadBGRA4ToBGRA8(const ImageLoadContext &context,
919                       size_t width,
920                       size_t height,
921                       size_t depth,
922                       const uint8_t *input,
923                       size_t inputRowPitch,
924                       size_t inputDepthPitch,
925                       uint8_t *output,
926                       size_t outputRowPitch,
927                       size_t outputDepthPitch)
928 {
929     for (size_t z = 0; z < depth; z++)
930     {
931         for (size_t y = 0; y < height; y++)
932         {
933             const uint16_t *source =
934                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
935             uint8_t *dest =
936                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
937             for (size_t x = 0; x < width; x++)
938             {
939                 uint16_t bgra = source[x];
940                 dest[4 * x + 0] =
941                     static_cast<uint8_t>(((bgra & 0xF000) >> 8) | ((bgra & 0xF000) >> 12));
942                 dest[4 * x + 1] =
943                     static_cast<uint8_t>(((bgra & 0x0F00) >> 4) | ((bgra & 0x0F00) >> 8));
944                 dest[4 * x + 2] =
945                     static_cast<uint8_t>(((bgra & 0x00F0) << 0) | ((bgra & 0x00F0) >> 4));
946                 dest[4 * x + 3] =
947                     static_cast<uint8_t>(((bgra & 0x000F) << 4) | ((bgra & 0x000F) >> 0));
948             }
949         }
950     }
951 }
952 
LoadRGBA8ToBGR5A1(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)953 void LoadRGBA8ToBGR5A1(const ImageLoadContext &context,
954                        size_t width,
955                        size_t height,
956                        size_t depth,
957                        const uint8_t *input,
958                        size_t inputRowPitch,
959                        size_t inputDepthPitch,
960                        uint8_t *output,
961                        size_t outputRowPitch,
962                        size_t outputDepthPitch)
963 {
964     for (size_t z = 0; z < depth; z++)
965     {
966         for (size_t y = 0; y < height; y++)
967         {
968             const uint32_t *source =
969                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
970             uint16_t *dest =
971                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
972             for (size_t x = 0; x < width; x++)
973             {
974                 uint32_t rgba8 = source[x];
975                 auto r5        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 3);
976                 auto g5        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 11);
977                 auto b5        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 19);
978                 auto a1        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 31);
979                 dest[x]        = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
980             }
981         }
982     }
983 }
984 
LoadRGBA8ToRGB5A1(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)985 void LoadRGBA8ToRGB5A1(const ImageLoadContext &context,
986                        size_t width,
987                        size_t height,
988                        size_t depth,
989                        const uint8_t *input,
990                        size_t inputRowPitch,
991                        size_t inputDepthPitch,
992                        uint8_t *output,
993                        size_t outputRowPitch,
994                        size_t outputDepthPitch)
995 {
996     for (size_t z = 0; z < depth; z++)
997     {
998         for (size_t y = 0; y < height; y++)
999         {
1000             const uint32_t *source =
1001                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1002             uint16_t *dest =
1003                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1004             for (size_t x = 0; x < width; x++)
1005             {
1006                 uint32_t rgba8 = source[x];
1007                 auto r5        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 3);
1008                 auto g5        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 11);
1009                 auto b5        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 19);
1010                 auto a1        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 31);
1011                 dest[x]        = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
1012             }
1013         }
1014     }
1015 }
1016 
LoadRGB10A2ToBGR5A1(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1017 void LoadRGB10A2ToBGR5A1(const ImageLoadContext &context,
1018                          size_t width,
1019                          size_t height,
1020                          size_t depth,
1021                          const uint8_t *input,
1022                          size_t inputRowPitch,
1023                          size_t inputDepthPitch,
1024                          uint8_t *output,
1025                          size_t outputRowPitch,
1026                          size_t outputDepthPitch)
1027 {
1028     for (size_t z = 0; z < depth; z++)
1029     {
1030         for (size_t y = 0; y < height; y++)
1031         {
1032             const R10G10B10A2 *source =
1033                 priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
1034             uint16_t *dest =
1035                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1036             for (size_t x = 0; x < width; x++)
1037             {
1038                 R10G10B10A2 rgb10a2 = source[x];
1039 
1040                 uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
1041                 uint16_t g5 = static_cast<uint16_t>(rgb10a2.G >> 5u);
1042                 uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
1043                 uint16_t a1 = static_cast<uint16_t>(rgb10a2.A >> 1u);
1044 
1045                 dest[x] = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
1046             }
1047         }
1048     }
1049 }
1050 
LoadRGB10A2ToRGB5A1(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1051 void LoadRGB10A2ToRGB5A1(const ImageLoadContext &context,
1052                          size_t width,
1053                          size_t height,
1054                          size_t depth,
1055                          const uint8_t *input,
1056                          size_t inputRowPitch,
1057                          size_t inputDepthPitch,
1058                          uint8_t *output,
1059                          size_t outputRowPitch,
1060                          size_t outputDepthPitch)
1061 {
1062     for (size_t z = 0; z < depth; z++)
1063     {
1064         for (size_t y = 0; y < height; y++)
1065         {
1066             const R10G10B10A2 *source =
1067                 priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
1068             uint16_t *dest =
1069                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1070             for (size_t x = 0; x < width; x++)
1071             {
1072                 R10G10B10A2 rgb10a2 = source[x];
1073 
1074                 uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
1075                 uint16_t g5 = static_cast<uint16_t>(rgb10a2.G >> 5u);
1076                 uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
1077                 uint16_t a1 = static_cast<uint16_t>(rgb10a2.A >> 1u);
1078 
1079                 dest[x] = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
1080             }
1081         }
1082     }
1083 }
1084 
LoadRGB10A2ToRGB565(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1085 void LoadRGB10A2ToRGB565(const ImageLoadContext &context,
1086                          size_t width,
1087                          size_t height,
1088                          size_t depth,
1089                          const uint8_t *input,
1090                          size_t inputRowPitch,
1091                          size_t inputDepthPitch,
1092                          uint8_t *output,
1093                          size_t outputRowPitch,
1094                          size_t outputDepthPitch)
1095 {
1096     for (size_t z = 0; z < depth; z++)
1097     {
1098         for (size_t y = 0; y < height; y++)
1099         {
1100             const R10G10B10A2 *source =
1101                 priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
1102             uint16_t *dest =
1103                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1104             for (size_t x = 0; x < width; x++)
1105             {
1106                 R10G10B10A2 rgb10a2 = source[x];
1107 
1108                 uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
1109                 uint16_t g6 = static_cast<uint16_t>(rgb10a2.G >> 4u);
1110                 uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
1111 
1112                 dest[x] = (r5 << 11) | (g6 << 5) | b5;
1113             }
1114         }
1115     }
1116 }
1117 
LoadRGB5A1ToA1RGB5(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1118 void LoadRGB5A1ToA1RGB5(const ImageLoadContext &context,
1119                         size_t width,
1120                         size_t height,
1121                         size_t depth,
1122                         const uint8_t *input,
1123                         size_t inputRowPitch,
1124                         size_t inputDepthPitch,
1125                         uint8_t *output,
1126                         size_t outputRowPitch,
1127                         size_t outputDepthPitch)
1128 {
1129     for (size_t z = 0; z < depth; z++)
1130     {
1131         for (size_t y = 0; y < height; y++)
1132         {
1133             const uint16_t *source =
1134                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1135             uint16_t *dest =
1136                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1137             for (size_t x = 0; x < width; x++)
1138             {
1139                 dest[x] = ANGLE_ROTR16(source[x], 1);
1140             }
1141         }
1142     }
1143 }
1144 
LoadRGB5A1ToBGR5A1(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1145 void LoadRGB5A1ToBGR5A1(const ImageLoadContext &context,
1146                         size_t width,
1147                         size_t height,
1148                         size_t depth,
1149                         const uint8_t *input,
1150                         size_t inputRowPitch,
1151                         size_t inputDepthPitch,
1152                         uint8_t *output,
1153                         size_t outputRowPitch,
1154                         size_t outputDepthPitch)
1155 {
1156     for (size_t z = 0; z < depth; z++)
1157     {
1158         for (size_t y = 0; y < height; y++)
1159         {
1160             const uint16_t *source =
1161                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1162             uint16_t *dest =
1163                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1164             for (size_t x = 0; x < width; x++)
1165             {
1166                 uint16_t rgba = source[x];
1167                 auto r5       = static_cast<uint16_t>((rgba & 0xF800) >> 11);
1168                 auto g5       = static_cast<uint16_t>((rgba & 0x07c0) >> 6);
1169                 auto b5       = static_cast<uint16_t>((rgba & 0x003e) >> 1);
1170                 auto a1       = static_cast<uint16_t>((rgba & 0x0001));
1171                 dest[x]       = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1;
1172             }
1173         }
1174     }
1175 }
1176 
LoadRGB5A1ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1177 void LoadRGB5A1ToBGRA8(const ImageLoadContext &context,
1178                        size_t width,
1179                        size_t height,
1180                        size_t depth,
1181                        const uint8_t *input,
1182                        size_t inputRowPitch,
1183                        size_t inputDepthPitch,
1184                        uint8_t *output,
1185                        size_t outputRowPitch,
1186                        size_t outputDepthPitch)
1187 {
1188     for (size_t z = 0; z < depth; z++)
1189     {
1190         for (size_t y = 0; y < height; y++)
1191         {
1192             const uint16_t *source =
1193                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1194             uint8_t *dest =
1195                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1196             for (size_t x = 0; x < width; x++)
1197             {
1198                 uint16_t rgba = source[x];
1199                 dest[4 * x + 0] =
1200                     static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
1201                 dest[4 * x + 1] =
1202                     static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
1203                 dest[4 * x + 2] =
1204                     static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
1205                 dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
1206             }
1207         }
1208     }
1209 }
1210 
LoadRGB5A1ToRGBA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1211 void LoadRGB5A1ToRGBA8(const ImageLoadContext &context,
1212                        size_t width,
1213                        size_t height,
1214                        size_t depth,
1215                        const uint8_t *input,
1216                        size_t inputRowPitch,
1217                        size_t inputDepthPitch,
1218                        uint8_t *output,
1219                        size_t outputRowPitch,
1220                        size_t outputDepthPitch)
1221 {
1222     for (size_t z = 0; z < depth; z++)
1223     {
1224         for (size_t y = 0; y < height; y++)
1225         {
1226             const uint16_t *source =
1227                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1228             uint8_t *dest =
1229                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1230             for (size_t x = 0; x < width; x++)
1231             {
1232                 uint16_t rgba = source[x];
1233                 dest[4 * x + 0] =
1234                     static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
1235                 dest[4 * x + 1] =
1236                     static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
1237                 dest[4 * x + 2] =
1238                     static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
1239                 dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
1240             }
1241         }
1242     }
1243 }
1244 
LoadBGR5A1ToBGRA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1245 void LoadBGR5A1ToBGRA8(const ImageLoadContext &context,
1246                        size_t width,
1247                        size_t height,
1248                        size_t depth,
1249                        const uint8_t *input,
1250                        size_t inputRowPitch,
1251                        size_t inputDepthPitch,
1252                        uint8_t *output,
1253                        size_t outputRowPitch,
1254                        size_t outputDepthPitch)
1255 {
1256     for (size_t z = 0; z < depth; z++)
1257     {
1258         for (size_t y = 0; y < height; y++)
1259         {
1260             const uint16_t *source =
1261                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1262             uint8_t *dest =
1263                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1264             for (size_t x = 0; x < width; x++)
1265             {
1266                 uint16_t bgra = source[x];
1267                 dest[4 * x + 0] =
1268                     static_cast<uint8_t>(((bgra & 0xF800) >> 8) | ((bgra & 0xF800) >> 13));
1269                 dest[4 * x + 1] =
1270                     static_cast<uint8_t>(((bgra & 0x07C0) >> 3) | ((bgra & 0x07C0) >> 8));
1271                 dest[4 * x + 2] =
1272                     static_cast<uint8_t>(((bgra & 0x003E) << 2) | ((bgra & 0x003E) >> 3));
1273                 dest[4 * x + 3] = static_cast<uint8_t>((bgra & 0x0001) ? 0xFF : 0);
1274             }
1275         }
1276     }
1277 }
1278 
LoadRGB10A2ToRGBA8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1279 void LoadRGB10A2ToRGBA8(const ImageLoadContext &context,
1280                         size_t width,
1281                         size_t height,
1282                         size_t depth,
1283                         const uint8_t *input,
1284                         size_t inputRowPitch,
1285                         size_t inputDepthPitch,
1286                         uint8_t *output,
1287                         size_t outputRowPitch,
1288                         size_t outputDepthPitch)
1289 {
1290     for (size_t z = 0; z < depth; z++)
1291     {
1292         for (size_t y = 0; y < height; y++)
1293         {
1294             const uint32_t *source =
1295                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1296             uint8_t *dest =
1297                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1298             for (size_t x = 0; x < width; x++)
1299             {
1300                 uint32_t rgba   = source[x];
1301                 dest[4 * x + 0] = static_cast<uint8_t>((rgba & 0x000003FF) >> 2);
1302                 dest[4 * x + 1] = static_cast<uint8_t>((rgba & 0x000FFC00) >> 12);
1303                 dest[4 * x + 2] = static_cast<uint8_t>((rgba & 0x3FF00000) >> 22);
1304                 dest[4 * x + 3] = static_cast<uint8_t>(((rgba & 0xC0000000) >> 30) * 0x55);
1305             }
1306         }
1307     }
1308 }
1309 
LoadRGB10A2ToRGB8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1310 void LoadRGB10A2ToRGB8(const ImageLoadContext &context,
1311                        size_t width,
1312                        size_t height,
1313                        size_t depth,
1314                        const uint8_t *input,
1315                        size_t inputRowPitch,
1316                        size_t inputDepthPitch,
1317                        uint8_t *output,
1318                        size_t outputRowPitch,
1319                        size_t outputDepthPitch)
1320 {
1321     for (size_t z = 0; z < depth; z++)
1322     {
1323         for (size_t y = 0; y < height; y++)
1324         {
1325 
1326             const uint32_t *source =
1327                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1328             uint8_t *dest =
1329                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1330             for (size_t x = 0; x < width; x++)
1331             {
1332                 uint32_t rgba   = source[x];
1333                 dest[3 * x + 0] = static_cast<uint8_t>((rgba & 0x000003FF) >> 2);
1334                 dest[3 * x + 1] = static_cast<uint8_t>((rgba & 0x000FFC00) >> 12);
1335                 dest[3 * x + 2] = static_cast<uint8_t>((rgba & 0x3FF00000) >> 22);
1336             }
1337         }
1338     }
1339 }
1340 
LoadRGB10A2ToRGB10X2(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1341 void LoadRGB10A2ToRGB10X2(const ImageLoadContext &context,
1342                           size_t width,
1343                           size_t height,
1344                           size_t depth,
1345                           const uint8_t *input,
1346                           size_t inputRowPitch,
1347                           size_t inputDepthPitch,
1348                           uint8_t *output,
1349                           size_t outputRowPitch,
1350                           size_t outputDepthPitch)
1351 {
1352     for (size_t z = 0; z < depth; z++)
1353     {
1354         for (size_t y = 0; y < height; y++)
1355         {
1356             const uint32_t *source =
1357                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1358             uint32_t *dest =
1359                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1360             for (size_t x = 0; x < width; x++)
1361             {
1362                 dest[x] = source[x] | 0xC0000000;
1363             }
1364         }
1365     }
1366 }
1367 
LoadBGR10A2ToRGB10A2(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1368 void LoadBGR10A2ToRGB10A2(const ImageLoadContext &context,
1369                           size_t width,
1370                           size_t height,
1371                           size_t depth,
1372                           const uint8_t *input,
1373                           size_t inputRowPitch,
1374                           size_t inputDepthPitch,
1375                           uint8_t *output,
1376                           size_t outputRowPitch,
1377                           size_t outputDepthPitch)
1378 {
1379     for (size_t z = 0; z < depth; z++)
1380     {
1381         for (size_t y = 0; y < height; y++)
1382         {
1383             const uint32_t *source =
1384                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1385             uint32_t *dest =
1386                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1387             for (size_t x = 0; x < width; x++)
1388             {
1389                 const uint32_t src  = source[x];
1390                 const uint32_t srcB = src & 0x3FF;
1391                 const uint32_t srcG = src >> 10 & 0x3FF;
1392                 const uint32_t srcR = src >> 20 & 0x3FF;
1393                 const uint32_t srcA = src >> 30 & 0x3;
1394                 dest[x]             = srcR | srcG << 10 | srcB << 20 | srcA << 30;
1395             }
1396         }
1397     }
1398 }
1399 
LoadRGB16FToRGB9E5(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1400 void LoadRGB16FToRGB9E5(const ImageLoadContext &context,
1401                         size_t width,
1402                         size_t height,
1403                         size_t depth,
1404                         const uint8_t *input,
1405                         size_t inputRowPitch,
1406                         size_t inputDepthPitch,
1407                         uint8_t *output,
1408                         size_t outputRowPitch,
1409                         size_t outputDepthPitch)
1410 {
1411     for (size_t z = 0; z < depth; z++)
1412     {
1413         for (size_t y = 0; y < height; y++)
1414         {
1415             const uint16_t *source =
1416                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1417             uint32_t *dest =
1418                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1419             for (size_t x = 0; x < width; x++)
1420             {
1421                 dest[x] = gl::convertRGBFloatsTo999E5(gl::float16ToFloat32(source[x * 3 + 0]),
1422                                                       gl::float16ToFloat32(source[x * 3 + 1]),
1423                                                       gl::float16ToFloat32(source[x * 3 + 2]));
1424             }
1425         }
1426     }
1427 }
1428 
LoadRGB32FToRGB9E5(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1429 void LoadRGB32FToRGB9E5(const ImageLoadContext &context,
1430                         size_t width,
1431                         size_t height,
1432                         size_t depth,
1433                         const uint8_t *input,
1434                         size_t inputRowPitch,
1435                         size_t inputDepthPitch,
1436                         uint8_t *output,
1437                         size_t outputRowPitch,
1438                         size_t outputDepthPitch)
1439 {
1440     for (size_t z = 0; z < depth; z++)
1441     {
1442         for (size_t y = 0; y < height; y++)
1443         {
1444             const float *source =
1445                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1446             uint32_t *dest =
1447                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1448             for (size_t x = 0; x < width; x++)
1449             {
1450                 dest[x] = gl::convertRGBFloatsTo999E5(source[x * 3 + 0], source[x * 3 + 1],
1451                                                       source[x * 3 + 2]);
1452             }
1453         }
1454     }
1455 }
1456 
LoadRGB16FToRG11B10F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1457 void LoadRGB16FToRG11B10F(const ImageLoadContext &context,
1458                           size_t width,
1459                           size_t height,
1460                           size_t depth,
1461                           const uint8_t *input,
1462                           size_t inputRowPitch,
1463                           size_t inputDepthPitch,
1464                           uint8_t *output,
1465                           size_t outputRowPitch,
1466                           size_t outputDepthPitch)
1467 {
1468     for (size_t z = 0; z < depth; z++)
1469     {
1470         for (size_t y = 0; y < height; y++)
1471         {
1472             const uint16_t *source =
1473                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1474             uint32_t *dest =
1475                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1476             for (size_t x = 0; x < width; x++)
1477             {
1478                 dest[x] = (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 0])) << 0) |
1479                           (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 1])) << 11) |
1480                           (gl::float32ToFloat10(gl::float16ToFloat32(source[x * 3 + 2])) << 22);
1481             }
1482         }
1483     }
1484 }
1485 
LoadRGB32FToRG11B10F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1486 void LoadRGB32FToRG11B10F(const ImageLoadContext &context,
1487                           size_t width,
1488                           size_t height,
1489                           size_t depth,
1490                           const uint8_t *input,
1491                           size_t inputRowPitch,
1492                           size_t inputDepthPitch,
1493                           uint8_t *output,
1494                           size_t outputRowPitch,
1495                           size_t outputDepthPitch)
1496 {
1497     for (size_t z = 0; z < depth; z++)
1498     {
1499         for (size_t y = 0; y < height; y++)
1500         {
1501             const float *source =
1502                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1503             uint32_t *dest =
1504                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1505             for (size_t x = 0; x < width; x++)
1506             {
1507                 dest[x] = (gl::float32ToFloat11(source[x * 3 + 0]) << 0) |
1508                           (gl::float32ToFloat11(source[x * 3 + 1]) << 11) |
1509                           (gl::float32ToFloat10(source[x * 3 + 2]) << 22);
1510             }
1511         }
1512     }
1513 }
1514 
LoadD24S8ToS8D24(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1515 void LoadD24S8ToS8D24(const ImageLoadContext &context,
1516                       size_t width,
1517                       size_t height,
1518                       size_t depth,
1519                       const uint8_t *input,
1520                       size_t inputRowPitch,
1521                       size_t inputDepthPitch,
1522                       uint8_t *output,
1523                       size_t outputRowPitch,
1524                       size_t outputDepthPitch)
1525 {
1526     for (size_t z = 0; z < depth; z++)
1527     {
1528         for (size_t y = 0; y < height; y++)
1529         {
1530             const uint32_t *source =
1531                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1532             uint32_t *dest =
1533                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1534             for (size_t x = 0; x < width; x++)
1535             {
1536                 dest[x] = ANGLE_ROTL(source[x], 24);
1537             }
1538         }
1539     }
1540 }
1541 
LoadD24S8ToD32FS8X24(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1542 void LoadD24S8ToD32FS8X24(const ImageLoadContext &context,
1543                           size_t width,
1544                           size_t height,
1545                           size_t depth,
1546                           const uint8_t *input,
1547                           size_t inputRowPitch,
1548                           size_t inputDepthPitch,
1549                           uint8_t *output,
1550                           size_t outputRowPitch,
1551                           size_t outputDepthPitch)
1552 {
1553     for (size_t z = 0; z < depth; z++)
1554     {
1555         for (size_t y = 0; y < height; y++)
1556         {
1557             const uint32_t *source =
1558                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1559             float *destDepth =
1560                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1561             uint32_t *destStencil =
1562                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch) +
1563                 1;
1564             for (size_t x = 0; x < width; x++)
1565             {
1566                 destDepth[x * 2]   = (source[x] >> 8) / static_cast<float>(0xFFFFFF);
1567                 destStencil[x * 2] = source[x] & 0xFF;
1568             }
1569         }
1570     }
1571 }
1572 
LoadD24S8ToD32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1573 void LoadD24S8ToD32F(const ImageLoadContext &context,
1574                      size_t width,
1575                      size_t height,
1576                      size_t depth,
1577                      const uint8_t *input,
1578                      size_t inputRowPitch,
1579                      size_t inputDepthPitch,
1580                      uint8_t *output,
1581                      size_t outputRowPitch,
1582                      size_t outputDepthPitch)
1583 {
1584     for (size_t z = 0; z < depth; z++)
1585     {
1586         for (size_t y = 0; y < height; y++)
1587         {
1588             const uint32_t *source =
1589                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1590             float *destDepth =
1591                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1592             for (size_t x = 0; x < width; x++)
1593             {
1594                 destDepth[x] = (source[x] >> 8) / static_cast<float>(0xFFFFFF);
1595             }
1596         }
1597     }
1598 }
1599 
LoadD32ToD32FX32(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1600 void LoadD32ToD32FX32(const ImageLoadContext &context,
1601                       size_t width,
1602                       size_t height,
1603                       size_t depth,
1604                       const uint8_t *input,
1605                       size_t inputRowPitch,
1606                       size_t inputDepthPitch,
1607                       uint8_t *output,
1608                       size_t outputRowPitch,
1609                       size_t outputDepthPitch)
1610 {
1611     for (size_t z = 0; z < depth; z++)
1612     {
1613         for (size_t y = 0; y < height; y++)
1614         {
1615             const uint32_t *source =
1616                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1617             float *destDepth =
1618                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1619             for (size_t x = 0; x < width; x++)
1620             {
1621                 destDepth[x * 2] = source[x] / static_cast<float>(0xFFFFFFFF);
1622             }
1623         }
1624     }
1625 }
1626 
LoadD32ToD32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1627 void LoadD32ToD32F(const ImageLoadContext &context,
1628                    size_t width,
1629                    size_t height,
1630                    size_t depth,
1631                    const uint8_t *input,
1632                    size_t inputRowPitch,
1633                    size_t inputDepthPitch,
1634                    uint8_t *output,
1635                    size_t outputRowPitch,
1636                    size_t outputDepthPitch)
1637 {
1638     for (size_t z = 0; z < depth; z++)
1639     {
1640         for (size_t y = 0; y < height; y++)
1641         {
1642             const uint32_t *source =
1643                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1644             float *destDepth =
1645                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1646             for (size_t x = 0; x < width; x++)
1647             {
1648                 uint32_t sourcePixel = source[x];
1649                 destDepth[x]         = sourcePixel / static_cast<float>(0xFFFFFFFF);
1650             }
1651         }
1652     }
1653 }
1654 
LoadD32FToD32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1655 void LoadD32FToD32F(const ImageLoadContext &context,
1656                     size_t width,
1657                     size_t height,
1658                     size_t depth,
1659                     const uint8_t *input,
1660                     size_t inputRowPitch,
1661                     size_t inputDepthPitch,
1662                     uint8_t *output,
1663                     size_t outputRowPitch,
1664                     size_t outputDepthPitch)
1665 {
1666     for (size_t z = 0; z < depth; z++)
1667     {
1668         for (size_t y = 0; y < height; y++)
1669         {
1670             const float *source =
1671                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1672             float *dest =
1673                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1674             for (size_t x = 0; x < width; x++)
1675             {
1676                 dest[x] = gl::clamp01(source[x]);
1677             }
1678         }
1679     }
1680 }
1681 
LoadD32FS8X24ToS8D24(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1682 void LoadD32FS8X24ToS8D24(const ImageLoadContext &context,
1683                           size_t width,
1684                           size_t height,
1685                           size_t depth,
1686                           const uint8_t *input,
1687                           size_t inputRowPitch,
1688                           size_t inputDepthPitch,
1689                           uint8_t *output,
1690                           size_t outputRowPitch,
1691                           size_t outputDepthPitch)
1692 {
1693     for (size_t z = 0; z < depth; z++)
1694     {
1695         for (size_t y = 0; y < height; y++)
1696         {
1697             const float *sourceDepth =
1698                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1699             const uint32_t *sourceStencil =
1700                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch) + 1;
1701             uint32_t *dest =
1702                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1703             for (size_t x = 0; x < width; x++)
1704             {
1705                 uint32_t d = static_cast<uint32_t>(gl::clamp01(sourceDepth[x * 2]) * 0xFFFFFF);
1706                 uint32_t s = sourceStencil[x * 2] << 24;
1707                 dest[x]    = d | s;
1708             }
1709         }
1710     }
1711 }
1712 
LoadX24S8ToS8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1713 void LoadX24S8ToS8(const ImageLoadContext &context,
1714                    size_t width,
1715                    size_t height,
1716                    size_t depth,
1717                    const uint8_t *input,
1718                    size_t inputRowPitch,
1719                    size_t inputDepthPitch,
1720                    uint8_t *output,
1721                    size_t outputRowPitch,
1722                    size_t outputDepthPitch)
1723 {
1724     for (size_t z = 0; z < depth; z++)
1725     {
1726         for (size_t y = 0; y < height; y++)
1727         {
1728             const uint32_t *source = reinterpret_cast<const uint32_t *>(
1729                 input + (y * inputRowPitch) + (z * inputDepthPitch));
1730             uint8_t *destStencil =
1731                 reinterpret_cast<uint8_t *>(output + (y * outputRowPitch) + (z * outputDepthPitch));
1732             for (size_t x = 0; x < width; x++)
1733             {
1734                 destStencil[x] = (source[x] & 0xFF);
1735             }
1736         }
1737     }
1738 }
1739 
LoadX32S8ToS8(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1740 void LoadX32S8ToS8(const ImageLoadContext &context,
1741                    size_t width,
1742                    size_t height,
1743                    size_t depth,
1744                    const uint8_t *input,
1745                    size_t inputRowPitch,
1746                    size_t inputDepthPitch,
1747                    uint8_t *output,
1748                    size_t outputRowPitch,
1749                    size_t outputDepthPitch)
1750 {
1751     for (size_t z = 0; z < depth; z++)
1752     {
1753         for (size_t y = 0; y < height; y++)
1754         {
1755             const uint32_t *source = reinterpret_cast<const uint32_t *>(
1756                 input + (y * inputRowPitch) + (z * inputDepthPitch));
1757             uint8_t *destStencil =
1758                 reinterpret_cast<uint8_t *>(output + (y * outputRowPitch) + (z * outputDepthPitch));
1759             for (size_t x = 0; x < width; x++)
1760             {
1761                 destStencil[x] = (source[(x * 2) + 1] & 0xFF);
1762             }
1763         }
1764     }
1765 }
1766 
LoadD32FS8X24ToD32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1767 void LoadD32FS8X24ToD32F(const ImageLoadContext &context,
1768                          size_t width,
1769                          size_t height,
1770                          size_t depth,
1771                          const uint8_t *input,
1772                          size_t inputRowPitch,
1773                          size_t inputDepthPitch,
1774                          uint8_t *output,
1775                          size_t outputRowPitch,
1776                          size_t outputDepthPitch)
1777 {
1778     for (size_t z = 0; z < depth; z++)
1779     {
1780         for (size_t y = 0; y < height; y++)
1781         {
1782             const float *sourceDepth =
1783                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1784             float *destDepth =
1785                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1786             for (size_t x = 0; x < width; x++)
1787             {
1788                 destDepth[x] = gl::clamp01(sourceDepth[x * 2]);
1789             }
1790         }
1791     }
1792 }
1793 
LoadD32FS8X24ToD32FS8X24(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1794 void LoadD32FS8X24ToD32FS8X24(const ImageLoadContext &context,
1795                               size_t width,
1796                               size_t height,
1797                               size_t depth,
1798                               const uint8_t *input,
1799                               size_t inputRowPitch,
1800                               size_t inputDepthPitch,
1801                               uint8_t *output,
1802                               size_t outputRowPitch,
1803                               size_t outputDepthPitch)
1804 {
1805     for (size_t z = 0; z < depth; z++)
1806     {
1807         for (size_t y = 0; y < height; y++)
1808         {
1809             const float *sourceDepth =
1810                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1811             const uint32_t *sourceStencil =
1812                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch) + 1;
1813             float *destDepth =
1814                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1815             uint32_t *destStencil =
1816                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch) +
1817                 1;
1818             for (size_t x = 0; x < width; x++)
1819             {
1820                 destDepth[x * 2]   = gl::clamp01(sourceDepth[x * 2]);
1821                 destStencil[x * 2] = sourceStencil[x * 2] & 0xFF;
1822             }
1823         }
1824     }
1825 }
1826 
LoadRGB32FToRGBA16F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1827 void LoadRGB32FToRGBA16F(const ImageLoadContext &context,
1828                          size_t width,
1829                          size_t height,
1830                          size_t depth,
1831                          const uint8_t *input,
1832                          size_t inputRowPitch,
1833                          size_t inputDepthPitch,
1834                          uint8_t *output,
1835                          size_t outputRowPitch,
1836                          size_t outputDepthPitch)
1837 {
1838     for (size_t z = 0; z < depth; z++)
1839     {
1840         for (size_t y = 0; y < height; y++)
1841         {
1842             const float *source =
1843                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1844             uint16_t *dest =
1845                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1846             for (size_t x = 0; x < width; x++)
1847             {
1848                 dest[x * 4 + 0] = gl::float32ToFloat16(source[x * 3 + 0]);
1849                 dest[x * 4 + 1] = gl::float32ToFloat16(source[x * 3 + 1]);
1850                 dest[x * 4 + 2] = gl::float32ToFloat16(source[x * 3 + 2]);
1851                 dest[x * 4 + 3] = gl::Float16One;
1852             }
1853         }
1854     }
1855 }
1856 
LoadRGB32FToRGB16F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1857 void LoadRGB32FToRGB16F(const ImageLoadContext &context,
1858                         size_t width,
1859                         size_t height,
1860                         size_t depth,
1861                         const uint8_t *input,
1862                         size_t inputRowPitch,
1863                         size_t inputDepthPitch,
1864                         uint8_t *output,
1865                         size_t outputRowPitch,
1866                         size_t outputDepthPitch)
1867 {
1868     for (size_t z = 0; z < depth; z++)
1869     {
1870         for (size_t y = 0; y < height; y++)
1871         {
1872             const float *source =
1873                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1874             uint16_t *dest =
1875                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1876             for (size_t x = 0; x < width; x++)
1877             {
1878                 dest[x * 3 + 0] = gl::float32ToFloat16(source[x * 3 + 0]);
1879                 dest[x * 3 + 1] = gl::float32ToFloat16(source[x * 3 + 1]);
1880                 dest[x * 3 + 2] = gl::float32ToFloat16(source[x * 3 + 2]);
1881             }
1882         }
1883     }
1884 }
1885 
LoadR32ToR16(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1886 void LoadR32ToR16(const ImageLoadContext &context,
1887                   size_t width,
1888                   size_t height,
1889                   size_t depth,
1890                   const uint8_t *input,
1891                   size_t inputRowPitch,
1892                   size_t inputDepthPitch,
1893                   uint8_t *output,
1894                   size_t outputRowPitch,
1895                   size_t outputDepthPitch)
1896 {
1897     for (size_t z = 0; z < depth; z++)
1898     {
1899         for (size_t y = 0; y < height; y++)
1900         {
1901             const uint32_t *source =
1902                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1903             uint16_t *dest =
1904                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1905             for (size_t x = 0; x < width; x++)
1906             {
1907                 dest[x] = source[x] >> 16;
1908             }
1909         }
1910     }
1911 }
1912 
LoadD32ToX8D24(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1913 void LoadD32ToX8D24(const ImageLoadContext &context,
1914                     size_t width,
1915                     size_t height,
1916                     size_t depth,
1917                     const uint8_t *input,
1918                     size_t inputRowPitch,
1919                     size_t inputDepthPitch,
1920                     uint8_t *output,
1921                     size_t outputRowPitch,
1922                     size_t outputDepthPitch)
1923 {
1924     for (size_t z = 0; z < depth; z++)
1925     {
1926         for (size_t y = 0; y < height; y++)
1927         {
1928             const uint32_t *source =
1929                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1930             uint32_t *dest =
1931                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1932 
1933             for (size_t x = 0; x < width; x++)
1934             {
1935                 dest[x] = source[x] >> 8;
1936             }
1937         }
1938     }
1939 }
1940 
1941 // This conversion was added to support using a 32F depth buffer
1942 // as emulation for 16unorm depth buffer in Metal.
1943 // See https://anglebug.com/42265093
LoadD16ToD32F(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1944 void LoadD16ToD32F(const ImageLoadContext &context,
1945                    size_t width,
1946                    size_t height,
1947                    size_t depth,
1948                    const uint8_t *input,
1949                    size_t inputRowPitch,
1950                    size_t inputDepthPitch,
1951                    uint8_t *output,
1952                    size_t outputRowPitch,
1953                    size_t outputDepthPitch)
1954 {
1955     for (size_t z = 0; z < depth; z++)
1956     {
1957         for (size_t y = 0; y < height; y++)
1958         {
1959             const uint16_t *source =
1960                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1961             float *dest =
1962                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1963             for (size_t x = 0; x < width; x++)
1964             {
1965                 dest[x] = static_cast<float>(source[x]) / 0xFFFF;
1966             }
1967         }
1968     }
1969 }
1970 
LoadS8ToS8X24(const ImageLoadContext & context,size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1971 void LoadS8ToS8X24(const ImageLoadContext &context,
1972                    size_t width,
1973                    size_t height,
1974                    size_t depth,
1975                    const uint8_t *input,
1976                    size_t inputRowPitch,
1977                    size_t inputDepthPitch,
1978                    uint8_t *output,
1979                    size_t outputRowPitch,
1980                    size_t outputDepthPitch)
1981 {
1982     for (size_t z = 0; z < depth; z++)
1983     {
1984         for (size_t y = 0; y < height; y++)
1985         {
1986             const uint8_t *source =
1987                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
1988             uint32_t *destStencil =
1989                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1990 
1991             for (size_t x = 0; x < width; x++)
1992             {
1993                 destStencil[x] = source[x] << 24;
1994             }
1995         }
1996     }
1997 }
1998 
// Copies YUV data from |input| to |output| unchanged, as a single block of
// |inputDepthPitch| bytes. The remaining size and pitch arguments, as well as
// |context|, are not consulted.
void LoadYuvToNative(const ImageLoadContext &context,
                     size_t width,
                     size_t height,
                     size_t depth,
                     const uint8_t *input,
                     size_t inputRowPitch,
                     size_t inputDepthPitch,
                     uint8_t *output,
                     size_t outputRowPitch,
                     size_t outputDepthPitch)
{
    // YUV sources are assumed to be tightly packed, so the input depth pitch is
    // taken as the total number of bytes to transfer.
    const size_t byteCount = inputDepthPitch;
    memcpy(output, input, byteCount);
}
2013 
2014 }  // namespace angle
2015