xref: /aosp_15_r20/external/OpenCL-CTS/test_common/harness/imageHelpers.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017,2021 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi //    http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #include "imageHelpers.h"
17*6467f958SSadaf Ebrahimi #include <limits.h>
18*6467f958SSadaf Ebrahimi #include <assert.h>
19*6467f958SSadaf Ebrahimi #if defined(__APPLE__)
20*6467f958SSadaf Ebrahimi #include <sys/mman.h>
21*6467f958SSadaf Ebrahimi #endif
22*6467f958SSadaf Ebrahimi #if !defined(_WIN32) && !defined(__APPLE__)
23*6467f958SSadaf Ebrahimi #include <malloc.h>
24*6467f958SSadaf Ebrahimi #endif
25*6467f958SSadaf Ebrahimi #include <algorithm>
26*6467f958SSadaf Ebrahimi #include <cinttypes>
27*6467f958SSadaf Ebrahimi #include <iterator>
28*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
29*6467f958SSadaf Ebrahimi #include <cmath>
30*6467f958SSadaf Ebrahimi #endif
31*6467f958SSadaf Ebrahimi 
32*6467f958SSadaf Ebrahimi RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
33*6467f958SSadaf Ebrahimi 
34*6467f958SSadaf Ebrahimi cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
35*6467f958SSadaf Ebrahimi bool gTestRounding = false;
// Encodes a linear colour component with the sRGB transfer curve and scales
// the result to the 0..255 range (no rounding is applied here).
// NaN and negative inputs yield 0; inputs above 1 yield 255.
double sRGBmap(float fc)
{
    const double linear = (double)fc;

#if !defined(_WIN32)
    const bool notANumber = std::isnan(linear);
#else
    const bool notANumber = _isnan(linear) != 0;
#endif

    // Out-of-range inputs saturate to the ends of the 8-bit range.
    if (notANumber || linear < 0.0) return 0.0;
    if (linear > 1.0) return 255.0;

    double encoded;
    if (linear < 0.0031308)
    {
        // Linear segment close to black.
        encoded = 12.92 * linear;
    }
    else
    {
        // Power-law segment for the remainder of the range.
        encoded =
            (1055.0 / 1000.0) * pow(linear, 5.0 / 12.0) - (55.0 / 1000.0);
    }

    return encoded * 255.0;
}
57*6467f958SSadaf Ebrahimi 
// Decodes an sRGB-encoded component (nominally in 0..1) back to linear
// space. Values up to 0.04045 use the linear segment, larger values the
// 2.4 power curve. No clamping is performed.
double sRGBunmap(float fc)
{
    const double encoded = (double)fc;

    return (encoded <= 0.04045) ? encoded / 12.92
                                : pow((encoded + 0.055) / 1.055, 2.4);
}
70*6467f958SSadaf Ebrahimi 
71*6467f958SSadaf Ebrahimi 
get_format_type_size(const cl_image_format * format)72*6467f958SSadaf Ebrahimi uint32_t get_format_type_size(const cl_image_format *format)
73*6467f958SSadaf Ebrahimi {
74*6467f958SSadaf Ebrahimi     return get_channel_data_type_size(format->image_channel_data_type);
75*6467f958SSadaf Ebrahimi }
76*6467f958SSadaf Ebrahimi 
get_channel_data_type_size(cl_channel_type channelType)77*6467f958SSadaf Ebrahimi uint32_t get_channel_data_type_size(cl_channel_type channelType)
78*6467f958SSadaf Ebrahimi {
79*6467f958SSadaf Ebrahimi     switch (channelType)
80*6467f958SSadaf Ebrahimi     {
81*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8:
82*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8:
83*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8:
84*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: return 1;
85*6467f958SSadaf Ebrahimi 
86*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16:
87*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16:
88*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16:
89*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16:
90*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT:
91*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
92*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE:
93*6467f958SSadaf Ebrahimi #endif
94*6467f958SSadaf Ebrahimi             return sizeof(cl_short);
95*6467f958SSadaf Ebrahimi 
96*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32:
97*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: return sizeof(cl_int);
98*6467f958SSadaf Ebrahimi 
99*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565:
100*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555:
101*6467f958SSadaf Ebrahimi #ifdef OBSOLETE_FORAMT
102*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565_REV:
103*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555_REV:
104*6467f958SSadaf Ebrahimi #endif
105*6467f958SSadaf Ebrahimi             return 2;
106*6467f958SSadaf Ebrahimi 
107*6467f958SSadaf Ebrahimi #ifdef OBSOLETE_FORAMT
108*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_8888:
109*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_8888_REV: return 4;
110*6467f958SSadaf Ebrahimi #endif
111*6467f958SSadaf Ebrahimi 
112*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010:
113*6467f958SSadaf Ebrahimi #ifdef OBSOLETE_FORAMT
114*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010_REV:
115*6467f958SSadaf Ebrahimi #endif
116*6467f958SSadaf Ebrahimi             return 4;
117*6467f958SSadaf Ebrahimi 
118*6467f958SSadaf Ebrahimi         case CL_FLOAT: return sizeof(cl_float);
119*6467f958SSadaf Ebrahimi 
120*6467f958SSadaf Ebrahimi         default: return 0;
121*6467f958SSadaf Ebrahimi     }
122*6467f958SSadaf Ebrahimi }
123*6467f958SSadaf Ebrahimi 
get_format_channel_count(const cl_image_format * format)124*6467f958SSadaf Ebrahimi uint32_t get_format_channel_count(const cl_image_format *format)
125*6467f958SSadaf Ebrahimi {
126*6467f958SSadaf Ebrahimi     return get_channel_order_channel_count(format->image_channel_order);
127*6467f958SSadaf Ebrahimi }
128*6467f958SSadaf Ebrahimi 
get_channel_order_channel_count(cl_channel_order order)129*6467f958SSadaf Ebrahimi uint32_t get_channel_order_channel_count(cl_channel_order order)
130*6467f958SSadaf Ebrahimi {
131*6467f958SSadaf Ebrahimi     switch (order)
132*6467f958SSadaf Ebrahimi     {
133*6467f958SSadaf Ebrahimi         case CL_R:
134*6467f958SSadaf Ebrahimi         case CL_A:
135*6467f958SSadaf Ebrahimi         case CL_Rx:
136*6467f958SSadaf Ebrahimi         case CL_INTENSITY:
137*6467f958SSadaf Ebrahimi         case CL_LUMINANCE:
138*6467f958SSadaf Ebrahimi         case CL_DEPTH:
139*6467f958SSadaf Ebrahimi         case CL_DEPTH_STENCIL: return 1;
140*6467f958SSadaf Ebrahimi 
141*6467f958SSadaf Ebrahimi         case CL_RG:
142*6467f958SSadaf Ebrahimi         case CL_RA:
143*6467f958SSadaf Ebrahimi         case CL_RGx: return 2;
144*6467f958SSadaf Ebrahimi 
145*6467f958SSadaf Ebrahimi         case CL_RGB:
146*6467f958SSadaf Ebrahimi         case CL_RGBx:
147*6467f958SSadaf Ebrahimi         case CL_sRGB:
148*6467f958SSadaf Ebrahimi         case CL_sRGBx: return 3;
149*6467f958SSadaf Ebrahimi 
150*6467f958SSadaf Ebrahimi         case CL_RGBA:
151*6467f958SSadaf Ebrahimi         case CL_ARGB:
152*6467f958SSadaf Ebrahimi         case CL_BGRA:
153*6467f958SSadaf Ebrahimi         case CL_sRGBA:
154*6467f958SSadaf Ebrahimi         case CL_sBGRA:
155*6467f958SSadaf Ebrahimi         case CL_ABGR:
156*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
157*6467f958SSadaf Ebrahimi         case CL_1RGB_APPLE:
158*6467f958SSadaf Ebrahimi #endif
159*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
160*6467f958SSadaf Ebrahimi         case CL_BGR1_APPLE:
161*6467f958SSadaf Ebrahimi #endif
162*6467f958SSadaf Ebrahimi #ifdef CL_ABGR_APPLE
163*6467f958SSadaf Ebrahimi         case CL_ABGR_APPLE:
164*6467f958SSadaf Ebrahimi #endif
165*6467f958SSadaf Ebrahimi             return 4;
166*6467f958SSadaf Ebrahimi 
167*6467f958SSadaf Ebrahimi         default:
168*6467f958SSadaf Ebrahimi             log_error("%s does not support 0x%x\n", __FUNCTION__, order);
169*6467f958SSadaf Ebrahimi             return 0;
170*6467f958SSadaf Ebrahimi     }
171*6467f958SSadaf Ebrahimi }
172*6467f958SSadaf Ebrahimi 
get_channel_type_from_name(const char * name)173*6467f958SSadaf Ebrahimi cl_channel_type get_channel_type_from_name(const char *name)
174*6467f958SSadaf Ebrahimi {
175*6467f958SSadaf Ebrahimi     struct
176*6467f958SSadaf Ebrahimi     {
177*6467f958SSadaf Ebrahimi         cl_channel_type type;
178*6467f958SSadaf Ebrahimi         const char *name;
179*6467f958SSadaf Ebrahimi     } typeNames[] = { { CL_SNORM_INT8, "CL_SNORM_INT8" },
180*6467f958SSadaf Ebrahimi                       { CL_SNORM_INT16, "CL_SNORM_INT16" },
181*6467f958SSadaf Ebrahimi                       { CL_UNORM_INT8, "CL_UNORM_INT8" },
182*6467f958SSadaf Ebrahimi                       { CL_UNORM_INT16, "CL_UNORM_INT16" },
183*6467f958SSadaf Ebrahimi                       { CL_UNORM_INT24, "CL_UNORM_INT24" },
184*6467f958SSadaf Ebrahimi                       { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
185*6467f958SSadaf Ebrahimi                       { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
186*6467f958SSadaf Ebrahimi                       { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
187*6467f958SSadaf Ebrahimi                       { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
188*6467f958SSadaf Ebrahimi                       { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
189*6467f958SSadaf Ebrahimi                       { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
190*6467f958SSadaf Ebrahimi                       { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
191*6467f958SSadaf Ebrahimi                       { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
192*6467f958SSadaf Ebrahimi                       { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
193*6467f958SSadaf Ebrahimi                       { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
194*6467f958SSadaf Ebrahimi                       { CL_FLOAT, "CL_FLOAT" },
195*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
196*6467f958SSadaf Ebrahimi                       { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
197*6467f958SSadaf Ebrahimi #endif
198*6467f958SSadaf Ebrahimi     };
199*6467f958SSadaf Ebrahimi     for (size_t i = 0; i < sizeof(typeNames) / sizeof(typeNames[0]); i++)
200*6467f958SSadaf Ebrahimi     {
201*6467f958SSadaf Ebrahimi         if (strcmp(typeNames[i].name, name) == 0
202*6467f958SSadaf Ebrahimi             || strcmp(typeNames[i].name + 3, name) == 0)
203*6467f958SSadaf Ebrahimi             return typeNames[i].type;
204*6467f958SSadaf Ebrahimi     }
205*6467f958SSadaf Ebrahimi     return (cl_channel_type)-1;
206*6467f958SSadaf Ebrahimi }
207*6467f958SSadaf Ebrahimi 
get_channel_order_from_name(const char * name)208*6467f958SSadaf Ebrahimi cl_channel_order get_channel_order_from_name(const char *name)
209*6467f958SSadaf Ebrahimi {
210*6467f958SSadaf Ebrahimi     const struct
211*6467f958SSadaf Ebrahimi     {
212*6467f958SSadaf Ebrahimi         cl_channel_order order;
213*6467f958SSadaf Ebrahimi         const char *name;
214*6467f958SSadaf Ebrahimi     } orderNames[] = {
215*6467f958SSadaf Ebrahimi         { CL_R, "CL_R" },
216*6467f958SSadaf Ebrahimi         { CL_A, "CL_A" },
217*6467f958SSadaf Ebrahimi         { CL_Rx, "CL_Rx" },
218*6467f958SSadaf Ebrahimi         { CL_RG, "CL_RG" },
219*6467f958SSadaf Ebrahimi         { CL_RA, "CL_RA" },
220*6467f958SSadaf Ebrahimi         { CL_RGx, "CL_RGx" },
221*6467f958SSadaf Ebrahimi         { CL_RGB, "CL_RGB" },
222*6467f958SSadaf Ebrahimi         { CL_RGBx, "CL_RGBx" },
223*6467f958SSadaf Ebrahimi         { CL_RGBA, "CL_RGBA" },
224*6467f958SSadaf Ebrahimi         { CL_BGRA, "CL_BGRA" },
225*6467f958SSadaf Ebrahimi         { CL_ARGB, "CL_ARGB" },
226*6467f958SSadaf Ebrahimi         { CL_INTENSITY, "CL_INTENSITY" },
227*6467f958SSadaf Ebrahimi         { CL_LUMINANCE, "CL_LUMINANCE" },
228*6467f958SSadaf Ebrahimi         { CL_DEPTH, "CL_DEPTH" },
229*6467f958SSadaf Ebrahimi         { CL_DEPTH_STENCIL, "CL_DEPTH_STENCIL" },
230*6467f958SSadaf Ebrahimi         { CL_sRGB, "CL_sRGB" },
231*6467f958SSadaf Ebrahimi         { CL_sRGBx, "CL_sRGBx" },
232*6467f958SSadaf Ebrahimi         { CL_sRGBA, "CL_sRGBA" },
233*6467f958SSadaf Ebrahimi         { CL_sBGRA, "CL_sBGRA" },
234*6467f958SSadaf Ebrahimi         { CL_ABGR, "CL_ABGR" },
235*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
236*6467f958SSadaf Ebrahimi         { CL_1RGB_APPLE, "CL_1RGB_APPLE" },
237*6467f958SSadaf Ebrahimi #endif
238*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
239*6467f958SSadaf Ebrahimi         { CL_BGR1_APPLE, "CL_BGR1_APPLE" },
240*6467f958SSadaf Ebrahimi #endif
241*6467f958SSadaf Ebrahimi     };
242*6467f958SSadaf Ebrahimi 
243*6467f958SSadaf Ebrahimi     for (size_t i = 0; i < sizeof(orderNames) / sizeof(orderNames[0]); i++)
244*6467f958SSadaf Ebrahimi     {
245*6467f958SSadaf Ebrahimi         if (strcmp(orderNames[i].name, name) == 0
246*6467f958SSadaf Ebrahimi             || strcmp(orderNames[i].name + 3, name) == 0)
247*6467f958SSadaf Ebrahimi             return orderNames[i].order;
248*6467f958SSadaf Ebrahimi     }
249*6467f958SSadaf Ebrahimi     return (cl_channel_order)-1;
250*6467f958SSadaf Ebrahimi }
251*6467f958SSadaf Ebrahimi 
252*6467f958SSadaf Ebrahimi 
is_format_signed(const cl_image_format * format)253*6467f958SSadaf Ebrahimi int is_format_signed(const cl_image_format *format)
254*6467f958SSadaf Ebrahimi {
255*6467f958SSadaf Ebrahimi     switch (format->image_channel_data_type)
256*6467f958SSadaf Ebrahimi     {
257*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8:
258*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8:
259*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16:
260*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16:
261*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32:
262*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT:
263*6467f958SSadaf Ebrahimi         case CL_FLOAT:
264*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
265*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE:
266*6467f958SSadaf Ebrahimi #endif
267*6467f958SSadaf Ebrahimi             return 1;
268*6467f958SSadaf Ebrahimi 
269*6467f958SSadaf Ebrahimi         default: return 0;
270*6467f958SSadaf Ebrahimi     }
271*6467f958SSadaf Ebrahimi }
272*6467f958SSadaf Ebrahimi 
get_pixel_size(const cl_image_format * format)273*6467f958SSadaf Ebrahimi uint32_t get_pixel_size(const cl_image_format *format)
274*6467f958SSadaf Ebrahimi {
275*6467f958SSadaf Ebrahimi     switch (format->image_channel_data_type)
276*6467f958SSadaf Ebrahimi     {
277*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8:
278*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8:
279*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8:
280*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: return get_format_channel_count(format);
281*6467f958SSadaf Ebrahimi 
282*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16:
283*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16:
284*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16:
285*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16:
286*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT:
287*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
288*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE:
289*6467f958SSadaf Ebrahimi #endif
290*6467f958SSadaf Ebrahimi             return get_format_channel_count(format) * sizeof(cl_ushort);
291*6467f958SSadaf Ebrahimi 
292*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32:
293*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32:
294*6467f958SSadaf Ebrahimi             return get_format_channel_count(format) * sizeof(cl_int);
295*6467f958SSadaf Ebrahimi 
296*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565:
297*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555:
298*6467f958SSadaf Ebrahimi #ifdef OBSOLETE_FORAMT
299*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565_REV:
300*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555_REV:
301*6467f958SSadaf Ebrahimi #endif
302*6467f958SSadaf Ebrahimi             return 2;
303*6467f958SSadaf Ebrahimi 
304*6467f958SSadaf Ebrahimi #ifdef OBSOLETE_FORAMT
305*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_8888:
306*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_8888_REV: return 4;
307*6467f958SSadaf Ebrahimi #endif
308*6467f958SSadaf Ebrahimi 
309*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010:
310*6467f958SSadaf Ebrahimi #ifdef OBSOLETE_FORAMT
311*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010_REV:
312*6467f958SSadaf Ebrahimi #endif
313*6467f958SSadaf Ebrahimi             return 4;
314*6467f958SSadaf Ebrahimi 
315*6467f958SSadaf Ebrahimi         case CL_FLOAT:
316*6467f958SSadaf Ebrahimi             return get_format_channel_count(format) * sizeof(cl_float);
317*6467f958SSadaf Ebrahimi 
318*6467f958SSadaf Ebrahimi         default: return 0;
319*6467f958SSadaf Ebrahimi     }
320*6467f958SSadaf Ebrahimi }
321*6467f958SSadaf Ebrahimi 
// Rounds v up to the nearest power of two; a value that already is a power
// of two is returned unchanged. Note that 0 maps to 0, and inputs above
// 2^31 wrap around to 0.
uint32_t next_power_of_two(uint32_t v)
{
    uint32_t filled = v - 1;

    // Smear the highest set bit into every lower position.
    for (unsigned shift = 1; shift < 32; shift <<= 1)
    {
        filled |= filled >> shift;
    }

    return filled + 1;
}
333*6467f958SSadaf Ebrahimi 
get_pixel_alignment(const cl_image_format * format)334*6467f958SSadaf Ebrahimi uint32_t get_pixel_alignment(const cl_image_format *format)
335*6467f958SSadaf Ebrahimi {
336*6467f958SSadaf Ebrahimi     return next_power_of_two(get_pixel_size(format));
337*6467f958SSadaf Ebrahimi }
338*6467f958SSadaf Ebrahimi 
get_8_bit_image_format(cl_context context,cl_mem_object_type objType,cl_mem_flags flags,size_t channelCount,cl_image_format * outFormat)339*6467f958SSadaf Ebrahimi int get_8_bit_image_format(cl_context context, cl_mem_object_type objType,
340*6467f958SSadaf Ebrahimi                            cl_mem_flags flags, size_t channelCount,
341*6467f958SSadaf Ebrahimi                            cl_image_format *outFormat)
342*6467f958SSadaf Ebrahimi {
343*6467f958SSadaf Ebrahimi     cl_image_format formatList[128];
344*6467f958SSadaf Ebrahimi     unsigned int outFormatCount, i;
345*6467f958SSadaf Ebrahimi     int error;
346*6467f958SSadaf Ebrahimi 
347*6467f958SSadaf Ebrahimi 
348*6467f958SSadaf Ebrahimi     /* Make sure each image format is supported */
349*6467f958SSadaf Ebrahimi     if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
350*6467f958SSadaf Ebrahimi                                             formatList, &outFormatCount)))
351*6467f958SSadaf Ebrahimi         return error;
352*6467f958SSadaf Ebrahimi 
353*6467f958SSadaf Ebrahimi 
354*6467f958SSadaf Ebrahimi     /* Look for one that is an 8-bit format */
355*6467f958SSadaf Ebrahimi     for (i = 0; i < outFormatCount; i++)
356*6467f958SSadaf Ebrahimi     {
357*6467f958SSadaf Ebrahimi         if (formatList[i].image_channel_data_type == CL_SNORM_INT8
358*6467f958SSadaf Ebrahimi             || formatList[i].image_channel_data_type == CL_UNORM_INT8
359*6467f958SSadaf Ebrahimi             || formatList[i].image_channel_data_type == CL_SIGNED_INT8
360*6467f958SSadaf Ebrahimi             || formatList[i].image_channel_data_type == CL_UNSIGNED_INT8)
361*6467f958SSadaf Ebrahimi         {
362*6467f958SSadaf Ebrahimi             if (!channelCount
363*6467f958SSadaf Ebrahimi                 || (channelCount
364*6467f958SSadaf Ebrahimi                     && (get_format_channel_count(&formatList[i])
365*6467f958SSadaf Ebrahimi                         == channelCount)))
366*6467f958SSadaf Ebrahimi             {
367*6467f958SSadaf Ebrahimi                 *outFormat = formatList[i];
368*6467f958SSadaf Ebrahimi                 return 0;
369*6467f958SSadaf Ebrahimi             }
370*6467f958SSadaf Ebrahimi         }
371*6467f958SSadaf Ebrahimi     }
372*6467f958SSadaf Ebrahimi 
373*6467f958SSadaf Ebrahimi     return -1;
374*6467f958SSadaf Ebrahimi }
375*6467f958SSadaf Ebrahimi 
get_32_bit_image_format(cl_context context,cl_mem_object_type objType,cl_mem_flags flags,size_t channelCount,cl_image_format * outFormat)376*6467f958SSadaf Ebrahimi int get_32_bit_image_format(cl_context context, cl_mem_object_type objType,
377*6467f958SSadaf Ebrahimi                             cl_mem_flags flags, size_t channelCount,
378*6467f958SSadaf Ebrahimi                             cl_image_format *outFormat)
379*6467f958SSadaf Ebrahimi {
380*6467f958SSadaf Ebrahimi     cl_image_format formatList[128];
381*6467f958SSadaf Ebrahimi     unsigned int outFormatCount, i;
382*6467f958SSadaf Ebrahimi     int error;
383*6467f958SSadaf Ebrahimi 
384*6467f958SSadaf Ebrahimi 
385*6467f958SSadaf Ebrahimi     /* Make sure each image format is supported */
386*6467f958SSadaf Ebrahimi     if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
387*6467f958SSadaf Ebrahimi                                             formatList, &outFormatCount)))
388*6467f958SSadaf Ebrahimi         return error;
389*6467f958SSadaf Ebrahimi 
390*6467f958SSadaf Ebrahimi     /* Look for one that is an 8-bit format */
391*6467f958SSadaf Ebrahimi     for (i = 0; i < outFormatCount; i++)
392*6467f958SSadaf Ebrahimi     {
393*6467f958SSadaf Ebrahimi         if (formatList[i].image_channel_data_type == CL_UNORM_INT_101010
394*6467f958SSadaf Ebrahimi             || formatList[i].image_channel_data_type == CL_FLOAT
395*6467f958SSadaf Ebrahimi             || formatList[i].image_channel_data_type == CL_SIGNED_INT32
396*6467f958SSadaf Ebrahimi             || formatList[i].image_channel_data_type == CL_UNSIGNED_INT32)
397*6467f958SSadaf Ebrahimi         {
398*6467f958SSadaf Ebrahimi             if (!channelCount
399*6467f958SSadaf Ebrahimi                 || (channelCount
400*6467f958SSadaf Ebrahimi                     && (get_format_channel_count(&formatList[i])
401*6467f958SSadaf Ebrahimi                         == channelCount)))
402*6467f958SSadaf Ebrahimi             {
403*6467f958SSadaf Ebrahimi                 *outFormat = formatList[i];
404*6467f958SSadaf Ebrahimi                 return 0;
405*6467f958SSadaf Ebrahimi             }
406*6467f958SSadaf Ebrahimi         }
407*6467f958SSadaf Ebrahimi     }
408*6467f958SSadaf Ebrahimi 
409*6467f958SSadaf Ebrahimi     return -1;
410*6467f958SSadaf Ebrahimi }
411*6467f958SSadaf Ebrahimi 
print_first_pixel_difference_error(size_t where,const char * sourcePixel,const char * destPixel,image_descriptor * imageInfo,size_t y,size_t thirdDim)412*6467f958SSadaf Ebrahimi void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
413*6467f958SSadaf Ebrahimi                                         const char *destPixel,
414*6467f958SSadaf Ebrahimi                                         image_descriptor *imageInfo, size_t y,
415*6467f958SSadaf Ebrahimi                                         size_t thirdDim)
416*6467f958SSadaf Ebrahimi {
417*6467f958SSadaf Ebrahimi     size_t pixel_size = get_pixel_size(imageInfo->format);
418*6467f958SSadaf Ebrahimi 
419*6467f958SSadaf Ebrahimi     log_error("ERROR: Scanline %d did not verify for image size %d,%d,%d "
420*6467f958SSadaf Ebrahimi               "pitch %d (extra %d bytes)\n",
421*6467f958SSadaf Ebrahimi               (int)y, (int)imageInfo->width, (int)imageInfo->height,
422*6467f958SSadaf Ebrahimi               (int)thirdDim, (int)imageInfo->rowPitch,
423*6467f958SSadaf Ebrahimi               (int)imageInfo->rowPitch
424*6467f958SSadaf Ebrahimi                   - (int)imageInfo->width * (int)pixel_size);
425*6467f958SSadaf Ebrahimi     log_error("Failed at column: %zu   ", where);
426*6467f958SSadaf Ebrahimi 
427*6467f958SSadaf Ebrahimi     switch (pixel_size)
428*6467f958SSadaf Ebrahimi     {
429*6467f958SSadaf Ebrahimi         case 1:
430*6467f958SSadaf Ebrahimi             log_error("*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar *)sourcePixel)[0],
431*6467f958SSadaf Ebrahimi                       ((cl_uchar *)destPixel)[0]);
432*6467f958SSadaf Ebrahimi             break;
433*6467f958SSadaf Ebrahimi         case 2:
434*6467f958SSadaf Ebrahimi             log_error("*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort *)sourcePixel)[0],
435*6467f958SSadaf Ebrahimi                       ((cl_ushort *)destPixel)[0]);
436*6467f958SSadaf Ebrahimi             break;
437*6467f958SSadaf Ebrahimi         case 3:
438*6467f958SSadaf Ebrahimi             log_error("*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. "
439*6467f958SSadaf Ebrahimi                       "{0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
440*6467f958SSadaf Ebrahimi                       ((cl_uchar *)sourcePixel)[0],
441*6467f958SSadaf Ebrahimi                       ((cl_uchar *)sourcePixel)[1],
442*6467f958SSadaf Ebrahimi                       ((cl_uchar *)sourcePixel)[2], ((cl_uchar *)destPixel)[0],
443*6467f958SSadaf Ebrahimi                       ((cl_uchar *)destPixel)[1], ((cl_uchar *)destPixel)[2]);
444*6467f958SSadaf Ebrahimi             break;
445*6467f958SSadaf Ebrahimi         case 4:
446*6467f958SSadaf Ebrahimi             log_error("*0x%8.8x vs. 0x%8.8x\n", ((cl_uint *)sourcePixel)[0],
447*6467f958SSadaf Ebrahimi                       ((cl_uint *)destPixel)[0]);
448*6467f958SSadaf Ebrahimi             break;
449*6467f958SSadaf Ebrahimi         case 6:
450*6467f958SSadaf Ebrahimi             log_error(
451*6467f958SSadaf Ebrahimi                 "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. "
452*6467f958SSadaf Ebrahimi                 "{0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
453*6467f958SSadaf Ebrahimi                 ((cl_ushort *)sourcePixel)[0], ((cl_ushort *)sourcePixel)[1],
454*6467f958SSadaf Ebrahimi                 ((cl_ushort *)sourcePixel)[2], ((cl_ushort *)destPixel)[0],
455*6467f958SSadaf Ebrahimi                 ((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
456*6467f958SSadaf Ebrahimi             break;
457*6467f958SSadaf Ebrahimi         case 8:
458*6467f958SSadaf Ebrahimi             log_error("*0x%16.16" PRIx64 " vs. 0x%16.16" PRIx64 "\n",
459*6467f958SSadaf Ebrahimi                       ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
460*6467f958SSadaf Ebrahimi             break;
461*6467f958SSadaf Ebrahimi         case 12:
462*6467f958SSadaf Ebrahimi             log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
463*6467f958SSadaf Ebrahimi                       "{0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
464*6467f958SSadaf Ebrahimi                       ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
465*6467f958SSadaf Ebrahimi                       ((cl_uint *)sourcePixel)[2], ((cl_uint *)destPixel)[0],
466*6467f958SSadaf Ebrahimi                       ((cl_uint *)destPixel)[1], ((cl_uint *)destPixel)[2]);
467*6467f958SSadaf Ebrahimi             break;
468*6467f958SSadaf Ebrahimi         case 16:
469*6467f958SSadaf Ebrahimi             log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
470*6467f958SSadaf Ebrahimi                       "{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
471*6467f958SSadaf Ebrahimi                       ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
472*6467f958SSadaf Ebrahimi                       ((cl_uint *)sourcePixel)[2], ((cl_uint *)sourcePixel)[3],
473*6467f958SSadaf Ebrahimi                       ((cl_uint *)destPixel)[0], ((cl_uint *)destPixel)[1],
474*6467f958SSadaf Ebrahimi                       ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
475*6467f958SSadaf Ebrahimi             break;
476*6467f958SSadaf Ebrahimi         default:
477*6467f958SSadaf Ebrahimi             log_error("Don't know how to print pixel size of %zu\n",
478*6467f958SSadaf Ebrahimi                       pixel_size);
479*6467f958SSadaf Ebrahimi             break;
480*6467f958SSadaf Ebrahimi     }
481*6467f958SSadaf Ebrahimi }
482*6467f958SSadaf Ebrahimi 
compare_scanlines(const image_descriptor * imageInfo,const char * aPtr,const char * bPtr)483*6467f958SSadaf Ebrahimi size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr,
484*6467f958SSadaf Ebrahimi                          const char *bPtr)
485*6467f958SSadaf Ebrahimi {
486*6467f958SSadaf Ebrahimi     size_t pixel_size = get_pixel_size(imageInfo->format);
487*6467f958SSadaf Ebrahimi     size_t column;
488*6467f958SSadaf Ebrahimi 
489*6467f958SSadaf Ebrahimi     for (column = 0; column < imageInfo->width; column++)
490*6467f958SSadaf Ebrahimi     {
491*6467f958SSadaf Ebrahimi         switch (imageInfo->format->image_channel_data_type)
492*6467f958SSadaf Ebrahimi         {
493*6467f958SSadaf Ebrahimi             // If the data type is 101010, then ignore bits 31 and 32 when
494*6467f958SSadaf Ebrahimi             // comparing the row
495*6467f958SSadaf Ebrahimi             case CL_UNORM_INT_101010: {
496*6467f958SSadaf Ebrahimi                 cl_uint aPixel = *(cl_uint *)aPtr;
497*6467f958SSadaf Ebrahimi                 cl_uint bPixel = *(cl_uint *)bPtr;
498*6467f958SSadaf Ebrahimi                 if ((aPixel & 0x3fffffff) != (bPixel & 0x3fffffff))
499*6467f958SSadaf Ebrahimi                     return column;
500*6467f958SSadaf Ebrahimi             }
501*6467f958SSadaf Ebrahimi             break;
502*6467f958SSadaf Ebrahimi 
503*6467f958SSadaf Ebrahimi             // If the data type is 555, ignore bit 15 when comparing the row
504*6467f958SSadaf Ebrahimi             case CL_UNORM_SHORT_555: {
505*6467f958SSadaf Ebrahimi                 cl_ushort aPixel = *(cl_ushort *)aPtr;
506*6467f958SSadaf Ebrahimi                 cl_ushort bPixel = *(cl_ushort *)bPtr;
507*6467f958SSadaf Ebrahimi                 if ((aPixel & 0x7fff) != (bPixel & 0x7fff)) return column;
508*6467f958SSadaf Ebrahimi             }
509*6467f958SSadaf Ebrahimi             break;
510*6467f958SSadaf Ebrahimi 
511*6467f958SSadaf Ebrahimi             default:
512*6467f958SSadaf Ebrahimi                 if (memcmp(aPtr, bPtr, pixel_size) != 0) return column;
513*6467f958SSadaf Ebrahimi                 break;
514*6467f958SSadaf Ebrahimi         }
515*6467f958SSadaf Ebrahimi 
516*6467f958SSadaf Ebrahimi         aPtr += pixel_size;
517*6467f958SSadaf Ebrahimi         bPtr += pixel_size;
518*6467f958SSadaf Ebrahimi     }
519*6467f958SSadaf Ebrahimi 
520*6467f958SSadaf Ebrahimi     // If we didn't find a difference, return the width of the image
521*6467f958SSadaf Ebrahimi     return column;
522*6467f958SSadaf Ebrahimi }
523*6467f958SSadaf Ebrahimi 
random_log_in_range(int minV,int maxV,MTdata d)524*6467f958SSadaf Ebrahimi int random_log_in_range(int minV, int maxV, MTdata d)
525*6467f958SSadaf Ebrahimi {
526*6467f958SSadaf Ebrahimi     double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1);
527*6467f958SSadaf Ebrahimi     int iv = (int)((float)(maxV - minV) * v);
528*6467f958SSadaf Ebrahimi     return iv + minV;
529*6467f958SSadaf Ebrahimi }
530*6467f958SSadaf Ebrahimi 
531*6467f958SSadaf Ebrahimi 
// Define the addressing functions
// AddressFn is the common signature of the integer addressing helpers: it
// takes a coordinate and an extent and returns the adjusted coordinate.
using AddressFn = int (*)(int value, size_t maxValue);
534*6467f958SSadaf Ebrahimi 
// Identity addressing: the coordinate is returned untouched and the extent
// is ignored.
int NoAddressFn(int value, size_t maxValue)
{
    (void)maxValue;
    return value;
}
// Repeat addressing for integer coordinates. A coordinate below zero is moved
// up by one period and a coordinate at or beyond maxValue is moved down by one
// period; only a single wrap is applied.
int RepeatAddressFn(int value, size_t maxValue)
{
    const int period = (int)maxValue;

    if (value < 0) return value + period;
    if (value >= period) return value - period;
    return value;
}
// Mirrored-repeat addressing for integer coordinates: negative coordinates
// become 0 and coordinates at or beyond maxValue become maxValue - 1.
int MirroredRepeatAddressFn(int value, size_t maxValue)
{
    if (value < 0) return 0;

    return ((size_t)value >= maxValue) ? (int)(maxValue - 1) : value;
}
ClampAddressFn(int value,size_t maxValue)552*6467f958SSadaf Ebrahimi int ClampAddressFn(int value, size_t maxValue)
553*6467f958SSadaf Ebrahimi {
554*6467f958SSadaf Ebrahimi     return (value < -1) ? -1
555*6467f958SSadaf Ebrahimi                         : ((value > (cl_long)maxValue) ? (int)maxValue : value);
556*6467f958SSadaf Ebrahimi }
// Clamp-to-edge addressing: negative coordinates become 0 and coordinates past
// the last index become maxValue - 1.
int ClampToEdgeNearestFn(int value, size_t maxValue)
{
    if (value < 0) return 0;

    const size_t lastIndex = maxValue - 1;
    if ((size_t)value > lastIndex) return (int)maxValue - 1;
    return value;
}
563*6467f958SSadaf Ebrahimi AddressFn ClampToEdgeLinearFn = ClampToEdgeNearestFn;
564*6467f958SSadaf Ebrahimi 
// Normalized coordinates are wrapped while still in normalized space rather
// than after scaling to unnormalized space, hence this dedicated routine.
// gFloatHome is the volatile scratch float used by the Visual Studio path.
volatile float gFloatHome;
float RepeatNormalizedAddressFn(float fValue, size_t maxValue)
{
#ifdef _MSC_VER
    // Park the subtraction result in a float so the sequence of x87
    // instructions emitted by the VS compiler is broken up.
    gFloatHome = fValue - floorf(fValue);
    return gFloatHome * (float)maxValue;
#else
    // Every other compiler uses the general repeat computation: reduce the
    // coordinate to [0, 1.f], then un-normalize.
    return (fValue - floorf(fValue)) * (float)maxValue;
#endif
}
580*6467f958SSadaf Ebrahimi 
// Mirrored-repeat addressing for a normalized coordinate; the result is
// returned in unnormalized units (scaled by maxValue).
float MirroredRepeatNormalizedAddressFn(float fValue, size_t maxValue)
{
    // Nearest multiple of two. Halfway inputs flip-flop under round-to-even,
    // but both choices land on the same place once mirrored.
    const float nearestEven = 2.0f * rintf(fValue * 0.5f);

    // The difference lies in [-1, 1]; the absolute value mirrors it into
    // [0, 1].
    const float mirrored = fabsf(fValue - nearestEven);

    // Scale back up to unnormalized units.
    return mirrored * (float)maxValue;
}
594*6467f958SSadaf Ebrahimi 
595*6467f958SSadaf Ebrahimi struct AddressingTable
596*6467f958SSadaf Ebrahimi {
AddressingTableAddressingTable597*6467f958SSadaf Ebrahimi     AddressingTable()
598*6467f958SSadaf Ebrahimi     {
599*6467f958SSadaf Ebrahimi         static_assert(CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6, "");
600*6467f958SSadaf Ebrahimi         static_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2, "");
601*6467f958SSadaf Ebrahimi 
602*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
603*6467f958SSadaf Ebrahimi               [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn;
604*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
605*6467f958SSadaf Ebrahimi               [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = NoAddressFn;
606*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
607*6467f958SSadaf Ebrahimi               [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = RepeatAddressFn;
608*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
609*6467f958SSadaf Ebrahimi               [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = RepeatAddressFn;
610*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
611*6467f958SSadaf Ebrahimi               [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampToEdgeNearestFn;
612*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
613*6467f958SSadaf Ebrahimi               [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampToEdgeLinearFn;
614*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
615*6467f958SSadaf Ebrahimi               [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampAddressFn;
616*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
617*6467f958SSadaf Ebrahimi               [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampAddressFn;
618*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
619*6467f958SSadaf Ebrahimi               [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
620*6467f958SSadaf Ebrahimi         mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
621*6467f958SSadaf Ebrahimi               [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
622*6467f958SSadaf Ebrahimi     }
623*6467f958SSadaf Ebrahimi 
operator []AddressingTable624*6467f958SSadaf Ebrahimi     AddressFn operator[](image_sampler_data *sampler)
625*6467f958SSadaf Ebrahimi     {
626*6467f958SSadaf Ebrahimi         return mTable[(int)sampler->addressing_mode - CL_ADDRESS_NONE]
627*6467f958SSadaf Ebrahimi                      [(int)sampler->filter_mode - CL_FILTER_NEAREST];
628*6467f958SSadaf Ebrahimi     }
629*6467f958SSadaf Ebrahimi 
630*6467f958SSadaf Ebrahimi     AddressFn mTable[6][2];
631*6467f958SSadaf Ebrahimi };
632*6467f958SSadaf Ebrahimi 
633*6467f958SSadaf Ebrahimi static AddressingTable sAddressingTable;
634*6467f958SSadaf Ebrahimi 
is_sRGBA_order(cl_channel_order image_channel_order)635*6467f958SSadaf Ebrahimi bool is_sRGBA_order(cl_channel_order image_channel_order)
636*6467f958SSadaf Ebrahimi {
637*6467f958SSadaf Ebrahimi     switch (image_channel_order)
638*6467f958SSadaf Ebrahimi     {
639*6467f958SSadaf Ebrahimi         case CL_sRGB:
640*6467f958SSadaf Ebrahimi         case CL_sRGBx:
641*6467f958SSadaf Ebrahimi         case CL_sRGBA:
642*6467f958SSadaf Ebrahimi         case CL_sBGRA: return true;
643*6467f958SSadaf Ebrahimi         default: return false;
644*6467f958SSadaf Ebrahimi     }
645*6467f958SSadaf Ebrahimi }
646*6467f958SSadaf Ebrahimi 
647*6467f958SSadaf Ebrahimi // Format helpers
648*6467f958SSadaf Ebrahimi 
has_alpha(const cl_image_format * format)649*6467f958SSadaf Ebrahimi int has_alpha(const cl_image_format *format)
650*6467f958SSadaf Ebrahimi {
651*6467f958SSadaf Ebrahimi     switch (format->image_channel_order)
652*6467f958SSadaf Ebrahimi     {
653*6467f958SSadaf Ebrahimi         case CL_R: return 0;
654*6467f958SSadaf Ebrahimi         case CL_A: return 1;
655*6467f958SSadaf Ebrahimi         case CL_Rx: return 0;
656*6467f958SSadaf Ebrahimi         case CL_RG: return 0;
657*6467f958SSadaf Ebrahimi         case CL_RA: return 1;
658*6467f958SSadaf Ebrahimi         case CL_RGx: return 0;
659*6467f958SSadaf Ebrahimi         case CL_RGB:
660*6467f958SSadaf Ebrahimi         case CL_sRGB: return 0;
661*6467f958SSadaf Ebrahimi         case CL_RGBx:
662*6467f958SSadaf Ebrahimi         case CL_sRGBx: return 0;
663*6467f958SSadaf Ebrahimi         case CL_RGBA: return 1;
664*6467f958SSadaf Ebrahimi         case CL_BGRA: return 1;
665*6467f958SSadaf Ebrahimi         case CL_ARGB: return 1;
666*6467f958SSadaf Ebrahimi         case CL_ABGR: return 1;
667*6467f958SSadaf Ebrahimi         case CL_INTENSITY: return 1;
668*6467f958SSadaf Ebrahimi         case CL_LUMINANCE: return 0;
669*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
670*6467f958SSadaf Ebrahimi         case CL_BGR1_APPLE: return 1;
671*6467f958SSadaf Ebrahimi #endif
672*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
673*6467f958SSadaf Ebrahimi         case CL_1RGB_APPLE: return 1;
674*6467f958SSadaf Ebrahimi #endif
675*6467f958SSadaf Ebrahimi         case CL_sRGBA:
676*6467f958SSadaf Ebrahimi         case CL_sBGRA: return 1;
677*6467f958SSadaf Ebrahimi         case CL_DEPTH: return 0;
678*6467f958SSadaf Ebrahimi         default:
679*6467f958SSadaf Ebrahimi             log_error("Invalid image channel order: %d\n",
680*6467f958SSadaf Ebrahimi                       format->image_channel_order);
681*6467f958SSadaf Ebrahimi             return 0;
682*6467f958SSadaf Ebrahimi     }
683*6467f958SSadaf Ebrahimi }
684*6467f958SSadaf Ebrahimi 
#define PRINT_MAX_SIZE_LOGIC 0

// Exchanges the values of two assignable expressions. A temporary is used
// rather than the XOR trick so that swapping an object with itself leaves it
// unchanged. Each argument is evaluated more than once, so arguments must not
// have side effects.
#define SWAP(_a, _b)                                                           \
    do                                                                         \
    {                                                                          \
        auto swap_tmp_value_ = (_a);                                           \
        (_a) = (_b);                                                           \
        (_b) = swap_tmp_value_;                                                \
    } while (0)
694*6467f958SSadaf Ebrahimi 
get_max_sizes(size_t * numberOfSizes,const int maxNumberOfSizes,size_t sizes[][3],size_t maxWidth,size_t maxHeight,size_t maxDepth,size_t maxArraySize,const cl_ulong maxIndividualAllocSize,const cl_ulong maxTotalAllocSize,cl_mem_object_type image_type,const cl_image_format * format,int usingMaxPixelSizeBuffer)695*6467f958SSadaf Ebrahimi void get_max_sizes(
696*6467f958SSadaf Ebrahimi     size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3],
697*6467f958SSadaf Ebrahimi     size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
698*6467f958SSadaf Ebrahimi     const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE
699*6467f958SSadaf Ebrahimi     const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE
700*6467f958SSadaf Ebrahimi     cl_mem_object_type image_type, const cl_image_format *format,
701*6467f958SSadaf Ebrahimi     int usingMaxPixelSizeBuffer)
702*6467f958SSadaf Ebrahimi {
703*6467f958SSadaf Ebrahimi 
704*6467f958SSadaf Ebrahimi     bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D);
705*6467f958SSadaf Ebrahimi     bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
706*6467f958SSadaf Ebrahimi                     || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
707*6467f958SSadaf Ebrahimi 
708*6467f958SSadaf Ebrahimi     // Validate we have a reasonable max depth for 3D
709*6467f958SSadaf Ebrahimi     if (is3D && maxDepth < 2)
710*6467f958SSadaf Ebrahimi     {
711*6467f958SSadaf Ebrahimi         log_error("ERROR: Requesting max image sizes for 3D images when max "
712*6467f958SSadaf Ebrahimi                   "depth is < 2.\n");
713*6467f958SSadaf Ebrahimi         *numberOfSizes = 0;
714*6467f958SSadaf Ebrahimi         return;
715*6467f958SSadaf Ebrahimi     }
716*6467f958SSadaf Ebrahimi     // Validate we have a reasonable max array size for 1D & 2D image arrays
717*6467f958SSadaf Ebrahimi     if (isArray && maxArraySize < 2)
718*6467f958SSadaf Ebrahimi     {
719*6467f958SSadaf Ebrahimi         log_error("ERROR: Requesting max image sizes for an image array when "
720*6467f958SSadaf Ebrahimi                   "max array size is < 1.\n");
721*6467f958SSadaf Ebrahimi         *numberOfSizes = 0;
722*6467f958SSadaf Ebrahimi         return;
723*6467f958SSadaf Ebrahimi     }
724*6467f958SSadaf Ebrahimi 
725*6467f958SSadaf Ebrahimi     // Reduce the maximum because we are trying to test the max image
726*6467f958SSadaf Ebrahimi     // dimensions, not the memory allocation
727*6467f958SSadaf Ebrahimi     cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4;
728*6467f958SSadaf Ebrahimi     cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4;
729*6467f958SSadaf Ebrahimi     log_info("Note: max individual allocation adjusted down from %gMB to %gMB "
730*6467f958SSadaf Ebrahimi              "and max total allocation adjusted down from %gMB to %gMB.\n",
731*6467f958SSadaf Ebrahimi              maxIndividualAllocSize / (1024.0 * 1024.0),
732*6467f958SSadaf Ebrahimi              adjustedMaxIndividualAllocSize / (1024.0 * 1024.0),
733*6467f958SSadaf Ebrahimi              maxTotalAllocSize / (1024.0 * 1024.0),
734*6467f958SSadaf Ebrahimi              adjustedMaxTotalAllocSize / (1024.0 * 1024.0));
735*6467f958SSadaf Ebrahimi 
736*6467f958SSadaf Ebrahimi     // Cap our max allocation to 1.0GB.
737*6467f958SSadaf Ebrahimi     // FIXME -- why?  In the interest of not taking a long time?  We should
738*6467f958SSadaf Ebrahimi     // still test this stuff...
739*6467f958SSadaf Ebrahimi     if (adjustedMaxTotalAllocSize > (cl_ulong)1024 * 1024 * 1024)
740*6467f958SSadaf Ebrahimi     {
741*6467f958SSadaf Ebrahimi         adjustedMaxTotalAllocSize = (cl_ulong)1024 * 1024 * 1024;
742*6467f958SSadaf Ebrahimi         log_info("Limiting max total allocation size to %gMB (down from %gMB) "
743*6467f958SSadaf Ebrahimi                  "for test.\n",
744*6467f958SSadaf Ebrahimi                  adjustedMaxTotalAllocSize / (1024.0 * 1024.0),
745*6467f958SSadaf Ebrahimi                  maxTotalAllocSize / (1024.0 * 1024.0));
746*6467f958SSadaf Ebrahimi     }
747*6467f958SSadaf Ebrahimi 
748*6467f958SSadaf Ebrahimi     cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize;
749*6467f958SSadaf Ebrahimi     if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize * 2)
750*6467f958SSadaf Ebrahimi         maxAllocSize = adjustedMaxTotalAllocSize / 2;
751*6467f958SSadaf Ebrahimi 
752*6467f958SSadaf Ebrahimi     size_t raw_pixel_size = get_pixel_size(format);
753*6467f958SSadaf Ebrahimi     // If the test will be creating input (src) buffer of type int4 or float4,
754*6467f958SSadaf Ebrahimi     // number of pixels will be governed by sizeof(int4 or float4) and not
755*6467f958SSadaf Ebrahimi     // sizeof(dest fomat) Also if pixel size is 12 bytes i.e. RGB or RGBx, we
756*6467f958SSadaf Ebrahimi     // adjust it to 16 bytes as GPUs has no concept of 3 channel images. GPUs
757*6467f958SSadaf Ebrahimi     // expand these to four channel RGBA.
758*6467f958SSadaf Ebrahimi     if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16;
759*6467f958SSadaf Ebrahimi     size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;
760*6467f958SSadaf Ebrahimi 
761*6467f958SSadaf Ebrahimi     log_info("Maximums: [%zu x %zu x %zu], raw pixel size %zu bytes, "
762*6467f958SSadaf Ebrahimi              "per-allocation limit %gMB.\n",
763*6467f958SSadaf Ebrahimi              maxWidth, maxHeight, isArray ? maxArraySize : maxDepth,
764*6467f958SSadaf Ebrahimi              raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0)));
765*6467f958SSadaf Ebrahimi 
766*6467f958SSadaf Ebrahimi     // Keep track of the maximum sizes for each dimension
767*6467f958SSadaf Ebrahimi     size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
768*6467f958SSadaf Ebrahimi 
769*6467f958SSadaf Ebrahimi     switch (image_type)
770*6467f958SSadaf Ebrahimi     {
771*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE1D_ARRAY:
772*6467f958SSadaf Ebrahimi             maximum_sizes[1] = maxArraySize;
773*6467f958SSadaf Ebrahimi             maximum_sizes[2] = 1;
774*6467f958SSadaf Ebrahimi             break;
775*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE2D_ARRAY:
776*6467f958SSadaf Ebrahimi             maximum_sizes[2] = maxArraySize;
777*6467f958SSadaf Ebrahimi             break;
778*6467f958SSadaf Ebrahimi     }
779*6467f958SSadaf Ebrahimi 
780*6467f958SSadaf Ebrahimi 
781*6467f958SSadaf Ebrahimi         // Given one fixed sized dimension, this code finds one or two other
782*6467f958SSadaf Ebrahimi         // dimensions, both with very small size, such that the size does not
783*6467f958SSadaf Ebrahimi         // exceed the maximum passed to this function
784*6467f958SSadaf Ebrahimi 
785*6467f958SSadaf Ebrahimi #if defined(__x86_64) || defined(__arm64__) || defined(__ppc64__)
786*6467f958SSadaf Ebrahimi     size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15 };
787*6467f958SSadaf Ebrahimi #else
788*6467f958SSadaf Ebrahimi     size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13 };
789*6467f958SSadaf Ebrahimi #endif
790*6467f958SSadaf Ebrahimi 
791*6467f958SSadaf Ebrahimi     static size_t other_size = 0;
792*6467f958SSadaf Ebrahimi     enum
793*6467f958SSadaf Ebrahimi     {
794*6467f958SSadaf Ebrahimi         num_other_sizes = sizeof(other_sizes) / sizeof(size_t)
795*6467f958SSadaf Ebrahimi     };
796*6467f958SSadaf Ebrahimi 
797*6467f958SSadaf Ebrahimi     (*numberOfSizes) = 0;
798*6467f958SSadaf Ebrahimi 
799*6467f958SSadaf Ebrahimi     if (image_type == CL_MEM_OBJECT_IMAGE1D)
800*6467f958SSadaf Ebrahimi     {
801*6467f958SSadaf Ebrahimi 
802*6467f958SSadaf Ebrahimi         size_t M = maximum_sizes[0];
803*6467f958SSadaf Ebrahimi 
804*6467f958SSadaf Ebrahimi         // Store the size
805*6467f958SSadaf Ebrahimi         sizes[(*numberOfSizes)][0] = M;
806*6467f958SSadaf Ebrahimi         sizes[(*numberOfSizes)][1] = 1;
807*6467f958SSadaf Ebrahimi         sizes[(*numberOfSizes)][2] = 1;
808*6467f958SSadaf Ebrahimi         ++(*numberOfSizes);
809*6467f958SSadaf Ebrahimi     }
810*6467f958SSadaf Ebrahimi 
811*6467f958SSadaf Ebrahimi     else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
812*6467f958SSadaf Ebrahimi              || image_type == CL_MEM_OBJECT_IMAGE2D)
813*6467f958SSadaf Ebrahimi     {
814*6467f958SSadaf Ebrahimi 
815*6467f958SSadaf Ebrahimi         for (int fixed_dim = 0; fixed_dim < 2; ++fixed_dim)
816*6467f958SSadaf Ebrahimi         {
817*6467f958SSadaf Ebrahimi 
818*6467f958SSadaf Ebrahimi             // Determine the size of the fixed dimension
819*6467f958SSadaf Ebrahimi             size_t M = maximum_sizes[fixed_dim];
820*6467f958SSadaf Ebrahimi             size_t A = max_pixels;
821*6467f958SSadaf Ebrahimi 
822*6467f958SSadaf Ebrahimi             int x0_dim = !fixed_dim;
823*6467f958SSadaf Ebrahimi             size_t x0 = static_cast<size_t>(
824*6467f958SSadaf Ebrahimi                 fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M),
825*6467f958SSadaf Ebrahimi                      maximum_sizes[x0_dim]));
826*6467f958SSadaf Ebrahimi 
827*6467f958SSadaf Ebrahimi             // Store the size
828*6467f958SSadaf Ebrahimi             sizes[(*numberOfSizes)][fixed_dim] = M;
829*6467f958SSadaf Ebrahimi             sizes[(*numberOfSizes)][x0_dim] = x0;
830*6467f958SSadaf Ebrahimi             sizes[(*numberOfSizes)][2] = 1;
831*6467f958SSadaf Ebrahimi             ++(*numberOfSizes);
832*6467f958SSadaf Ebrahimi         }
833*6467f958SSadaf Ebrahimi     }
834*6467f958SSadaf Ebrahimi 
835*6467f958SSadaf Ebrahimi     else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY
836*6467f958SSadaf Ebrahimi              || image_type == CL_MEM_OBJECT_IMAGE3D)
837*6467f958SSadaf Ebrahimi     {
838*6467f958SSadaf Ebrahimi 
839*6467f958SSadaf Ebrahimi         // Iterate over dimensions, finding sizes for the non-fixed dimension
840*6467f958SSadaf Ebrahimi         for (int fixed_dim = 0; fixed_dim < 3; ++fixed_dim)
841*6467f958SSadaf Ebrahimi         {
842*6467f958SSadaf Ebrahimi 
843*6467f958SSadaf Ebrahimi             // Determine the size of the fixed dimension
844*6467f958SSadaf Ebrahimi             size_t M = maximum_sizes[fixed_dim];
845*6467f958SSadaf Ebrahimi             size_t A = max_pixels;
846*6467f958SSadaf Ebrahimi 
847*6467f958SSadaf Ebrahimi             // Find two other dimensions, x0 and x1
848*6467f958SSadaf Ebrahimi             int x0_dim = (fixed_dim == 0) ? 1 : 0;
849*6467f958SSadaf Ebrahimi             int x1_dim = (fixed_dim == 2) ? 1 : 2;
850*6467f958SSadaf Ebrahimi 
851*6467f958SSadaf Ebrahimi             // Choose two other sizes for these dimensions
852*6467f958SSadaf Ebrahimi             size_t x0 = static_cast<size_t>(
853*6467f958SSadaf Ebrahimi                 fmin(fmin(A / M, maximum_sizes[x0_dim]),
854*6467f958SSadaf Ebrahimi                      other_sizes[(other_size++) % num_other_sizes]));
855*6467f958SSadaf Ebrahimi             // GPUs have certain restrictions on minimum width (row alignment)
856*6467f958SSadaf Ebrahimi             // of images which has given us issues testing small widths in this
857*6467f958SSadaf Ebrahimi             // test (say we set width to 3 for testing, and compute size based
858*6467f958SSadaf Ebrahimi             // on this width and decide it fits within vram ... but GPU driver
859*6467f958SSadaf Ebrahimi             // decides that, due to row alignment requirements, it has to use
860*6467f958SSadaf Ebrahimi             // width of 16 which doesnt fit in vram). For this purpose we are
861*6467f958SSadaf Ebrahimi             // not testing width < 16 for this test.
862*6467f958SSadaf Ebrahimi             if (x0_dim == 0 && x0 < 16) x0 = 16;
863*6467f958SSadaf Ebrahimi             size_t x1 = static_cast<size_t>(
864*6467f958SSadaf Ebrahimi                 fmin(fmin(A / M / x0, maximum_sizes[x1_dim]),
865*6467f958SSadaf Ebrahimi                      other_sizes[(other_size++) % num_other_sizes]));
866*6467f958SSadaf Ebrahimi 
867*6467f958SSadaf Ebrahimi             // Valid image sizes cannot be below 1. Due to the workaround for
868*6467f958SSadaf Ebrahimi             // the xo_dim where x0 is overidden to 16 there might not be enough
869*6467f958SSadaf Ebrahimi             // space left for x1 dimension. This could be a fractional 0.x size
870*6467f958SSadaf Ebrahimi             // that when cast to integer would result in a value 0. In these
871*6467f958SSadaf Ebrahimi             // cases we clamp the size to a minimum of 1.
872*6467f958SSadaf Ebrahimi             if (x1 < 1) x1 = 1;
873*6467f958SSadaf Ebrahimi 
874*6467f958SSadaf Ebrahimi             // M and x0 cannot be '0' as they derive from clDeviceInfo calls
875*6467f958SSadaf Ebrahimi             assert(x0 > 0 && M > 0);
876*6467f958SSadaf Ebrahimi 
877*6467f958SSadaf Ebrahimi             // Store the size
878*6467f958SSadaf Ebrahimi             sizes[(*numberOfSizes)][fixed_dim] = M;
879*6467f958SSadaf Ebrahimi             sizes[(*numberOfSizes)][x0_dim] = x0;
880*6467f958SSadaf Ebrahimi             sizes[(*numberOfSizes)][x1_dim] = x1;
881*6467f958SSadaf Ebrahimi             ++(*numberOfSizes);
882*6467f958SSadaf Ebrahimi         }
883*6467f958SSadaf Ebrahimi     }
884*6467f958SSadaf Ebrahimi 
885*6467f958SSadaf Ebrahimi     // Log the results
886*6467f958SSadaf Ebrahimi     for (int j = 0; j < (int)(*numberOfSizes); j++)
887*6467f958SSadaf Ebrahimi     {
888*6467f958SSadaf Ebrahimi         switch (image_type)
889*6467f958SSadaf Ebrahimi         {
890*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D:
891*6467f958SSadaf Ebrahimi                 log_info(" size[%d] = [%zu] (%g MB image)\n", j, sizes[j][0],
892*6467f958SSadaf Ebrahimi                          raw_pixel_size * sizes[j][0] * sizes[j][1]
893*6467f958SSadaf Ebrahimi                              * sizes[j][2] / (1024.0 * 1024.0));
894*6467f958SSadaf Ebrahimi                 break;
895*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
896*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
897*6467f958SSadaf Ebrahimi                 log_info(" size[%d] = [%zu %zu] (%g MB image)\n", j,
898*6467f958SSadaf Ebrahimi                          sizes[j][0], sizes[j][1],
899*6467f958SSadaf Ebrahimi                          raw_pixel_size * sizes[j][0] * sizes[j][1]
900*6467f958SSadaf Ebrahimi                              * sizes[j][2] / (1024.0 * 1024.0));
901*6467f958SSadaf Ebrahimi                 break;
902*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
903*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
904*6467f958SSadaf Ebrahimi                 log_info(" size[%d] = [%zu %zu %zu] (%g MB image)\n", j,
905*6467f958SSadaf Ebrahimi                          sizes[j][0], sizes[j][1], sizes[j][2],
906*6467f958SSadaf Ebrahimi                          raw_pixel_size * sizes[j][0] * sizes[j][1]
907*6467f958SSadaf Ebrahimi                              * sizes[j][2] / (1024.0 * 1024.0));
908*6467f958SSadaf Ebrahimi                 break;
909*6467f958SSadaf Ebrahimi         }
910*6467f958SSadaf Ebrahimi     }
911*6467f958SSadaf Ebrahimi }
912*6467f958SSadaf Ebrahimi 
get_max_absolute_error(const cl_image_format * format,image_sampler_data * sampler)913*6467f958SSadaf Ebrahimi float get_max_absolute_error(const cl_image_format *format,
914*6467f958SSadaf Ebrahimi                              image_sampler_data *sampler)
915*6467f958SSadaf Ebrahimi {
916*6467f958SSadaf Ebrahimi     if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f;
917*6467f958SSadaf Ebrahimi 
918*6467f958SSadaf Ebrahimi     switch (format->image_channel_data_type)
919*6467f958SSadaf Ebrahimi     {
920*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8: return 1.0f / 127.0f;
921*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8: return 1.0f / 255.0f;
922*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16: return 1.0f / 65535.0f;
923*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16: return 1.0f / 32767.0f;
924*6467f958SSadaf Ebrahimi         case CL_FLOAT: return CL_FLT_MIN;
925*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
926*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE: return 0x1.0p-14f;
927*6467f958SSadaf Ebrahimi #endif
928*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555:
929*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565: return 1.0f / 31.0f;
930*6467f958SSadaf Ebrahimi         default: return 0.0f;
931*6467f958SSadaf Ebrahimi     }
932*6467f958SSadaf Ebrahimi }
933*6467f958SSadaf Ebrahimi 
get_max_relative_error(const cl_image_format * format,image_sampler_data * sampler,int is3D,int isLinearFilter)934*6467f958SSadaf Ebrahimi float get_max_relative_error(const cl_image_format *format,
935*6467f958SSadaf Ebrahimi                              image_sampler_data *sampler, int is3D,
936*6467f958SSadaf Ebrahimi                              int isLinearFilter)
937*6467f958SSadaf Ebrahimi {
938*6467f958SSadaf Ebrahimi     float maxError = 0.0f;
939*6467f958SSadaf Ebrahimi     float sampleCount = 1.0f;
940*6467f958SSadaf Ebrahimi     if (isLinearFilter) sampleCount = is3D ? 8.0f : 4.0f;
941*6467f958SSadaf Ebrahimi 
942*6467f958SSadaf Ebrahimi     // Note that the ULP is defined here as the unit in the last place of the
943*6467f958SSadaf Ebrahimi     // maximum magnitude sample used for filtering.
944*6467f958SSadaf Ebrahimi 
945*6467f958SSadaf Ebrahimi     // Section 8.3
946*6467f958SSadaf Ebrahimi     switch (format->image_channel_data_type)
947*6467f958SSadaf Ebrahimi     {
948*6467f958SSadaf Ebrahimi         // The spec allows 2 ulps of error for normalized formats
949*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8:
950*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8:
951*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16:
952*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16:
953*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565:
954*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555:
955*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010:
956*6467f958SSadaf Ebrahimi             // Maximum sampling error for round to zero normalization based on
957*6467f958SSadaf Ebrahimi             // multiplication by reciprocal (using reciprocal generated in
958*6467f958SSadaf Ebrahimi             // round to +inf mode, so that 1.0 matches spec)
959*6467f958SSadaf Ebrahimi             maxError = 2 * FLT_EPSILON * sampleCount;
960*6467f958SSadaf Ebrahimi             break;
961*6467f958SSadaf Ebrahimi 
962*6467f958SSadaf Ebrahimi             // If the implementation supports these formats then it will have to
963*6467f958SSadaf Ebrahimi             // allow rounding error here too, because not all 32-bit ints are
964*6467f958SSadaf Ebrahimi             // exactly representable in float
965*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32:
966*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: maxError = 1 * FLT_EPSILON; break;
967*6467f958SSadaf Ebrahimi     }
968*6467f958SSadaf Ebrahimi 
969*6467f958SSadaf Ebrahimi 
970*6467f958SSadaf Ebrahimi     // Section 8.2
971*6467f958SSadaf Ebrahimi     if (sampler->addressing_mode == CL_ADDRESS_REPEAT
972*6467f958SSadaf Ebrahimi         || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT
973*6467f958SSadaf Ebrahimi         || sampler->filter_mode != CL_FILTER_NEAREST
974*6467f958SSadaf Ebrahimi         || sampler->normalized_coords)
975*6467f958SSadaf Ebrahimi #if defined(__APPLE__)
976*6467f958SSadaf Ebrahimi     {
977*6467f958SSadaf Ebrahimi         if (sampler->filter_mode != CL_FILTER_NEAREST)
978*6467f958SSadaf Ebrahimi         {
979*6467f958SSadaf Ebrahimi             // The maximum
980*6467f958SSadaf Ebrahimi             if (gDeviceType == CL_DEVICE_TYPE_GPU)
981*6467f958SSadaf Ebrahimi                 // Some GPUs ain't so accurate
982*6467f958SSadaf Ebrahimi                 maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);
983*6467f958SSadaf Ebrahimi             else
984*6467f958SSadaf Ebrahimi                 // The standard method of 2d linear filtering delivers 4.0 ulps
985*6467f958SSadaf Ebrahimi                 // of error in round to nearest (8 in rtz).
986*6467f958SSadaf Ebrahimi                 maxError += 4.0f * FLT_EPSILON;
987*6467f958SSadaf Ebrahimi         }
988*6467f958SSadaf Ebrahimi         else
989*6467f958SSadaf Ebrahimi             // normalized coordinates will introduce some error into the
990*6467f958SSadaf Ebrahimi             // fractional part of the address, affecting results
991*6467f958SSadaf Ebrahimi             maxError += 4.0f * FLT_EPSILON;
992*6467f958SSadaf Ebrahimi     }
993*6467f958SSadaf Ebrahimi #else
994*6467f958SSadaf Ebrahimi     {
995*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
996*6467f958SSadaf Ebrahimi #warning Implementations will likely wish to pick a max allowable sampling error policy here that is better than the spec
997*6467f958SSadaf Ebrahimi #endif
998*6467f958SSadaf Ebrahimi         // The spec allows linear filters to return any result most of the time.
999*6467f958SSadaf Ebrahimi         // That's fine for implementations but a problem for testing. After all
1000*6467f958SSadaf Ebrahimi         // users aren't going to like garbage images.  We have "picked a number"
1001*6467f958SSadaf Ebrahimi         // here that we are going to attempt to conform to. Implementations are
1002*6467f958SSadaf Ebrahimi         // free to pick another number, like infinity, if they like.
1003*6467f958SSadaf Ebrahimi         // We picked a number for you, to provide /some/ sanity
1004*6467f958SSadaf Ebrahimi         maxError = MAKE_HEX_FLOAT(0x1.0p-7f, 0x1L, -7);
1005*6467f958SSadaf Ebrahimi         // ...but this is what the spec allows:
1006*6467f958SSadaf Ebrahimi         // maxError = INFINITY;
1007*6467f958SSadaf Ebrahimi         // Please feel free to pick any positive number. (NaN wont work.)
1008*6467f958SSadaf Ebrahimi     }
1009*6467f958SSadaf Ebrahimi #endif
1010*6467f958SSadaf Ebrahimi 
1011*6467f958SSadaf Ebrahimi     // The error calculation itself can introduce error
1012*6467f958SSadaf Ebrahimi     maxError += FLT_EPSILON * 2;
1013*6467f958SSadaf Ebrahimi 
1014*6467f958SSadaf Ebrahimi     return maxError;
1015*6467f958SSadaf Ebrahimi }
1016*6467f958SSadaf Ebrahimi 
get_format_max_int(const cl_image_format * format)1017*6467f958SSadaf Ebrahimi size_t get_format_max_int(const cl_image_format *format)
1018*6467f958SSadaf Ebrahimi {
1019*6467f958SSadaf Ebrahimi     switch (format->image_channel_data_type)
1020*6467f958SSadaf Ebrahimi     {
1021*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8:
1022*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8: return 127;
1023*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8:
1024*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: return 255;
1025*6467f958SSadaf Ebrahimi 
1026*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16:
1027*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16: return 32767;
1028*6467f958SSadaf Ebrahimi 
1029*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16:
1030*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16: return 65535;
1031*6467f958SSadaf Ebrahimi 
1032*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32: return 2147483647L;
1033*6467f958SSadaf Ebrahimi 
1034*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: return 4294967295LL;
1035*6467f958SSadaf Ebrahimi 
1036*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565:
1037*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555: return 31;
1038*6467f958SSadaf Ebrahimi 
1039*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010: return 1023;
1040*6467f958SSadaf Ebrahimi 
1041*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT: return 1 << 10;
1042*6467f958SSadaf Ebrahimi 
1043*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
1044*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE: return 16384;
1045*6467f958SSadaf Ebrahimi #endif
1046*6467f958SSadaf Ebrahimi         default: return 0;
1047*6467f958SSadaf Ebrahimi     }
1048*6467f958SSadaf Ebrahimi }
1049*6467f958SSadaf Ebrahimi 
get_format_min_int(const cl_image_format * format)1050*6467f958SSadaf Ebrahimi int get_format_min_int(const cl_image_format *format)
1051*6467f958SSadaf Ebrahimi {
1052*6467f958SSadaf Ebrahimi     switch (format->image_channel_data_type)
1053*6467f958SSadaf Ebrahimi     {
1054*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8:
1055*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8: return -128;
1056*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8:
1057*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: return 0;
1058*6467f958SSadaf Ebrahimi 
1059*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16:
1060*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16: return -32768;
1061*6467f958SSadaf Ebrahimi 
1062*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16:
1063*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16: return 0;
1064*6467f958SSadaf Ebrahimi 
1065*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32: return -2147483648LL;
1066*6467f958SSadaf Ebrahimi 
1067*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: return 0;
1068*6467f958SSadaf Ebrahimi 
1069*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565:
1070*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555:
1071*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010: return 0;
1072*6467f958SSadaf Ebrahimi 
1073*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT: return -(1 << 10);
1074*6467f958SSadaf Ebrahimi 
1075*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
1076*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE: return -16384;
1077*6467f958SSadaf Ebrahimi #endif
1078*6467f958SSadaf Ebrahimi 
1079*6467f958SSadaf Ebrahimi         default: return 0;
1080*6467f958SSadaf Ebrahimi     }
1081*6467f958SSadaf Ebrahimi }
1082*6467f958SSadaf Ebrahimi 
convert_float_to_half(float f)1083*6467f958SSadaf Ebrahimi cl_half convert_float_to_half(float f)
1084*6467f958SSadaf Ebrahimi {
1085*6467f958SSadaf Ebrahimi     switch (gFloatToHalfRoundingMode)
1086*6467f958SSadaf Ebrahimi     {
1087*6467f958SSadaf Ebrahimi         case kRoundToNearestEven: return cl_half_from_float(f, CL_HALF_RTE);
1088*6467f958SSadaf Ebrahimi         case kRoundTowardZero: return cl_half_from_float(f, CL_HALF_RTZ);
1089*6467f958SSadaf Ebrahimi         default:
1090*6467f958SSadaf Ebrahimi             log_error("ERROR: Test internal error -- unhandled or unknown "
1091*6467f958SSadaf Ebrahimi                       "float->half rounding mode.\n");
1092*6467f958SSadaf Ebrahimi             exit(-1);
1093*6467f958SSadaf Ebrahimi             return 0xffff;
1094*6467f958SSadaf Ebrahimi     }
1095*6467f958SSadaf Ebrahimi }
1096*6467f958SSadaf Ebrahimi 
get_image_size(image_descriptor const * imageInfo)1097*6467f958SSadaf Ebrahimi cl_ulong get_image_size(image_descriptor const *imageInfo)
1098*6467f958SSadaf Ebrahimi {
1099*6467f958SSadaf Ebrahimi     cl_ulong imageSize;
1100*6467f958SSadaf Ebrahimi 
1101*6467f958SSadaf Ebrahimi     // Assumes rowPitch and slicePitch are always correctly defined
1102*6467f958SSadaf Ebrahimi     if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
1103*6467f958SSadaf Ebrahimi     {
1104*6467f958SSadaf Ebrahimi         imageSize = (size_t)compute_mipmapped_image_size(*imageInfo);
1105*6467f958SSadaf Ebrahimi     }
1106*6467f958SSadaf Ebrahimi     else
1107*6467f958SSadaf Ebrahimi     {
1108*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
1109*6467f958SSadaf Ebrahimi         {
1110*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D: imageSize = imageInfo->rowPitch; break;
1111*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
1112*6467f958SSadaf Ebrahimi                 imageSize = imageInfo->height * imageInfo->rowPitch;
1113*6467f958SSadaf Ebrahimi                 break;
1114*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
1115*6467f958SSadaf Ebrahimi                 imageSize = imageInfo->depth * imageInfo->slicePitch;
1116*6467f958SSadaf Ebrahimi                 break;
1117*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1118*6467f958SSadaf Ebrahimi                 imageSize = imageInfo->arraySize * imageInfo->slicePitch;
1119*6467f958SSadaf Ebrahimi                 break;
1120*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1121*6467f958SSadaf Ebrahimi                 imageSize = imageInfo->arraySize * imageInfo->slicePitch;
1122*6467f958SSadaf Ebrahimi                 break;
1123*6467f958SSadaf Ebrahimi             default:
1124*6467f958SSadaf Ebrahimi                 log_error("ERROR: Cannot identify image type %x\n",
1125*6467f958SSadaf Ebrahimi                           imageInfo->type);
1126*6467f958SSadaf Ebrahimi                 abort();
1127*6467f958SSadaf Ebrahimi         }
1128*6467f958SSadaf Ebrahimi     }
1129*6467f958SSadaf Ebrahimi     return imageSize;
1130*6467f958SSadaf Ebrahimi }
1131*6467f958SSadaf Ebrahimi 
1132*6467f958SSadaf Ebrahimi // Calculate image size in megabytes (strictly, mebibytes). Result is rounded
1133*6467f958SSadaf Ebrahimi // up.
get_image_size_mb(image_descriptor const * imageInfo)1134*6467f958SSadaf Ebrahimi cl_ulong get_image_size_mb(image_descriptor const *imageInfo)
1135*6467f958SSadaf Ebrahimi {
1136*6467f958SSadaf Ebrahimi     cl_ulong imageSize = get_image_size(imageInfo);
1137*6467f958SSadaf Ebrahimi     cl_ulong mb = imageSize / (1024 * 1024);
1138*6467f958SSadaf Ebrahimi     if (imageSize % (1024 * 1024) > 0)
1139*6467f958SSadaf Ebrahimi     {
1140*6467f958SSadaf Ebrahimi         mb += 1;
1141*6467f958SSadaf Ebrahimi     }
1142*6467f958SSadaf Ebrahimi     return mb;
1143*6467f958SSadaf Ebrahimi }
1144*6467f958SSadaf Ebrahimi 
1145*6467f958SSadaf Ebrahimi 
// Offset added to the element index when generate_random_image_data() fills
// an image with a ramp in gTestRounding mode. Defaults to 0.
1146*6467f958SSadaf Ebrahimi uint64_t gRoundingStartValue = 0;
1147*6467f958SSadaf Ebrahimi 
1148*6467f958SSadaf Ebrahimi 
// Rewrites `data` in place so that it contains no bit patterns that would be
// read as infinity or NaN, whether the buffer is later interpreted as 32-bit
// floats or as 16-bit half floats.
//
//   data      - buffer to scrub; may have any alignment
//   allocSize - size of the buffer in bytes
//
// Two passes are made over the same bytes:
//   1. every complete 32-bit word whose 8 exponent bits (0x7F800000) are all
//      set has its top exponent bit (0x40000000) flipped;
//   2. every complete 16-bit word whose 5 exponent bits (0x7C00) are all set
//      has its top exponent bit (0x4000) flipped.
// Trailing bytes that do not form a complete word are left untouched by the
// corresponding pass.
//
// Values are loaded and stored with memcpy instead of through casted
// pointers, so the function does not depend on `data` being suitably aligned
// and does not violate strict-aliasing rules.
void escape_inf_nan_values(char *data, size_t allocSize)
{
    // filter values with 8 not-quite-highest bits
    const size_t wordCount = allocSize >> 2;
    for (size_t i = 0; i < wordCount; i++)
    {
        char *slot = data + i * sizeof(unsigned int);
        unsigned int bits;
        memcpy(&bits, slot, sizeof(bits));
        if ((bits & 0x7F800000) == 0x7F800000)
        {
            bits ^= 0x40000000;
            memcpy(slot, &bits, sizeof(bits));
        }
    }

    // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits
    // = 0x7C00 are special)
    const size_t halfCount = allocSize >> 1;
    for (size_t i = 0; i < halfCount; i++)
    {
        char *slot = data + i * sizeof(unsigned short);
        unsigned short bits;
        memcpy(&bits, slot, sizeof(bits));
        if ((bits & 0x7C00) == 0x7C00)
        {
            bits = static_cast<unsigned short>(bits ^ 0x4000);
            memcpy(slot, &bits, sizeof(bits));
        }
    }
}
1166*6467f958SSadaf Ebrahimi 
generate_random_image_data(image_descriptor * imageInfo,BufferOwningPtr<char> & P,MTdata d)1167*6467f958SSadaf Ebrahimi char *generate_random_image_data(image_descriptor *imageInfo,
1168*6467f958SSadaf Ebrahimi                                  BufferOwningPtr<char> &P, MTdata d)
1169*6467f958SSadaf Ebrahimi {
1170*6467f958SSadaf Ebrahimi     size_t allocSize = static_cast<size_t>(get_image_size(imageInfo));
1171*6467f958SSadaf Ebrahimi     size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format);
1172*6467f958SSadaf Ebrahimi     size_t i;
1173*6467f958SSadaf Ebrahimi 
1174*6467f958SSadaf Ebrahimi     if (imageInfo->num_mip_levels > 1)
1175*6467f958SSadaf Ebrahimi         allocSize =
1176*6467f958SSadaf Ebrahimi             static_cast<size_t>(compute_mipmapped_image_size(*imageInfo));
1177*6467f958SSadaf Ebrahimi 
1178*6467f958SSadaf Ebrahimi #if defined(__APPLE__)
1179*6467f958SSadaf Ebrahimi     char *data = NULL;
1180*6467f958SSadaf Ebrahimi     if (gDeviceType == CL_DEVICE_TYPE_CPU)
1181*6467f958SSadaf Ebrahimi     {
1182*6467f958SSadaf Ebrahimi         size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;
1183*6467f958SSadaf Ebrahimi 
1184*6467f958SSadaf Ebrahimi         void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
1185*6467f958SSadaf Ebrahimi                          MAP_ANON | MAP_PRIVATE, 0, 0);
1186*6467f958SSadaf Ebrahimi         intptr_t data_end = (intptr_t)map + mapSize - 4096;
1187*6467f958SSadaf Ebrahimi         data = (char *)(data_end - (intptr_t)allocSize);
1188*6467f958SSadaf Ebrahimi 
1189*6467f958SSadaf Ebrahimi         mprotect(map, 4096, PROT_NONE);
1190*6467f958SSadaf Ebrahimi         mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
1191*6467f958SSadaf Ebrahimi         P.reset(data, map, mapSize, allocSize);
1192*6467f958SSadaf Ebrahimi     }
1193*6467f958SSadaf Ebrahimi     else
1194*6467f958SSadaf Ebrahimi     {
1195*6467f958SSadaf Ebrahimi         data = (char *)malloc(allocSize);
1196*6467f958SSadaf Ebrahimi         P.reset(data, NULL, 0, allocSize);
1197*6467f958SSadaf Ebrahimi     }
1198*6467f958SSadaf Ebrahimi #else
1199*6467f958SSadaf Ebrahimi     P.reset(NULL); // Free already allocated memory first, then try to allocate
1200*6467f958SSadaf Ebrahimi                    // new block.
1201*6467f958SSadaf Ebrahimi     char *data =
1202*6467f958SSadaf Ebrahimi         (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
1203*6467f958SSadaf Ebrahimi     P.reset(data, NULL, 0, allocSize, true);
1204*6467f958SSadaf Ebrahimi #endif
1205*6467f958SSadaf Ebrahimi 
1206*6467f958SSadaf Ebrahimi     if (data == NULL)
1207*6467f958SSadaf Ebrahimi     {
1208*6467f958SSadaf Ebrahimi         log_error("ERROR: Unable to malloc %zu bytes for "
1209*6467f958SSadaf Ebrahimi                   "generate_random_image_data\n",
1210*6467f958SSadaf Ebrahimi                   allocSize);
1211*6467f958SSadaf Ebrahimi         return 0;
1212*6467f958SSadaf Ebrahimi     }
1213*6467f958SSadaf Ebrahimi 
1214*6467f958SSadaf Ebrahimi     if (gTestRounding)
1215*6467f958SSadaf Ebrahimi     {
1216*6467f958SSadaf Ebrahimi         // Special case: fill with a ramp from 0 to the size of the type
1217*6467f958SSadaf Ebrahimi         size_t typeSize = get_format_type_size(imageInfo->format);
1218*6467f958SSadaf Ebrahimi         switch (typeSize)
1219*6467f958SSadaf Ebrahimi         {
1220*6467f958SSadaf Ebrahimi             case 1: {
1221*6467f958SSadaf Ebrahimi                 char *ptr = data;
1222*6467f958SSadaf Ebrahimi                 for (i = 0; i < allocSize; i++)
1223*6467f958SSadaf Ebrahimi                     ptr[i] = (cl_char)(i + gRoundingStartValue);
1224*6467f958SSadaf Ebrahimi             }
1225*6467f958SSadaf Ebrahimi             break;
1226*6467f958SSadaf Ebrahimi             case 2: {
1227*6467f958SSadaf Ebrahimi                 cl_short *ptr = (cl_short *)data;
1228*6467f958SSadaf Ebrahimi                 for (i = 0; i < allocSize / 2; i++)
1229*6467f958SSadaf Ebrahimi                     ptr[i] = (cl_short)(i + gRoundingStartValue);
1230*6467f958SSadaf Ebrahimi             }
1231*6467f958SSadaf Ebrahimi             break;
1232*6467f958SSadaf Ebrahimi             case 4: {
1233*6467f958SSadaf Ebrahimi                 cl_int *ptr = (cl_int *)data;
1234*6467f958SSadaf Ebrahimi                 for (i = 0; i < allocSize / 4; i++)
1235*6467f958SSadaf Ebrahimi                     ptr[i] = (cl_int)(i + gRoundingStartValue);
1236*6467f958SSadaf Ebrahimi             }
1237*6467f958SSadaf Ebrahimi             break;
1238*6467f958SSadaf Ebrahimi         }
1239*6467f958SSadaf Ebrahimi 
1240*6467f958SSadaf Ebrahimi         // Note: inf or nan float values would cause problems, although we don't
1241*6467f958SSadaf Ebrahimi         // know this will actually be a float, so we just know what to look for
1242*6467f958SSadaf Ebrahimi         escape_inf_nan_values(data, allocSize);
1243*6467f958SSadaf Ebrahimi         return data;
1244*6467f958SSadaf Ebrahimi     }
1245*6467f958SSadaf Ebrahimi 
1246*6467f958SSadaf Ebrahimi     // Otherwise, we should be able to just fill with random bits no matter what
1247*6467f958SSadaf Ebrahimi     cl_uint *p = (cl_uint *)data;
1248*6467f958SSadaf Ebrahimi     for (i = 0; i + 4 <= allocSize; i += 4) p[i / 4] = genrand_int32(d);
1249*6467f958SSadaf Ebrahimi 
1250*6467f958SSadaf Ebrahimi     for (; i < allocSize; i++) data[i] = genrand_int32(d);
1251*6467f958SSadaf Ebrahimi 
1252*6467f958SSadaf Ebrahimi     // Note: inf or nan float values would cause problems, although we don't
1253*6467f958SSadaf Ebrahimi     // know this will actually be a float, so we just know what to look for
1254*6467f958SSadaf Ebrahimi     escape_inf_nan_values(data, allocSize);
1255*6467f958SSadaf Ebrahimi 
1256*6467f958SSadaf Ebrahimi     if (/*!gTestMipmaps*/ imageInfo->num_mip_levels < 2)
1257*6467f958SSadaf Ebrahimi     {
1258*6467f958SSadaf Ebrahimi         // Fill unused edges with -1, NaN for float
1259*6467f958SSadaf Ebrahimi         if (imageInfo->rowPitch > pixelRowBytes)
1260*6467f958SSadaf Ebrahimi         {
1261*6467f958SSadaf Ebrahimi             size_t height = 0;
1262*6467f958SSadaf Ebrahimi 
1263*6467f958SSadaf Ebrahimi             switch (imageInfo->type)
1264*6467f958SSadaf Ebrahimi             {
1265*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE2D:
1266*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE3D:
1267*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1268*6467f958SSadaf Ebrahimi                     height = imageInfo->height;
1269*6467f958SSadaf Ebrahimi                     break;
1270*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1271*6467f958SSadaf Ebrahimi                     height = imageInfo->arraySize;
1272*6467f958SSadaf Ebrahimi                     break;
1273*6467f958SSadaf Ebrahimi             }
1274*6467f958SSadaf Ebrahimi 
1275*6467f958SSadaf Ebrahimi             // Fill in the row padding regions
1276*6467f958SSadaf Ebrahimi             for (i = 0; i < height; i++)
1277*6467f958SSadaf Ebrahimi             {
1278*6467f958SSadaf Ebrahimi                 size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
1279*6467f958SSadaf Ebrahimi                 size_t length = imageInfo->rowPitch - pixelRowBytes;
1280*6467f958SSadaf Ebrahimi                 memset(data + offset, 0xff, length);
1281*6467f958SSadaf Ebrahimi             }
1282*6467f958SSadaf Ebrahimi         }
1283*6467f958SSadaf Ebrahimi 
1284*6467f958SSadaf Ebrahimi         // Fill in the slice padding regions, if necessary:
1285*6467f958SSadaf Ebrahimi 
1286*6467f958SSadaf Ebrahimi         size_t slice_dimension = imageInfo->height;
1287*6467f958SSadaf Ebrahimi         if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1288*6467f958SSadaf Ebrahimi         {
1289*6467f958SSadaf Ebrahimi             slice_dimension = imageInfo->arraySize;
1290*6467f958SSadaf Ebrahimi         }
1291*6467f958SSadaf Ebrahimi 
1292*6467f958SSadaf Ebrahimi         if (imageInfo->slicePitch > slice_dimension * imageInfo->rowPitch)
1293*6467f958SSadaf Ebrahimi         {
1294*6467f958SSadaf Ebrahimi             size_t depth = 0;
1295*6467f958SSadaf Ebrahimi             switch (imageInfo->type)
1296*6467f958SSadaf Ebrahimi             {
1297*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE2D:
1298*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE3D: depth = imageInfo->depth; break;
1299*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1300*6467f958SSadaf Ebrahimi                 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1301*6467f958SSadaf Ebrahimi                     depth = imageInfo->arraySize;
1302*6467f958SSadaf Ebrahimi                     break;
1303*6467f958SSadaf Ebrahimi             }
1304*6467f958SSadaf Ebrahimi 
1305*6467f958SSadaf Ebrahimi             for (i = 0; i < depth; i++)
1306*6467f958SSadaf Ebrahimi             {
1307*6467f958SSadaf Ebrahimi                 size_t offset = i * imageInfo->slicePitch
1308*6467f958SSadaf Ebrahimi                     + slice_dimension * imageInfo->rowPitch;
1309*6467f958SSadaf Ebrahimi                 size_t length = imageInfo->slicePitch
1310*6467f958SSadaf Ebrahimi                     - slice_dimension * imageInfo->rowPitch;
1311*6467f958SSadaf Ebrahimi                 memset(data + offset, 0xff, length);
1312*6467f958SSadaf Ebrahimi             }
1313*6467f958SSadaf Ebrahimi         }
1314*6467f958SSadaf Ebrahimi     }
1315*6467f958SSadaf Ebrahimi 
1316*6467f958SSadaf Ebrahimi     return data;
1317*6467f958SSadaf Ebrahimi }
1318*6467f958SSadaf Ebrahimi 
// Clamps v to the closed range [-1.0f, 1.0f] using fminf and fmaxf.
1319*6467f958SSadaf Ebrahimi #define CLAMP_FLOAT(v) (fmaxf(fminf(v, 1.f), -1.f))
1320*6467f958SSadaf Ebrahimi 
1321*6467f958SSadaf Ebrahimi 
read_image_pixel_float(void * imageData,image_descriptor * imageInfo,int x,int y,int z,float * outData,int lod)1322*6467f958SSadaf Ebrahimi void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
1323*6467f958SSadaf Ebrahimi                             int y, int z, float *outData, int lod)
1324*6467f958SSadaf Ebrahimi {
1325*6467f958SSadaf Ebrahimi     size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
1326*6467f958SSadaf Ebrahimi            depth_lod = imageInfo->depth;
1327*6467f958SSadaf Ebrahimi     size_t slice_pitch_lod = 0, row_pitch_lod = 0;
1328*6467f958SSadaf Ebrahimi 
1329*6467f958SSadaf Ebrahimi     if (imageInfo->num_mip_levels > 1)
1330*6467f958SSadaf Ebrahimi     {
1331*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
1332*6467f958SSadaf Ebrahimi         {
1333*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
1334*6467f958SSadaf Ebrahimi                 depth_lod =
1335*6467f958SSadaf Ebrahimi                     (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
1336*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
1337*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1338*6467f958SSadaf Ebrahimi                 height_lod =
1339*6467f958SSadaf Ebrahimi                     (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
1340*6467f958SSadaf Ebrahimi             default:
1341*6467f958SSadaf Ebrahimi                 width_lod =
1342*6467f958SSadaf Ebrahimi                     (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
1343*6467f958SSadaf Ebrahimi         }
1344*6467f958SSadaf Ebrahimi         row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
1345*6467f958SSadaf Ebrahimi         if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1346*6467f958SSadaf Ebrahimi             slice_pitch_lod = row_pitch_lod;
1347*6467f958SSadaf Ebrahimi         else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
1348*6467f958SSadaf Ebrahimi                  || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
1349*6467f958SSadaf Ebrahimi             slice_pitch_lod = row_pitch_lod * height_lod;
1350*6467f958SSadaf Ebrahimi     }
1351*6467f958SSadaf Ebrahimi     else
1352*6467f958SSadaf Ebrahimi     {
1353*6467f958SSadaf Ebrahimi         row_pitch_lod = imageInfo->rowPitch;
1354*6467f958SSadaf Ebrahimi         slice_pitch_lod = imageInfo->slicePitch;
1355*6467f958SSadaf Ebrahimi     }
1356*6467f958SSadaf Ebrahimi     if (x < 0 || y < 0 || z < 0 || x >= (int)width_lod
1357*6467f958SSadaf Ebrahimi         || (height_lod != 0 && y >= (int)height_lod)
1358*6467f958SSadaf Ebrahimi         || (depth_lod != 0 && z >= (int)depth_lod)
1359*6467f958SSadaf Ebrahimi         || (imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize))
1360*6467f958SSadaf Ebrahimi     {
1361*6467f958SSadaf Ebrahimi         outData[0] = outData[1] = outData[2] = outData[3] = 0;
1362*6467f958SSadaf Ebrahimi         if (!has_alpha(imageInfo->format)) outData[3] = 1;
1363*6467f958SSadaf Ebrahimi         return;
1364*6467f958SSadaf Ebrahimi     }
1365*6467f958SSadaf Ebrahimi 
1366*6467f958SSadaf Ebrahimi     const cl_image_format *format = imageInfo->format;
1367*6467f958SSadaf Ebrahimi 
1368*6467f958SSadaf Ebrahimi     unsigned int i;
1369*6467f958SSadaf Ebrahimi     float tempData[4];
1370*6467f958SSadaf Ebrahimi 
1371*6467f958SSadaf Ebrahimi     // Advance to the right spot
1372*6467f958SSadaf Ebrahimi     char *ptr = (char *)imageData;
1373*6467f958SSadaf Ebrahimi     size_t pixelSize = get_pixel_size(format);
1374*6467f958SSadaf Ebrahimi 
1375*6467f958SSadaf Ebrahimi     ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
1376*6467f958SSadaf Ebrahimi 
1377*6467f958SSadaf Ebrahimi     // OpenCL only supports reading floats from certain formats
1378*6467f958SSadaf Ebrahimi     size_t channelCount = get_format_channel_count(format);
1379*6467f958SSadaf Ebrahimi     switch (format->image_channel_data_type)
1380*6467f958SSadaf Ebrahimi     {
1381*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8: {
1382*6467f958SSadaf Ebrahimi             cl_char *dPtr = (cl_char *)ptr;
1383*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++)
1384*6467f958SSadaf Ebrahimi                 tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 127.0f);
1385*6467f958SSadaf Ebrahimi             break;
1386*6467f958SSadaf Ebrahimi         }
1387*6467f958SSadaf Ebrahimi 
1388*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8: {
1389*6467f958SSadaf Ebrahimi             unsigned char *dPtr = (unsigned char *)ptr;
1390*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++)
1391*6467f958SSadaf Ebrahimi             {
1392*6467f958SSadaf Ebrahimi                 if ((is_sRGBA_order(imageInfo->format->image_channel_order))
1393*6467f958SSadaf Ebrahimi                     && i < 3) // only RGB need to be converted for sRGBA
1394*6467f958SSadaf Ebrahimi                     tempData[i] = (float)sRGBunmap((float)dPtr[i] / 255.0f);
1395*6467f958SSadaf Ebrahimi                 else
1396*6467f958SSadaf Ebrahimi                     tempData[i] = (float)dPtr[i] / 255.0f;
1397*6467f958SSadaf Ebrahimi             }
1398*6467f958SSadaf Ebrahimi             break;
1399*6467f958SSadaf Ebrahimi         }
1400*6467f958SSadaf Ebrahimi 
1401*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8: {
1402*6467f958SSadaf Ebrahimi             cl_char *dPtr = (cl_char *)ptr;
1403*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1404*6467f958SSadaf Ebrahimi             break;
1405*6467f958SSadaf Ebrahimi         }
1406*6467f958SSadaf Ebrahimi 
1407*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: {
1408*6467f958SSadaf Ebrahimi             cl_uchar *dPtr = (cl_uchar *)ptr;
1409*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1410*6467f958SSadaf Ebrahimi             break;
1411*6467f958SSadaf Ebrahimi         }
1412*6467f958SSadaf Ebrahimi 
1413*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16: {
1414*6467f958SSadaf Ebrahimi             cl_short *dPtr = (cl_short *)ptr;
1415*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++)
1416*6467f958SSadaf Ebrahimi                 tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 32767.0f);
1417*6467f958SSadaf Ebrahimi             break;
1418*6467f958SSadaf Ebrahimi         }
1419*6467f958SSadaf Ebrahimi 
1420*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16: {
1421*6467f958SSadaf Ebrahimi             cl_ushort *dPtr = (cl_ushort *)ptr;
1422*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++)
1423*6467f958SSadaf Ebrahimi                 tempData[i] = (float)dPtr[i] / 65535.0f;
1424*6467f958SSadaf Ebrahimi             break;
1425*6467f958SSadaf Ebrahimi         }
1426*6467f958SSadaf Ebrahimi 
1427*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16: {
1428*6467f958SSadaf Ebrahimi             cl_short *dPtr = (cl_short *)ptr;
1429*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1430*6467f958SSadaf Ebrahimi             break;
1431*6467f958SSadaf Ebrahimi         }
1432*6467f958SSadaf Ebrahimi 
1433*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16: {
1434*6467f958SSadaf Ebrahimi             cl_ushort *dPtr = (cl_ushort *)ptr;
1435*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1436*6467f958SSadaf Ebrahimi             break;
1437*6467f958SSadaf Ebrahimi         }
1438*6467f958SSadaf Ebrahimi 
1439*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT: {
1440*6467f958SSadaf Ebrahimi             cl_half *dPtr = (cl_half *)ptr;
1441*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++)
1442*6467f958SSadaf Ebrahimi                 tempData[i] = cl_half_to_float(dPtr[i]);
1443*6467f958SSadaf Ebrahimi             break;
1444*6467f958SSadaf Ebrahimi         }
1445*6467f958SSadaf Ebrahimi 
1446*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32: {
1447*6467f958SSadaf Ebrahimi             cl_int *dPtr = (cl_int *)ptr;
1448*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1449*6467f958SSadaf Ebrahimi             break;
1450*6467f958SSadaf Ebrahimi         }
1451*6467f958SSadaf Ebrahimi 
1452*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: {
1453*6467f958SSadaf Ebrahimi             cl_uint *dPtr = (cl_uint *)ptr;
1454*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1455*6467f958SSadaf Ebrahimi             break;
1456*6467f958SSadaf Ebrahimi         }
1457*6467f958SSadaf Ebrahimi 
1458*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565: {
1459*6467f958SSadaf Ebrahimi             cl_ushort *dPtr = (cl_ushort *)ptr;
1460*6467f958SSadaf Ebrahimi             tempData[0] = (float)(dPtr[0] >> 11) / (float)31;
1461*6467f958SSadaf Ebrahimi             tempData[1] = (float)((dPtr[0] >> 5) & 63) / (float)63;
1462*6467f958SSadaf Ebrahimi             tempData[2] = (float)(dPtr[0] & 31) / (float)31;
1463*6467f958SSadaf Ebrahimi             break;
1464*6467f958SSadaf Ebrahimi         }
1465*6467f958SSadaf Ebrahimi 
1466*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555: {
1467*6467f958SSadaf Ebrahimi             cl_ushort *dPtr = (cl_ushort *)ptr;
1468*6467f958SSadaf Ebrahimi             tempData[0] = (float)((dPtr[0] >> 10) & 31) / (float)31;
1469*6467f958SSadaf Ebrahimi             tempData[1] = (float)((dPtr[0] >> 5) & 31) / (float)31;
1470*6467f958SSadaf Ebrahimi             tempData[2] = (float)(dPtr[0] & 31) / (float)31;
1471*6467f958SSadaf Ebrahimi             break;
1472*6467f958SSadaf Ebrahimi         }
1473*6467f958SSadaf Ebrahimi 
1474*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010: {
1475*6467f958SSadaf Ebrahimi             cl_uint *dPtr = (cl_uint *)ptr;
1476*6467f958SSadaf Ebrahimi             tempData[0] = (float)((dPtr[0] >> 20) & 0x3ff) / (float)1023;
1477*6467f958SSadaf Ebrahimi             tempData[1] = (float)((dPtr[0] >> 10) & 0x3ff) / (float)1023;
1478*6467f958SSadaf Ebrahimi             tempData[2] = (float)(dPtr[0] & 0x3ff) / (float)1023;
1479*6467f958SSadaf Ebrahimi             break;
1480*6467f958SSadaf Ebrahimi         }
1481*6467f958SSadaf Ebrahimi 
1482*6467f958SSadaf Ebrahimi         case CL_FLOAT: {
1483*6467f958SSadaf Ebrahimi             float *dPtr = (float *)ptr;
1484*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1485*6467f958SSadaf Ebrahimi             break;
1486*6467f958SSadaf Ebrahimi         }
1487*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
1488*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE: {
1489*6467f958SSadaf Ebrahimi             cl_ushort *dPtr = (cl_ushort *)ptr;
1490*6467f958SSadaf Ebrahimi             for (i = 0; i < channelCount; i++)
1491*6467f958SSadaf Ebrahimi                 tempData[i] = ((int)dPtr[i] - 16384) * 0x1.0p-14f;
1492*6467f958SSadaf Ebrahimi             break;
1493*6467f958SSadaf Ebrahimi         }
1494*6467f958SSadaf Ebrahimi #endif
1495*6467f958SSadaf Ebrahimi     }
1496*6467f958SSadaf Ebrahimi 
1497*6467f958SSadaf Ebrahimi 
1498*6467f958SSadaf Ebrahimi     outData[0] = outData[1] = outData[2] = 0;
1499*6467f958SSadaf Ebrahimi     outData[3] = 1;
1500*6467f958SSadaf Ebrahimi 
1501*6467f958SSadaf Ebrahimi     switch (format->image_channel_order)
1502*6467f958SSadaf Ebrahimi     {
1503*6467f958SSadaf Ebrahimi         case CL_A: outData[3] = tempData[0]; break;
1504*6467f958SSadaf Ebrahimi         case CL_R:
1505*6467f958SSadaf Ebrahimi         case CL_Rx: outData[0] = tempData[0]; break;
1506*6467f958SSadaf Ebrahimi         case CL_RA:
1507*6467f958SSadaf Ebrahimi             outData[0] = tempData[0];
1508*6467f958SSadaf Ebrahimi             outData[3] = tempData[1];
1509*6467f958SSadaf Ebrahimi             break;
1510*6467f958SSadaf Ebrahimi         case CL_RG:
1511*6467f958SSadaf Ebrahimi         case CL_RGx:
1512*6467f958SSadaf Ebrahimi             outData[0] = tempData[0];
1513*6467f958SSadaf Ebrahimi             outData[1] = tempData[1];
1514*6467f958SSadaf Ebrahimi             break;
1515*6467f958SSadaf Ebrahimi         case CL_RGB:
1516*6467f958SSadaf Ebrahimi         case CL_RGBx:
1517*6467f958SSadaf Ebrahimi         case CL_sRGB:
1518*6467f958SSadaf Ebrahimi         case CL_sRGBx:
1519*6467f958SSadaf Ebrahimi             outData[0] = tempData[0];
1520*6467f958SSadaf Ebrahimi             outData[1] = tempData[1];
1521*6467f958SSadaf Ebrahimi             outData[2] = tempData[2];
1522*6467f958SSadaf Ebrahimi             break;
1523*6467f958SSadaf Ebrahimi         case CL_RGBA:
1524*6467f958SSadaf Ebrahimi             outData[0] = tempData[0];
1525*6467f958SSadaf Ebrahimi             outData[1] = tempData[1];
1526*6467f958SSadaf Ebrahimi             outData[2] = tempData[2];
1527*6467f958SSadaf Ebrahimi             outData[3] = tempData[3];
1528*6467f958SSadaf Ebrahimi             break;
1529*6467f958SSadaf Ebrahimi         case CL_ARGB:
1530*6467f958SSadaf Ebrahimi             outData[0] = tempData[1];
1531*6467f958SSadaf Ebrahimi             outData[1] = tempData[2];
1532*6467f958SSadaf Ebrahimi             outData[2] = tempData[3];
1533*6467f958SSadaf Ebrahimi             outData[3] = tempData[0];
1534*6467f958SSadaf Ebrahimi             break;
1535*6467f958SSadaf Ebrahimi         case CL_ABGR:
1536*6467f958SSadaf Ebrahimi             outData[0] = tempData[3];
1537*6467f958SSadaf Ebrahimi             outData[1] = tempData[2];
1538*6467f958SSadaf Ebrahimi             outData[2] = tempData[1];
1539*6467f958SSadaf Ebrahimi             outData[3] = tempData[0];
1540*6467f958SSadaf Ebrahimi             break;
1541*6467f958SSadaf Ebrahimi         case CL_BGRA:
1542*6467f958SSadaf Ebrahimi         case CL_sBGRA:
1543*6467f958SSadaf Ebrahimi             outData[0] = tempData[2];
1544*6467f958SSadaf Ebrahimi             outData[1] = tempData[1];
1545*6467f958SSadaf Ebrahimi             outData[2] = tempData[0];
1546*6467f958SSadaf Ebrahimi             outData[3] = tempData[3];
1547*6467f958SSadaf Ebrahimi             break;
1548*6467f958SSadaf Ebrahimi         case CL_INTENSITY:
1549*6467f958SSadaf Ebrahimi             outData[0] = tempData[0];
1550*6467f958SSadaf Ebrahimi             outData[1] = tempData[0];
1551*6467f958SSadaf Ebrahimi             outData[2] = tempData[0];
1552*6467f958SSadaf Ebrahimi             outData[3] = tempData[0];
1553*6467f958SSadaf Ebrahimi             break;
1554*6467f958SSadaf Ebrahimi         case CL_LUMINANCE:
1555*6467f958SSadaf Ebrahimi             outData[0] = tempData[0];
1556*6467f958SSadaf Ebrahimi             outData[1] = tempData[0];
1557*6467f958SSadaf Ebrahimi             outData[2] = tempData[0];
1558*6467f958SSadaf Ebrahimi             break;
1559*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
1560*6467f958SSadaf Ebrahimi         case CL_1RGB_APPLE:
1561*6467f958SSadaf Ebrahimi             outData[0] = tempData[1];
1562*6467f958SSadaf Ebrahimi             outData[1] = tempData[2];
1563*6467f958SSadaf Ebrahimi             outData[2] = tempData[3];
1564*6467f958SSadaf Ebrahimi             outData[3] = 1.0f;
1565*6467f958SSadaf Ebrahimi             break;
1566*6467f958SSadaf Ebrahimi #endif
1567*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
1568*6467f958SSadaf Ebrahimi         case CL_BGR1_APPLE:
1569*6467f958SSadaf Ebrahimi             outData[0] = tempData[2];
1570*6467f958SSadaf Ebrahimi             outData[1] = tempData[1];
1571*6467f958SSadaf Ebrahimi             outData[2] = tempData[0];
1572*6467f958SSadaf Ebrahimi             outData[3] = 1.0f;
1573*6467f958SSadaf Ebrahimi             break;
1574*6467f958SSadaf Ebrahimi #endif
1575*6467f958SSadaf Ebrahimi         case CL_sRGBA:
1576*6467f958SSadaf Ebrahimi             outData[0] = tempData[0];
1577*6467f958SSadaf Ebrahimi             outData[1] = tempData[1];
1578*6467f958SSadaf Ebrahimi             outData[2] = tempData[2];
1579*6467f958SSadaf Ebrahimi             outData[3] = tempData[3];
1580*6467f958SSadaf Ebrahimi             break;
1581*6467f958SSadaf Ebrahimi         case CL_DEPTH: outData[0] = tempData[0]; break;
1582*6467f958SSadaf Ebrahimi         default:
1583*6467f958SSadaf Ebrahimi             log_error("Invalid format:");
1584*6467f958SSadaf Ebrahimi             print_header(format, true);
1585*6467f958SSadaf Ebrahimi             break;
1586*6467f958SSadaf Ebrahimi     }
1587*6467f958SSadaf Ebrahimi }
1588*6467f958SSadaf Ebrahimi 
read_image_pixel_float(void * imageData,image_descriptor * imageInfo,int x,int y,int z,float * outData)1589*6467f958SSadaf Ebrahimi void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
1590*6467f958SSadaf Ebrahimi                             int y, int z, float *outData)
1591*6467f958SSadaf Ebrahimi {
1592*6467f958SSadaf Ebrahimi     read_image_pixel_float(imageData, imageInfo, x, y, z, outData, 0);
1593*6467f958SSadaf Ebrahimi }
1594*6467f958SSadaf Ebrahimi 
get_integer_coords(float x,float y,float z,size_t width,size_t height,size_t depth,image_sampler_data * imageSampler,image_descriptor * imageInfo,int & outX,int & outY,int & outZ)1595*6467f958SSadaf Ebrahimi bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
1596*6467f958SSadaf Ebrahimi                         size_t depth, image_sampler_data *imageSampler,
1597*6467f958SSadaf Ebrahimi                         image_descriptor *imageInfo, int &outX, int &outY,
1598*6467f958SSadaf Ebrahimi                         int &outZ)
1599*6467f958SSadaf Ebrahimi {
1600*6467f958SSadaf Ebrahimi     return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height,
1601*6467f958SSadaf Ebrahimi                                      depth, imageSampler, imageInfo, outX, outY,
1602*6467f958SSadaf Ebrahimi                                      outZ);
1603*6467f958SSadaf Ebrahimi }
1604*6467f958SSadaf Ebrahimi 
get_integer_coords_offset(float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,size_t width,size_t height,size_t depth,image_sampler_data * imageSampler,image_descriptor * imageInfo,int & outX,int & outY,int & outZ)1605*6467f958SSadaf Ebrahimi bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
1606*6467f958SSadaf Ebrahimi                                float yAddressOffset, float zAddressOffset,
1607*6467f958SSadaf Ebrahimi                                size_t width, size_t height, size_t depth,
1608*6467f958SSadaf Ebrahimi                                image_sampler_data *imageSampler,
1609*6467f958SSadaf Ebrahimi                                image_descriptor *imageInfo, int &outX,
1610*6467f958SSadaf Ebrahimi                                int &outY, int &outZ)
1611*6467f958SSadaf Ebrahimi {
1612*6467f958SSadaf Ebrahimi     AddressFn adFn = sAddressingTable[imageSampler];
1613*6467f958SSadaf Ebrahimi 
1614*6467f958SSadaf Ebrahimi     float refX = floorf(x), refY = floorf(y), refZ = floorf(z);
1615*6467f958SSadaf Ebrahimi 
1616*6467f958SSadaf Ebrahimi     // Handle sampler-directed coordinate normalization + clamping.  Note that
1617*6467f958SSadaf Ebrahimi     // the array coordinate for image array types is expected to be
1618*6467f958SSadaf Ebrahimi     // unnormalized, and is clamped to 0..arraySize-1.
1619*6467f958SSadaf Ebrahimi     if (imageSampler->normalized_coords)
1620*6467f958SSadaf Ebrahimi     {
1621*6467f958SSadaf Ebrahimi         switch (imageSampler->addressing_mode)
1622*6467f958SSadaf Ebrahimi         {
1623*6467f958SSadaf Ebrahimi             case CL_ADDRESS_REPEAT:
1624*6467f958SSadaf Ebrahimi                 x = RepeatNormalizedAddressFn(x, width);
1625*6467f958SSadaf Ebrahimi                 if (height != 0)
1626*6467f958SSadaf Ebrahimi                 {
1627*6467f958SSadaf Ebrahimi                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1628*6467f958SSadaf Ebrahimi                         y = RepeatNormalizedAddressFn(y, height);
1629*6467f958SSadaf Ebrahimi                 }
1630*6467f958SSadaf Ebrahimi                 if (depth != 0)
1631*6467f958SSadaf Ebrahimi                 {
1632*6467f958SSadaf Ebrahimi                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1633*6467f958SSadaf Ebrahimi                         z = RepeatNormalizedAddressFn(z, depth);
1634*6467f958SSadaf Ebrahimi                 }
1635*6467f958SSadaf Ebrahimi 
1636*6467f958SSadaf Ebrahimi                 if (xAddressOffset != 0.0)
1637*6467f958SSadaf Ebrahimi                 {
1638*6467f958SSadaf Ebrahimi                     // Add in the offset
1639*6467f958SSadaf Ebrahimi                     x += xAddressOffset;
1640*6467f958SSadaf Ebrahimi                     // Handle wrapping
1641*6467f958SSadaf Ebrahimi                     if (x > width) x -= (float)width;
1642*6467f958SSadaf Ebrahimi                     if (x < 0) x += (float)width;
1643*6467f958SSadaf Ebrahimi                 }
1644*6467f958SSadaf Ebrahimi                 if ((yAddressOffset != 0.0)
1645*6467f958SSadaf Ebrahimi                     && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
1646*6467f958SSadaf Ebrahimi                 {
1647*6467f958SSadaf Ebrahimi                     // Add in the offset
1648*6467f958SSadaf Ebrahimi                     y += yAddressOffset;
1649*6467f958SSadaf Ebrahimi                     // Handle wrapping
1650*6467f958SSadaf Ebrahimi                     if (y > height) y -= (float)height;
1651*6467f958SSadaf Ebrahimi                     if (y < 0) y += (float)height;
1652*6467f958SSadaf Ebrahimi                 }
1653*6467f958SSadaf Ebrahimi                 if ((zAddressOffset != 0.0)
1654*6467f958SSadaf Ebrahimi                     && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
1655*6467f958SSadaf Ebrahimi                 {
1656*6467f958SSadaf Ebrahimi                     // Add in the offset
1657*6467f958SSadaf Ebrahimi                     z += zAddressOffset;
1658*6467f958SSadaf Ebrahimi                     // Handle wrapping
1659*6467f958SSadaf Ebrahimi                     if (z > depth) z -= (float)depth;
1660*6467f958SSadaf Ebrahimi                     if (z < 0) z += (float)depth;
1661*6467f958SSadaf Ebrahimi                 }
1662*6467f958SSadaf Ebrahimi                 break;
1663*6467f958SSadaf Ebrahimi 
1664*6467f958SSadaf Ebrahimi             case CL_ADDRESS_MIRRORED_REPEAT:
1665*6467f958SSadaf Ebrahimi                 x = MirroredRepeatNormalizedAddressFn(x, width);
1666*6467f958SSadaf Ebrahimi                 if (height != 0)
1667*6467f958SSadaf Ebrahimi                 {
1668*6467f958SSadaf Ebrahimi                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1669*6467f958SSadaf Ebrahimi                         y = MirroredRepeatNormalizedAddressFn(y, height);
1670*6467f958SSadaf Ebrahimi                 }
1671*6467f958SSadaf Ebrahimi                 if (depth != 0)
1672*6467f958SSadaf Ebrahimi                 {
1673*6467f958SSadaf Ebrahimi                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1674*6467f958SSadaf Ebrahimi                         z = MirroredRepeatNormalizedAddressFn(z, depth);
1675*6467f958SSadaf Ebrahimi                 }
1676*6467f958SSadaf Ebrahimi 
1677*6467f958SSadaf Ebrahimi                 if (xAddressOffset != 0.0)
1678*6467f958SSadaf Ebrahimi                 {
1679*6467f958SSadaf Ebrahimi                     float temp = x + xAddressOffset;
1680*6467f958SSadaf Ebrahimi                     if (temp > (float)width)
1681*6467f958SSadaf Ebrahimi                         temp = (float)width - (temp - (float)width);
1682*6467f958SSadaf Ebrahimi                     x = fabsf(temp);
1683*6467f958SSadaf Ebrahimi                 }
1684*6467f958SSadaf Ebrahimi                 if ((yAddressOffset != 0.0)
1685*6467f958SSadaf Ebrahimi                     && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
1686*6467f958SSadaf Ebrahimi                 {
1687*6467f958SSadaf Ebrahimi                     float temp = y + yAddressOffset;
1688*6467f958SSadaf Ebrahimi                     if (temp > (float)height)
1689*6467f958SSadaf Ebrahimi                         temp = (float)height - (temp - (float)height);
1690*6467f958SSadaf Ebrahimi                     y = fabsf(temp);
1691*6467f958SSadaf Ebrahimi                 }
1692*6467f958SSadaf Ebrahimi                 if ((zAddressOffset != 0.0)
1693*6467f958SSadaf Ebrahimi                     && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
1694*6467f958SSadaf Ebrahimi                 {
1695*6467f958SSadaf Ebrahimi                     float temp = z + zAddressOffset;
1696*6467f958SSadaf Ebrahimi                     if (temp > (float)depth)
1697*6467f958SSadaf Ebrahimi                         temp = (float)depth - (temp - (float)depth);
1698*6467f958SSadaf Ebrahimi                     z = fabsf(temp);
1699*6467f958SSadaf Ebrahimi                 }
1700*6467f958SSadaf Ebrahimi                 break;
1701*6467f958SSadaf Ebrahimi 
1702*6467f958SSadaf Ebrahimi             default:
1703*6467f958SSadaf Ebrahimi                 // Also, remultiply to the original coords. This simulates any
1704*6467f958SSadaf Ebrahimi                 // truncation in the pass to OpenCL
1705*6467f958SSadaf Ebrahimi                 x *= (float)width;
1706*6467f958SSadaf Ebrahimi                 x += xAddressOffset;
1707*6467f958SSadaf Ebrahimi 
1708*6467f958SSadaf Ebrahimi                 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1709*6467f958SSadaf Ebrahimi                 {
1710*6467f958SSadaf Ebrahimi                     y *= (float)height;
1711*6467f958SSadaf Ebrahimi                     y += yAddressOffset;
1712*6467f958SSadaf Ebrahimi                 }
1713*6467f958SSadaf Ebrahimi 
1714*6467f958SSadaf Ebrahimi                 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1715*6467f958SSadaf Ebrahimi                 {
1716*6467f958SSadaf Ebrahimi                     z *= (float)depth;
1717*6467f958SSadaf Ebrahimi                     z += zAddressOffset;
1718*6467f958SSadaf Ebrahimi                 }
1719*6467f958SSadaf Ebrahimi                 break;
1720*6467f958SSadaf Ebrahimi         }
1721*6467f958SSadaf Ebrahimi     }
1722*6467f958SSadaf Ebrahimi 
1723*6467f958SSadaf Ebrahimi     // At this point, we're dealing with non-normalized coordinates.
1724*6467f958SSadaf Ebrahimi 
1725*6467f958SSadaf Ebrahimi     outX = adFn(static_cast<int>(floorf(x)), width);
1726*6467f958SSadaf Ebrahimi 
1727*6467f958SSadaf Ebrahimi     // 1D and 2D arrays require special care for the index coordinate:
1728*6467f958SSadaf Ebrahimi 
1729*6467f958SSadaf Ebrahimi     switch (imageInfo->type)
1730*6467f958SSadaf Ebrahimi     {
1731*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1732*6467f958SSadaf Ebrahimi             outY = static_cast<int>(
1733*6467f958SSadaf Ebrahimi                 calculate_array_index(y, (float)imageInfo->arraySize - 1.0f));
1734*6467f958SSadaf Ebrahimi             outZ = 0; /* don't care! */
1735*6467f958SSadaf Ebrahimi             break;
1736*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1737*6467f958SSadaf Ebrahimi             outY = adFn(static_cast<int>(floorf(y)), height);
1738*6467f958SSadaf Ebrahimi             outZ = static_cast<int>(
1739*6467f958SSadaf Ebrahimi                 calculate_array_index(z, (float)imageInfo->arraySize - 1.0f));
1740*6467f958SSadaf Ebrahimi             break;
1741*6467f958SSadaf Ebrahimi         default:
1742*6467f958SSadaf Ebrahimi             // legacy path:
1743*6467f958SSadaf Ebrahimi             if (height != 0) outY = adFn(static_cast<int>(floorf(y)), height);
1744*6467f958SSadaf Ebrahimi             if (depth != 0) outZ = adFn(static_cast<int>(floorf(z)), depth);
1745*6467f958SSadaf Ebrahimi     }
1746*6467f958SSadaf Ebrahimi 
1747*6467f958SSadaf Ebrahimi     return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ);
1748*6467f958SSadaf Ebrahimi }
1749*6467f958SSadaf Ebrahimi 
// Returns a minus floorf(a), i.e. the distance from a down to the nearest
// integer not greater than a.
static float frac(float a)
{
    const float wholePart = floorf(a);
    return a - wholePart;
}
1751*6467f958SSadaf Ebrahimi 
1752*6467f958SSadaf Ebrahimi static inline void pixelMax(const float a[4], const float b[4], float *results);
pixelMax(const float a[4],const float b[4],float * results)1753*6467f958SSadaf Ebrahimi static inline void pixelMax(const float a[4], const float b[4], float *results)
1754*6467f958SSadaf Ebrahimi {
1755*6467f958SSadaf Ebrahimi     for (int i = 0; i < 4; i++) results[i] = errMax(fabsf(a[i]), fabsf(b[i]));
1756*6467f958SSadaf Ebrahimi }
1757*6467f958SSadaf Ebrahimi 
1758*6467f958SSadaf Ebrahimi // If containsDenorms is NULL, flush denorms to zero
1759*6467f958SSadaf Ebrahimi // if containsDenorms is not NULL, record whether there are any denorms
1760*6467f958SSadaf Ebrahimi static inline void check_for_denorms(float a[4], int *containsDenorms);
check_for_denorms(float a[4],int * containsDenorms)1761*6467f958SSadaf Ebrahimi static inline void check_for_denorms(float a[4], int *containsDenorms)
1762*6467f958SSadaf Ebrahimi {
1763*6467f958SSadaf Ebrahimi     if (NULL == containsDenorms)
1764*6467f958SSadaf Ebrahimi     {
1765*6467f958SSadaf Ebrahimi         for (int i = 0; i < 4; i++)
1766*6467f958SSadaf Ebrahimi         {
1767*6467f958SSadaf Ebrahimi             if (IsFloatSubnormal(a[i])) a[i] = copysignf(0.0f, a[i]);
1768*6467f958SSadaf Ebrahimi         }
1769*6467f958SSadaf Ebrahimi     }
1770*6467f958SSadaf Ebrahimi     else
1771*6467f958SSadaf Ebrahimi     {
1772*6467f958SSadaf Ebrahimi         for (int i = 0; i < 4; i++)
1773*6467f958SSadaf Ebrahimi         {
1774*6467f958SSadaf Ebrahimi             if (IsFloatSubnormal(a[i]))
1775*6467f958SSadaf Ebrahimi             {
1776*6467f958SSadaf Ebrahimi                 *containsDenorms = 1;
1777*6467f958SSadaf Ebrahimi                 break;
1778*6467f958SSadaf Ebrahimi             }
1779*6467f958SSadaf Ebrahimi         }
1780*6467f958SSadaf Ebrahimi     }
1781*6467f958SSadaf Ebrahimi }
1782*6467f958SSadaf Ebrahimi 
// Selects a layer of an image array, following Section 8.4 of the OpenCL 1.2
// spec ('Selecting an Image from an Image Array'):
//
//     layer_index = clamp( rint(w), 0, image_array_size - 1)
//
// coord  - the index coordinate 'w'
// extent - the upper clamp bound (callers pass image_array_size - 1)
//
// Returns the rounded, clamped index, still as a float.
inline float calculate_array_index(float coord, float extent)
{
    float layer = rintf(coord);

    // Upper bound first, then lower bound, so that a negative extent still
    // ends up at 0.
    if (layer > extent) layer = extent;
    if (layer < 0.0f) layer = 0.0f;

    return layer;
}
1796*6467f958SSadaf Ebrahimi 
1797*6467f958SSadaf Ebrahimi /*
1798*6467f958SSadaf Ebrahimi  * Utility function to unnormalized a coordinate given a particular sampler.
1799*6467f958SSadaf Ebrahimi  *
1800*6467f958SSadaf Ebrahimi  * name     - the name of the coordinate, used for verbose debugging only
1801*6467f958SSadaf Ebrahimi  * coord    - the coordinate requiring unnormalization
1802*6467f958SSadaf Ebrahimi  * offset   - an addressing offset to be added to the coordinate
1803*6467f958SSadaf Ebrahimi  * extent   - the max value for this coordinate (e.g. width for x)
1804*6467f958SSadaf Ebrahimi  */
unnormalize_coordinate(const char * name,float coord,float offset,float extent,cl_addressing_mode addressing_mode,int verbose)1805*6467f958SSadaf Ebrahimi static float unnormalize_coordinate(const char *name, float coord, float offset,
1806*6467f958SSadaf Ebrahimi                                     float extent,
1807*6467f958SSadaf Ebrahimi                                     cl_addressing_mode addressing_mode,
1808*6467f958SSadaf Ebrahimi                                     int verbose)
1809*6467f958SSadaf Ebrahimi {
1810*6467f958SSadaf Ebrahimi     float ret = 0.0f;
1811*6467f958SSadaf Ebrahimi 
1812*6467f958SSadaf Ebrahimi     switch (addressing_mode)
1813*6467f958SSadaf Ebrahimi     {
1814*6467f958SSadaf Ebrahimi         case CL_ADDRESS_REPEAT:
1815*6467f958SSadaf Ebrahimi             ret = RepeatNormalizedAddressFn(coord, static_cast<size_t>(extent));
1816*6467f958SSadaf Ebrahimi 
1817*6467f958SSadaf Ebrahimi             if (verbose)
1818*6467f958SSadaf Ebrahimi             {
1819*6467f958SSadaf Ebrahimi                 log_info("\tRepeat filter denormalizes %s (%f) to %f\n", name,
1820*6467f958SSadaf Ebrahimi                          coord, ret);
1821*6467f958SSadaf Ebrahimi             }
1822*6467f958SSadaf Ebrahimi 
1823*6467f958SSadaf Ebrahimi             if (offset != 0.0)
1824*6467f958SSadaf Ebrahimi             {
1825*6467f958SSadaf Ebrahimi                 // Add in the offset, and handle wrapping.
1826*6467f958SSadaf Ebrahimi                 ret += offset;
1827*6467f958SSadaf Ebrahimi                 if (ret > extent) ret -= extent;
1828*6467f958SSadaf Ebrahimi                 if (ret < 0.0) ret += extent;
1829*6467f958SSadaf Ebrahimi             }
1830*6467f958SSadaf Ebrahimi 
1831*6467f958SSadaf Ebrahimi             if (verbose && offset != 0.0f)
1832*6467f958SSadaf Ebrahimi             {
1833*6467f958SSadaf Ebrahimi                 log_info("\tAddress offset of %f added to get %f\n", offset,
1834*6467f958SSadaf Ebrahimi                          ret);
1835*6467f958SSadaf Ebrahimi             }
1836*6467f958SSadaf Ebrahimi             break;
1837*6467f958SSadaf Ebrahimi 
1838*6467f958SSadaf Ebrahimi         case CL_ADDRESS_MIRRORED_REPEAT:
1839*6467f958SSadaf Ebrahimi             ret = MirroredRepeatNormalizedAddressFn(
1840*6467f958SSadaf Ebrahimi                 coord, static_cast<size_t>(extent));
1841*6467f958SSadaf Ebrahimi 
1842*6467f958SSadaf Ebrahimi             if (verbose)
1843*6467f958SSadaf Ebrahimi             {
1844*6467f958SSadaf Ebrahimi                 log_info(
1845*6467f958SSadaf Ebrahimi                     "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
1846*6467f958SSadaf Ebrahimi                     name, coord, ret);
1847*6467f958SSadaf Ebrahimi             }
1848*6467f958SSadaf Ebrahimi 
1849*6467f958SSadaf Ebrahimi             if (offset != 0.0)
1850*6467f958SSadaf Ebrahimi             {
1851*6467f958SSadaf Ebrahimi                 float temp = ret + offset;
1852*6467f958SSadaf Ebrahimi                 if (temp > extent) temp = extent - (temp - extent);
1853*6467f958SSadaf Ebrahimi                 ret = fabsf(temp);
1854*6467f958SSadaf Ebrahimi             }
1855*6467f958SSadaf Ebrahimi 
1856*6467f958SSadaf Ebrahimi             if (verbose && offset != 0.0f)
1857*6467f958SSadaf Ebrahimi             {
1858*6467f958SSadaf Ebrahimi                 log_info("\tAddress offset of %f added to get %f\n", offset,
1859*6467f958SSadaf Ebrahimi                          ret);
1860*6467f958SSadaf Ebrahimi             }
1861*6467f958SSadaf Ebrahimi             break;
1862*6467f958SSadaf Ebrahimi 
1863*6467f958SSadaf Ebrahimi         default:
1864*6467f958SSadaf Ebrahimi 
1865*6467f958SSadaf Ebrahimi             ret = coord * extent;
1866*6467f958SSadaf Ebrahimi 
1867*6467f958SSadaf Ebrahimi             if (verbose)
1868*6467f958SSadaf Ebrahimi             {
1869*6467f958SSadaf Ebrahimi                 log_info("\tFilter denormalizes %s to %f (%f * %f)\n", name,
1870*6467f958SSadaf Ebrahimi                          ret, coord, extent);
1871*6467f958SSadaf Ebrahimi             }
1872*6467f958SSadaf Ebrahimi 
1873*6467f958SSadaf Ebrahimi             ret += offset;
1874*6467f958SSadaf Ebrahimi 
1875*6467f958SSadaf Ebrahimi             if (verbose && offset != 0.0f)
1876*6467f958SSadaf Ebrahimi             {
1877*6467f958SSadaf Ebrahimi                 log_info("\tAddress offset of %f added to get %f\n", offset,
1878*6467f958SSadaf Ebrahimi                          ret);
1879*6467f958SSadaf Ebrahimi             }
1880*6467f958SSadaf Ebrahimi     }
1881*6467f958SSadaf Ebrahimi 
1882*6467f958SSadaf Ebrahimi     return ret;
1883*6467f958SSadaf Ebrahimi }
1884*6467f958SSadaf Ebrahimi 
1885*6467f958SSadaf Ebrahimi FloatPixel
sample_image_pixel_float(void * imageData,image_descriptor * imageInfo,float x,float y,float z,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms)1886*6467f958SSadaf Ebrahimi sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
1887*6467f958SSadaf Ebrahimi                          float y, float z, image_sampler_data *imageSampler,
1888*6467f958SSadaf Ebrahimi                          float *outData, int verbose, int *containsDenorms)
1889*6467f958SSadaf Ebrahimi {
1890*6467f958SSadaf Ebrahimi     return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
1891*6467f958SSadaf Ebrahimi                                            0.0f, 0.0f, imageSampler, outData,
1892*6467f958SSadaf Ebrahimi                                            verbose, containsDenorms);
1893*6467f958SSadaf Ebrahimi }
1894*6467f958SSadaf Ebrahimi 
1895*6467f958SSadaf Ebrahimi // returns max pixel value of the pixels touched
sample_image_pixel_float(void * imageData,image_descriptor * imageInfo,float x,float y,float z,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms,int lod)1896*6467f958SSadaf Ebrahimi FloatPixel sample_image_pixel_float(void *imageData,
1897*6467f958SSadaf Ebrahimi                                     image_descriptor *imageInfo, float x,
1898*6467f958SSadaf Ebrahimi                                     float y, float z,
1899*6467f958SSadaf Ebrahimi                                     image_sampler_data *imageSampler,
1900*6467f958SSadaf Ebrahimi                                     float *outData, int verbose,
1901*6467f958SSadaf Ebrahimi                                     int *containsDenorms, int lod)
1902*6467f958SSadaf Ebrahimi {
1903*6467f958SSadaf Ebrahimi     return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
1904*6467f958SSadaf Ebrahimi                                            0.0f, 0.0f, imageSampler, outData,
1905*6467f958SSadaf Ebrahimi                                            verbose, containsDenorms, lod);
1906*6467f958SSadaf Ebrahimi }
sample_image_pixel_float_offset(void * imageData,image_descriptor * imageInfo,float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms,int lod)1907*6467f958SSadaf Ebrahimi FloatPixel sample_image_pixel_float_offset(
1908*6467f958SSadaf Ebrahimi     void *imageData, image_descriptor *imageInfo, float x, float y, float z,
1909*6467f958SSadaf Ebrahimi     float xAddressOffset, float yAddressOffset, float zAddressOffset,
1910*6467f958SSadaf Ebrahimi     image_sampler_data *imageSampler, float *outData, int verbose,
1911*6467f958SSadaf Ebrahimi     int *containsDenorms, int lod)
1912*6467f958SSadaf Ebrahimi {
1913*6467f958SSadaf Ebrahimi     AddressFn adFn = sAddressingTable[imageSampler];
1914*6467f958SSadaf Ebrahimi     FloatPixel returnVal;
1915*6467f958SSadaf Ebrahimi     size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
1916*6467f958SSadaf Ebrahimi            depth_lod = imageInfo->depth;
1917*6467f958SSadaf Ebrahimi     size_t slice_pitch_lod = 0, row_pitch_lod = 0;
1918*6467f958SSadaf Ebrahimi 
1919*6467f958SSadaf Ebrahimi     if (imageInfo->num_mip_levels > 1)
1920*6467f958SSadaf Ebrahimi     {
1921*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
1922*6467f958SSadaf Ebrahimi         {
1923*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
1924*6467f958SSadaf Ebrahimi                 depth_lod =
1925*6467f958SSadaf Ebrahimi                     (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
1926*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
1927*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1928*6467f958SSadaf Ebrahimi                 height_lod =
1929*6467f958SSadaf Ebrahimi                     (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
1930*6467f958SSadaf Ebrahimi             default:
1931*6467f958SSadaf Ebrahimi                 width_lod =
1932*6467f958SSadaf Ebrahimi                     (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
1933*6467f958SSadaf Ebrahimi         }
1934*6467f958SSadaf Ebrahimi         row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
1935*6467f958SSadaf Ebrahimi         if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1936*6467f958SSadaf Ebrahimi             slice_pitch_lod = row_pitch_lod;
1937*6467f958SSadaf Ebrahimi         else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
1938*6467f958SSadaf Ebrahimi                  || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
1939*6467f958SSadaf Ebrahimi             slice_pitch_lod = row_pitch_lod * height_lod;
1940*6467f958SSadaf Ebrahimi     }
1941*6467f958SSadaf Ebrahimi     else
1942*6467f958SSadaf Ebrahimi     {
1943*6467f958SSadaf Ebrahimi         slice_pitch_lod = imageInfo->slicePitch;
1944*6467f958SSadaf Ebrahimi         row_pitch_lod = imageInfo->rowPitch;
1945*6467f958SSadaf Ebrahimi     }
1946*6467f958SSadaf Ebrahimi 
1947*6467f958SSadaf Ebrahimi     if (containsDenorms) *containsDenorms = 0;
1948*6467f958SSadaf Ebrahimi 
1949*6467f958SSadaf Ebrahimi     if (imageSampler->normalized_coords)
1950*6467f958SSadaf Ebrahimi     {
1951*6467f958SSadaf Ebrahimi 
1952*6467f958SSadaf Ebrahimi         // We need to unnormalize our coordinates differently depending on
1953*6467f958SSadaf Ebrahimi         // the image type, but 'x' is always processed the same way.
1954*6467f958SSadaf Ebrahimi 
1955*6467f958SSadaf Ebrahimi         x = unnormalize_coordinate("x", x, xAddressOffset, (float)width_lod,
1956*6467f958SSadaf Ebrahimi                                    imageSampler->addressing_mode, verbose);
1957*6467f958SSadaf Ebrahimi 
1958*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
1959*6467f958SSadaf Ebrahimi         {
1960*6467f958SSadaf Ebrahimi 
1961*6467f958SSadaf Ebrahimi                 // The image array types require special care:
1962*6467f958SSadaf Ebrahimi 
1963*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1964*6467f958SSadaf Ebrahimi                 z = 0; // don't care -- unused for 1D arrays
1965*6467f958SSadaf Ebrahimi                 break;
1966*6467f958SSadaf Ebrahimi 
1967*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1968*6467f958SSadaf Ebrahimi                 y = unnormalize_coordinate(
1969*6467f958SSadaf Ebrahimi                     "y", y, yAddressOffset, (float)height_lod,
1970*6467f958SSadaf Ebrahimi                     imageSampler->addressing_mode, verbose);
1971*6467f958SSadaf Ebrahimi                 break;
1972*6467f958SSadaf Ebrahimi 
1973*6467f958SSadaf Ebrahimi                 // Everybody else:
1974*6467f958SSadaf Ebrahimi 
1975*6467f958SSadaf Ebrahimi             default:
1976*6467f958SSadaf Ebrahimi                 y = unnormalize_coordinate(
1977*6467f958SSadaf Ebrahimi                     "y", y, yAddressOffset, (float)height_lod,
1978*6467f958SSadaf Ebrahimi                     imageSampler->addressing_mode, verbose);
1979*6467f958SSadaf Ebrahimi                 z = unnormalize_coordinate(
1980*6467f958SSadaf Ebrahimi                     "z", z, zAddressOffset, (float)depth_lod,
1981*6467f958SSadaf Ebrahimi                     imageSampler->addressing_mode, verbose);
1982*6467f958SSadaf Ebrahimi         }
1983*6467f958SSadaf Ebrahimi     }
1984*6467f958SSadaf Ebrahimi     else if (verbose)
1985*6467f958SSadaf Ebrahimi     {
1986*6467f958SSadaf Ebrahimi 
1987*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
1988*6467f958SSadaf Ebrahimi         {
1989*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1990*6467f958SSadaf Ebrahimi                 log_info("Starting coordinate: %f, array index %f\n", x, y);
1991*6467f958SSadaf Ebrahimi                 break;
1992*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1993*6467f958SSadaf Ebrahimi                 log_info("Starting coordinate: %f, %f, array index %f\n", x, y,
1994*6467f958SSadaf Ebrahimi                          z);
1995*6467f958SSadaf Ebrahimi                 break;
1996*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D:
1997*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_BUFFER:
1998*6467f958SSadaf Ebrahimi                 log_info("Starting coordinate: %f\n", x);
1999*6467f958SSadaf Ebrahimi                 break;
2000*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
2001*6467f958SSadaf Ebrahimi                 log_info("Starting coordinate: %f, %f\n", x, y);
2002*6467f958SSadaf Ebrahimi                 break;
2003*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
2004*6467f958SSadaf Ebrahimi             default: log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
2005*6467f958SSadaf Ebrahimi         }
2006*6467f958SSadaf Ebrahimi     }
2007*6467f958SSadaf Ebrahimi 
2008*6467f958SSadaf Ebrahimi     // At this point, we have unnormalized coordinates.
2009*6467f958SSadaf Ebrahimi 
2010*6467f958SSadaf Ebrahimi     if (imageSampler->filter_mode == CL_FILTER_NEAREST)
2011*6467f958SSadaf Ebrahimi     {
2012*6467f958SSadaf Ebrahimi         int ix, iy, iz;
2013*6467f958SSadaf Ebrahimi 
2014*6467f958SSadaf Ebrahimi         // We apply the addressing function to the now-unnormalized
2015*6467f958SSadaf Ebrahimi         // coordinates.  Note that the array cases again require special
2016*6467f958SSadaf Ebrahimi         // care, per section 8.4 in the OpenCL 1.2 Specification.
2017*6467f958SSadaf Ebrahimi 
2018*6467f958SSadaf Ebrahimi         ix = adFn(static_cast<int>(floorf(x)), width_lod);
2019*6467f958SSadaf Ebrahimi 
2020*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
2021*6467f958SSadaf Ebrahimi         {
2022*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
2023*6467f958SSadaf Ebrahimi                 iy = static_cast<int>(calculate_array_index(
2024*6467f958SSadaf Ebrahimi                     y, (float)(imageInfo->arraySize - 1)));
2025*6467f958SSadaf Ebrahimi                 iz = 0;
2026*6467f958SSadaf Ebrahimi                 if (verbose)
2027*6467f958SSadaf Ebrahimi                 {
2028*6467f958SSadaf Ebrahimi                     log_info("\tArray index %f evaluates to %d\n", y, iy);
2029*6467f958SSadaf Ebrahimi                 }
2030*6467f958SSadaf Ebrahimi                 break;
2031*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
2032*6467f958SSadaf Ebrahimi                 iy = adFn(static_cast<int>(floorf(y)), height_lod);
2033*6467f958SSadaf Ebrahimi                 iz = static_cast<int>(calculate_array_index(
2034*6467f958SSadaf Ebrahimi                     z, (float)(imageInfo->arraySize - 1)));
2035*6467f958SSadaf Ebrahimi                 if (verbose)
2036*6467f958SSadaf Ebrahimi                 {
2037*6467f958SSadaf Ebrahimi                     log_info("\tArray index %f evaluates to %d\n", z, iz);
2038*6467f958SSadaf Ebrahimi                 }
2039*6467f958SSadaf Ebrahimi                 break;
2040*6467f958SSadaf Ebrahimi             default:
2041*6467f958SSadaf Ebrahimi                 iy = adFn(static_cast<int>(floorf(y)), height_lod);
2042*6467f958SSadaf Ebrahimi                 if (depth_lod != 0)
2043*6467f958SSadaf Ebrahimi                     iz = adFn(static_cast<int>(floorf(z)), depth_lod);
2044*6467f958SSadaf Ebrahimi                 else
2045*6467f958SSadaf Ebrahimi                     iz = 0;
2046*6467f958SSadaf Ebrahimi         }
2047*6467f958SSadaf Ebrahimi 
2048*6467f958SSadaf Ebrahimi         if (verbose)
2049*6467f958SSadaf Ebrahimi         {
2050*6467f958SSadaf Ebrahimi             if (iz)
2051*6467f958SSadaf Ebrahimi                 log_info(
2052*6467f958SSadaf Ebrahimi                     "\tReference integer coords calculated: { %d, %d, %d }\n",
2053*6467f958SSadaf Ebrahimi                     ix, iy, iz);
2054*6467f958SSadaf Ebrahimi             else
2055*6467f958SSadaf Ebrahimi                 log_info("\tReference integer coords calculated: { %d, %d }\n",
2056*6467f958SSadaf Ebrahimi                          ix, iy);
2057*6467f958SSadaf Ebrahimi         }
2058*6467f958SSadaf Ebrahimi 
2059*6467f958SSadaf Ebrahimi         read_image_pixel_float(imageData, imageInfo, ix, iy, iz, outData, lod);
2060*6467f958SSadaf Ebrahimi         check_for_denorms(outData, containsDenorms);
2061*6467f958SSadaf Ebrahimi         for (int i = 0; i < 4; i++) returnVal.p[i] = fabsf(outData[i]);
2062*6467f958SSadaf Ebrahimi         return returnVal;
2063*6467f958SSadaf Ebrahimi     }
2064*6467f958SSadaf Ebrahimi     else
2065*6467f958SSadaf Ebrahimi     {
2066*6467f958SSadaf Ebrahimi         // Linear filtering cases.
2067*6467f958SSadaf Ebrahimi 
2068*6467f958SSadaf Ebrahimi         size_t width = width_lod, height = height_lod, depth = depth_lod;
2069*6467f958SSadaf Ebrahimi 
2070*6467f958SSadaf Ebrahimi         // Image arrays can use 2D filtering, but require us to walk into the
2071*6467f958SSadaf Ebrahimi         // image a certain number of slices before reading.
2072*6467f958SSadaf Ebrahimi 
2073*6467f958SSadaf Ebrahimi         if (depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY
2074*6467f958SSadaf Ebrahimi             || imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
2075*6467f958SSadaf Ebrahimi         {
2076*6467f958SSadaf Ebrahimi             float array_index = 0;
2077*6467f958SSadaf Ebrahimi 
2078*6467f958SSadaf Ebrahimi             size_t layer_offset = 0;
2079*6467f958SSadaf Ebrahimi 
2080*6467f958SSadaf Ebrahimi             if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
2081*6467f958SSadaf Ebrahimi             {
2082*6467f958SSadaf Ebrahimi                 array_index =
2083*6467f958SSadaf Ebrahimi                     calculate_array_index(z, (float)(imageInfo->arraySize - 1));
2084*6467f958SSadaf Ebrahimi                 layer_offset = slice_pitch_lod * (size_t)array_index;
2085*6467f958SSadaf Ebrahimi             }
2086*6467f958SSadaf Ebrahimi             else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
2087*6467f958SSadaf Ebrahimi             {
2088*6467f958SSadaf Ebrahimi                 array_index =
2089*6467f958SSadaf Ebrahimi                     calculate_array_index(y, (float)(imageInfo->arraySize - 1));
2090*6467f958SSadaf Ebrahimi                 layer_offset = slice_pitch_lod * (size_t)array_index;
2091*6467f958SSadaf Ebrahimi 
2092*6467f958SSadaf Ebrahimi                 // Set up y and height so that the filtering below is correct
2093*6467f958SSadaf Ebrahimi                 // 1D filtering on a single slice.
2094*6467f958SSadaf Ebrahimi                 height = 1;
2095*6467f958SSadaf Ebrahimi             }
2096*6467f958SSadaf Ebrahimi 
2097*6467f958SSadaf Ebrahimi             int x1 = adFn(static_cast<int>(floorf(x - 0.5f)), width);
2098*6467f958SSadaf Ebrahimi             int y1 = 0;
2099*6467f958SSadaf Ebrahimi             int x2 = adFn(static_cast<int>(floorf(x - 0.5f) + 1), width);
2100*6467f958SSadaf Ebrahimi             int y2 = 0;
2101*6467f958SSadaf Ebrahimi             if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
2102*6467f958SSadaf Ebrahimi                 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
2103*6467f958SSadaf Ebrahimi                 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER))
2104*6467f958SSadaf Ebrahimi             {
2105*6467f958SSadaf Ebrahimi                 y1 = adFn(static_cast<int>(floorf(y - 0.5f)), height);
2106*6467f958SSadaf Ebrahimi                 y2 = adFn(static_cast<int>(floorf(y - 0.5f) + 1), height);
2107*6467f958SSadaf Ebrahimi             }
2108*6467f958SSadaf Ebrahimi             else
2109*6467f958SSadaf Ebrahimi             {
2110*6467f958SSadaf Ebrahimi                 y = 0.5f;
2111*6467f958SSadaf Ebrahimi             }
2112*6467f958SSadaf Ebrahimi 
2113*6467f958SSadaf Ebrahimi             if (verbose)
2114*6467f958SSadaf Ebrahimi             {
2115*6467f958SSadaf Ebrahimi                 log_info("\tActual integer coords used (i = floor(x-.5)): i0:{ "
2116*6467f958SSadaf Ebrahimi                          "%d, %d } and i1:{ %d, %d }\n",
2117*6467f958SSadaf Ebrahimi                          x1, y1, x2, y2);
2118*6467f958SSadaf Ebrahimi                 log_info("\tArray coordinate is %f\n", array_index);
2119*6467f958SSadaf Ebrahimi             }
2120*6467f958SSadaf Ebrahimi 
2121*6467f958SSadaf Ebrahimi             // Walk to beginning of the 'correct' slice, if needed.
2122*6467f958SSadaf Ebrahimi             char *imgPtr = ((char *)imageData) + layer_offset;
2123*6467f958SSadaf Ebrahimi 
2124*6467f958SSadaf Ebrahimi             float upLeft[4], upRight[4], lowLeft[4], lowRight[4];
2125*6467f958SSadaf Ebrahimi             float maxUp[4], maxLow[4];
2126*6467f958SSadaf Ebrahimi             read_image_pixel_float(imgPtr, imageInfo, x1, y1, 0, upLeft, lod);
2127*6467f958SSadaf Ebrahimi             read_image_pixel_float(imgPtr, imageInfo, x2, y1, 0, upRight, lod);
2128*6467f958SSadaf Ebrahimi             check_for_denorms(upLeft, containsDenorms);
2129*6467f958SSadaf Ebrahimi             check_for_denorms(upRight, containsDenorms);
2130*6467f958SSadaf Ebrahimi             pixelMax(upLeft, upRight, maxUp);
2131*6467f958SSadaf Ebrahimi             read_image_pixel_float(imgPtr, imageInfo, x1, y2, 0, lowLeft, lod);
2132*6467f958SSadaf Ebrahimi             read_image_pixel_float(imgPtr, imageInfo, x2, y2, 0, lowRight, lod);
2133*6467f958SSadaf Ebrahimi             check_for_denorms(lowLeft, containsDenorms);
2134*6467f958SSadaf Ebrahimi             check_for_denorms(lowRight, containsDenorms);
2135*6467f958SSadaf Ebrahimi             pixelMax(lowLeft, lowRight, maxLow);
2136*6467f958SSadaf Ebrahimi             pixelMax(maxUp, maxLow, returnVal.p);
2137*6467f958SSadaf Ebrahimi 
2138*6467f958SSadaf Ebrahimi             if (verbose)
2139*6467f958SSadaf Ebrahimi             {
2140*6467f958SSadaf Ebrahimi                 if (NULL == containsDenorms)
2141*6467f958SSadaf Ebrahimi                     log_info("\tSampled pixels (rgba order, denorms flushed to "
2142*6467f958SSadaf Ebrahimi                              "zero):\n");
2143*6467f958SSadaf Ebrahimi                 else
2144*6467f958SSadaf Ebrahimi                     log_info("\tSampled pixels (rgba order):\n");
2145*6467f958SSadaf Ebrahimi                 log_info("\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1],
2146*6467f958SSadaf Ebrahimi                          upLeft[2], upLeft[3]);
2147*6467f958SSadaf Ebrahimi                 log_info("\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1],
2148*6467f958SSadaf Ebrahimi                          upRight[2], upRight[3]);
2149*6467f958SSadaf Ebrahimi                 log_info("\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1],
2150*6467f958SSadaf Ebrahimi                          lowLeft[2], lowLeft[3]);
2151*6467f958SSadaf Ebrahimi                 log_info("\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1],
2152*6467f958SSadaf Ebrahimi                          lowRight[2], lowRight[3]);
2153*6467f958SSadaf Ebrahimi             }
2154*6467f958SSadaf Ebrahimi 
2155*6467f958SSadaf Ebrahimi             double weights[2][2];
2156*6467f958SSadaf Ebrahimi 
2157*6467f958SSadaf Ebrahimi             weights[0][0] = weights[0][1] = 1.0 - frac(x - 0.5f);
2158*6467f958SSadaf Ebrahimi             weights[1][0] = weights[1][1] = frac(x - 0.5f);
2159*6467f958SSadaf Ebrahimi             weights[0][0] *= 1.0 - frac(y - 0.5f);
2160*6467f958SSadaf Ebrahimi             weights[1][0] *= 1.0 - frac(y - 0.5f);
2161*6467f958SSadaf Ebrahimi             weights[0][1] *= frac(y - 0.5f);
2162*6467f958SSadaf Ebrahimi             weights[1][1] *= frac(y - 0.5f);
2163*6467f958SSadaf Ebrahimi 
2164*6467f958SSadaf Ebrahimi             if (verbose)
2165*6467f958SSadaf Ebrahimi                 log_info("\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f\n",
2166*6467f958SSadaf Ebrahimi                          frac(x - 0.5f), frac(y - 0.5f));
2167*6467f958SSadaf Ebrahimi 
2168*6467f958SSadaf Ebrahimi             for (int i = 0; i < 3; i++)
2169*6467f958SSadaf Ebrahimi             {
2170*6467f958SSadaf Ebrahimi                 outData[i] = (float)((upLeft[i] * weights[0][0])
2171*6467f958SSadaf Ebrahimi                                      + (upRight[i] * weights[1][0])
2172*6467f958SSadaf Ebrahimi                                      + (lowLeft[i] * weights[0][1])
2173*6467f958SSadaf Ebrahimi                                      + (lowRight[i] * weights[1][1]));
2174*6467f958SSadaf Ebrahimi                 // flush subnormal results to zero if necessary
2175*6467f958SSadaf Ebrahimi                 if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
2176*6467f958SSadaf Ebrahimi                     outData[i] = copysignf(0.0f, outData[i]);
2177*6467f958SSadaf Ebrahimi             }
2178*6467f958SSadaf Ebrahimi             outData[3] = (float)((upLeft[3] * weights[0][0])
2179*6467f958SSadaf Ebrahimi                                  + (upRight[3] * weights[1][0])
2180*6467f958SSadaf Ebrahimi                                  + (lowLeft[3] * weights[0][1])
2181*6467f958SSadaf Ebrahimi                                  + (lowRight[3] * weights[1][1]));
2182*6467f958SSadaf Ebrahimi             // flush subnormal results to zero if necessary
2183*6467f958SSadaf Ebrahimi             if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
2184*6467f958SSadaf Ebrahimi                 outData[3] = copysignf(0.0f, outData[3]);
2185*6467f958SSadaf Ebrahimi         }
2186*6467f958SSadaf Ebrahimi         else
2187*6467f958SSadaf Ebrahimi         {
2188*6467f958SSadaf Ebrahimi             // 3D linear filtering
2189*6467f958SSadaf Ebrahimi             int x1 = adFn(static_cast<int>(floorf(x - 0.5f)), width_lod);
2190*6467f958SSadaf Ebrahimi             int y1 = adFn(static_cast<int>(floorf(y - 0.5f)), height_lod);
2191*6467f958SSadaf Ebrahimi             int z1 = adFn(static_cast<int>(floorf(z - 0.5f)), depth_lod);
2192*6467f958SSadaf Ebrahimi             int x2 = adFn(static_cast<int>(floorf(x - 0.5f) + 1), width_lod);
2193*6467f958SSadaf Ebrahimi             int y2 = adFn(static_cast<int>(floorf(y - 0.5f) + 1), height_lod);
2194*6467f958SSadaf Ebrahimi             int z2 = adFn(static_cast<int>(floorf(z - 0.5f) + 1), depth_lod);
2195*6467f958SSadaf Ebrahimi 
2196*6467f958SSadaf Ebrahimi             if (verbose)
2197*6467f958SSadaf Ebrahimi                 log_info("\tActual integer coords used (i = floor(x-.5)): "
2198*6467f958SSadaf Ebrahimi                          "i0:{%d, %d, %d} and i1:{%d, %d, %d}\n",
2199*6467f958SSadaf Ebrahimi                          x1, y1, z1, x2, y2, z2);
2200*6467f958SSadaf Ebrahimi 
2201*6467f958SSadaf Ebrahimi             float upLeftA[4], upRightA[4], lowLeftA[4], lowRightA[4];
2202*6467f958SSadaf Ebrahimi             float upLeftB[4], upRightB[4], lowLeftB[4], lowRightB[4];
2203*6467f958SSadaf Ebrahimi             float pixelMaxA[4], pixelMaxB[4];
2204*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x1, y1, z1, upLeftA,
2205*6467f958SSadaf Ebrahimi                                    lod);
2206*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x2, y1, z1, upRightA,
2207*6467f958SSadaf Ebrahimi                                    lod);
2208*6467f958SSadaf Ebrahimi             check_for_denorms(upLeftA, containsDenorms);
2209*6467f958SSadaf Ebrahimi             check_for_denorms(upRightA, containsDenorms);
2210*6467f958SSadaf Ebrahimi             pixelMax(upLeftA, upRightA, pixelMaxA);
2211*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x1, y2, z1, lowLeftA,
2212*6467f958SSadaf Ebrahimi                                    lod);
2213*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x2, y2, z1, lowRightA,
2214*6467f958SSadaf Ebrahimi                                    lod);
2215*6467f958SSadaf Ebrahimi             check_for_denorms(lowLeftA, containsDenorms);
2216*6467f958SSadaf Ebrahimi             check_for_denorms(lowRightA, containsDenorms);
2217*6467f958SSadaf Ebrahimi             pixelMax(lowLeftA, lowRightA, pixelMaxB);
2218*6467f958SSadaf Ebrahimi             pixelMax(pixelMaxA, pixelMaxB, returnVal.p);
2219*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x1, y1, z2, upLeftB,
2220*6467f958SSadaf Ebrahimi                                    lod);
2221*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x2, y1, z2, upRightB,
2222*6467f958SSadaf Ebrahimi                                    lod);
2223*6467f958SSadaf Ebrahimi             check_for_denorms(upLeftB, containsDenorms);
2224*6467f958SSadaf Ebrahimi             check_for_denorms(upRightB, containsDenorms);
2225*6467f958SSadaf Ebrahimi             pixelMax(upLeftB, upRightB, pixelMaxA);
2226*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x1, y2, z2, lowLeftB,
2227*6467f958SSadaf Ebrahimi                                    lod);
2228*6467f958SSadaf Ebrahimi             read_image_pixel_float(imageData, imageInfo, x2, y2, z2, lowRightB,
2229*6467f958SSadaf Ebrahimi                                    lod);
2230*6467f958SSadaf Ebrahimi             check_for_denorms(lowLeftB, containsDenorms);
2231*6467f958SSadaf Ebrahimi             check_for_denorms(lowRightB, containsDenorms);
2232*6467f958SSadaf Ebrahimi             pixelMax(lowLeftB, lowRightB, pixelMaxB);
2233*6467f958SSadaf Ebrahimi             pixelMax(pixelMaxA, pixelMaxB, pixelMaxA);
2234*6467f958SSadaf Ebrahimi             pixelMax(pixelMaxA, returnVal.p, returnVal.p);
2235*6467f958SSadaf Ebrahimi 
2236*6467f958SSadaf Ebrahimi             if (verbose)
2237*6467f958SSadaf Ebrahimi             {
2238*6467f958SSadaf Ebrahimi                 if (NULL == containsDenorms)
2239*6467f958SSadaf Ebrahimi                     log_info("\tSampled pixels (rgba order, denorms flushed to "
2240*6467f958SSadaf Ebrahimi                              "zero):\n");
2241*6467f958SSadaf Ebrahimi                 else
2242*6467f958SSadaf Ebrahimi                     log_info("\tSampled pixels (rgba order):\n");
2243*6467f958SSadaf Ebrahimi                 log_info("\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1],
2244*6467f958SSadaf Ebrahimi                          upLeftA[2], upLeftA[3]);
2245*6467f958SSadaf Ebrahimi                 log_info("\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1],
2246*6467f958SSadaf Ebrahimi                          upRightA[2], upRightA[3]);
2247*6467f958SSadaf Ebrahimi                 log_info("\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1],
2248*6467f958SSadaf Ebrahimi                          lowLeftA[2], lowLeftA[3]);
2249*6467f958SSadaf Ebrahimi                 log_info("\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0],
2250*6467f958SSadaf Ebrahimi                          lowRightA[1], lowRightA[2], lowRightA[3]);
2251*6467f958SSadaf Ebrahimi                 log_info("\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1],
2252*6467f958SSadaf Ebrahimi                          upLeftB[2], upLeftB[3]);
2253*6467f958SSadaf Ebrahimi                 log_info("\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1],
2254*6467f958SSadaf Ebrahimi                          upRightB[2], upRightB[3]);
2255*6467f958SSadaf Ebrahimi                 log_info("\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1],
2256*6467f958SSadaf Ebrahimi                          lowLeftB[2], lowLeftB[3]);
2257*6467f958SSadaf Ebrahimi                 log_info("\t\tp111: %f, %f, %f, %f\n", lowRightB[0],
2258*6467f958SSadaf Ebrahimi                          lowRightB[1], lowRightB[2], lowRightB[3]);
2259*6467f958SSadaf Ebrahimi             }
2260*6467f958SSadaf Ebrahimi 
2261*6467f958SSadaf Ebrahimi             double weights[2][2][2];
2262*6467f958SSadaf Ebrahimi 
2263*6467f958SSadaf Ebrahimi             float a = frac(x - 0.5f), b = frac(y - 0.5f), c = frac(z - 0.5f);
2264*6467f958SSadaf Ebrahimi             weights[0][0][0] = weights[0][1][0] = weights[0][0][1] =
2265*6467f958SSadaf Ebrahimi                 weights[0][1][1] = 1.f - a;
2266*6467f958SSadaf Ebrahimi             weights[1][0][0] = weights[1][1][0] = weights[1][0][1] =
2267*6467f958SSadaf Ebrahimi                 weights[1][1][1] = a;
2268*6467f958SSadaf Ebrahimi             weights[0][0][0] *= 1.f - b;
2269*6467f958SSadaf Ebrahimi             weights[1][0][0] *= 1.f - b;
2270*6467f958SSadaf Ebrahimi             weights[0][0][1] *= 1.f - b;
2271*6467f958SSadaf Ebrahimi             weights[1][0][1] *= 1.f - b;
2272*6467f958SSadaf Ebrahimi             weights[0][1][0] *= b;
2273*6467f958SSadaf Ebrahimi             weights[1][1][0] *= b;
2274*6467f958SSadaf Ebrahimi             weights[0][1][1] *= b;
2275*6467f958SSadaf Ebrahimi             weights[1][1][1] *= b;
2276*6467f958SSadaf Ebrahimi             weights[0][0][0] *= 1.f - c;
2277*6467f958SSadaf Ebrahimi             weights[0][1][0] *= 1.f - c;
2278*6467f958SSadaf Ebrahimi             weights[1][0][0] *= 1.f - c;
2279*6467f958SSadaf Ebrahimi             weights[1][1][0] *= 1.f - c;
2280*6467f958SSadaf Ebrahimi             weights[0][0][1] *= c;
2281*6467f958SSadaf Ebrahimi             weights[0][1][1] *= c;
2282*6467f958SSadaf Ebrahimi             weights[1][0][1] *= c;
2283*6467f958SSadaf Ebrahimi             weights[1][1][1] *= c;
2284*6467f958SSadaf Ebrahimi 
2285*6467f958SSadaf Ebrahimi             if (verbose)
2286*6467f958SSadaf Ebrahimi                 log_info("\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f, "
2287*6467f958SSadaf Ebrahimi                          "frac( z - 0.5f ) = %f\n",
2288*6467f958SSadaf Ebrahimi                          frac(x - 0.5f), frac(y - 0.5f), frac(z - 0.5f));
2289*6467f958SSadaf Ebrahimi 
2290*6467f958SSadaf Ebrahimi             for (int i = 0; i < 3; i++)
2291*6467f958SSadaf Ebrahimi             {
2292*6467f958SSadaf Ebrahimi                 outData[i] = (float)((upLeftA[i] * weights[0][0][0])
2293*6467f958SSadaf Ebrahimi                                      + (upRightA[i] * weights[1][0][0])
2294*6467f958SSadaf Ebrahimi                                      + (lowLeftA[i] * weights[0][1][0])
2295*6467f958SSadaf Ebrahimi                                      + (lowRightA[i] * weights[1][1][0])
2296*6467f958SSadaf Ebrahimi                                      + (upLeftB[i] * weights[0][0][1])
2297*6467f958SSadaf Ebrahimi                                      + (upRightB[i] * weights[1][0][1])
2298*6467f958SSadaf Ebrahimi                                      + (lowLeftB[i] * weights[0][1][1])
2299*6467f958SSadaf Ebrahimi                                      + (lowRightB[i] * weights[1][1][1]));
2300*6467f958SSadaf Ebrahimi                 // flush subnormal results to zero if necessary
2301*6467f958SSadaf Ebrahimi                 if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
2302*6467f958SSadaf Ebrahimi                     outData[i] = copysignf(0.0f, outData[i]);
2303*6467f958SSadaf Ebrahimi             }
2304*6467f958SSadaf Ebrahimi             outData[3] = (float)((upLeftA[3] * weights[0][0][0])
2305*6467f958SSadaf Ebrahimi                                  + (upRightA[3] * weights[1][0][0])
2306*6467f958SSadaf Ebrahimi                                  + (lowLeftA[3] * weights[0][1][0])
2307*6467f958SSadaf Ebrahimi                                  + (lowRightA[3] * weights[1][1][0])
2308*6467f958SSadaf Ebrahimi                                  + (upLeftB[3] * weights[0][0][1])
2309*6467f958SSadaf Ebrahimi                                  + (upRightB[3] * weights[1][0][1])
2310*6467f958SSadaf Ebrahimi                                  + (lowLeftB[3] * weights[0][1][1])
2311*6467f958SSadaf Ebrahimi                                  + (lowRightB[3] * weights[1][1][1]));
2312*6467f958SSadaf Ebrahimi             // flush subnormal results to zero if necessary
2313*6467f958SSadaf Ebrahimi             if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
2314*6467f958SSadaf Ebrahimi                 outData[3] = copysignf(0.0f, outData[3]);
2315*6467f958SSadaf Ebrahimi         }
2316*6467f958SSadaf Ebrahimi 
2317*6467f958SSadaf Ebrahimi         return returnVal;
2318*6467f958SSadaf Ebrahimi     }
2319*6467f958SSadaf Ebrahimi }
2320*6467f958SSadaf Ebrahimi 
sample_image_pixel_float_offset(void * imageData,image_descriptor * imageInfo,float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms)2321*6467f958SSadaf Ebrahimi FloatPixel sample_image_pixel_float_offset(
2322*6467f958SSadaf Ebrahimi     void *imageData, image_descriptor *imageInfo, float x, float y, float z,
2323*6467f958SSadaf Ebrahimi     float xAddressOffset, float yAddressOffset, float zAddressOffset,
2324*6467f958SSadaf Ebrahimi     image_sampler_data *imageSampler, float *outData, int verbose,
2325*6467f958SSadaf Ebrahimi     int *containsDenorms)
2326*6467f958SSadaf Ebrahimi {
2327*6467f958SSadaf Ebrahimi     return sample_image_pixel_float_offset(
2328*6467f958SSadaf Ebrahimi         imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset,
2329*6467f958SSadaf Ebrahimi         zAddressOffset, imageSampler, outData, verbose, containsDenorms, 0);
2330*6467f958SSadaf Ebrahimi }
2331*6467f958SSadaf Ebrahimi 
2332*6467f958SSadaf Ebrahimi 
debug_find_vector_in_image(void * imagePtr,image_descriptor * imageInfo,void * vectorToFind,size_t vectorSize,int * outX,int * outY,int * outZ,size_t lod)2333*6467f958SSadaf Ebrahimi int debug_find_vector_in_image(void *imagePtr, image_descriptor *imageInfo,
2334*6467f958SSadaf Ebrahimi                                void *vectorToFind, size_t vectorSize, int *outX,
2335*6467f958SSadaf Ebrahimi                                int *outY, int *outZ, size_t lod)
2336*6467f958SSadaf Ebrahimi {
2337*6467f958SSadaf Ebrahimi     int foundCount = 0;
2338*6467f958SSadaf Ebrahimi     char *iPtr = (char *)imagePtr;
2339*6467f958SSadaf Ebrahimi     size_t width;
2340*6467f958SSadaf Ebrahimi     size_t depth;
2341*6467f958SSadaf Ebrahimi     size_t height;
2342*6467f958SSadaf Ebrahimi     size_t row_pitch;
2343*6467f958SSadaf Ebrahimi     size_t slice_pitch;
2344*6467f958SSadaf Ebrahimi 
2345*6467f958SSadaf Ebrahimi     switch (imageInfo->type)
2346*6467f958SSadaf Ebrahimi     {
2347*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE1D:
2348*6467f958SSadaf Ebrahimi             width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2349*6467f958SSadaf Ebrahimi             height = 1;
2350*6467f958SSadaf Ebrahimi             depth = 1;
2351*6467f958SSadaf Ebrahimi             break;
2352*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE1D_ARRAY:
2353*6467f958SSadaf Ebrahimi             width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2354*6467f958SSadaf Ebrahimi             height = 1;
2355*6467f958SSadaf Ebrahimi             depth = imageInfo->arraySize;
2356*6467f958SSadaf Ebrahimi             break;
2357*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE2D:
2358*6467f958SSadaf Ebrahimi             width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2359*6467f958SSadaf Ebrahimi             height =
2360*6467f958SSadaf Ebrahimi                 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2361*6467f958SSadaf Ebrahimi             depth = 1;
2362*6467f958SSadaf Ebrahimi             break;
2363*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE2D_ARRAY:
2364*6467f958SSadaf Ebrahimi             width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2365*6467f958SSadaf Ebrahimi             height =
2366*6467f958SSadaf Ebrahimi                 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2367*6467f958SSadaf Ebrahimi             depth = imageInfo->arraySize;
2368*6467f958SSadaf Ebrahimi             break;
2369*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE3D:
2370*6467f958SSadaf Ebrahimi             width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2371*6467f958SSadaf Ebrahimi             height =
2372*6467f958SSadaf Ebrahimi                 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2373*6467f958SSadaf Ebrahimi             depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
2374*6467f958SSadaf Ebrahimi             break;
2375*6467f958SSadaf Ebrahimi     }
2376*6467f958SSadaf Ebrahimi 
2377*6467f958SSadaf Ebrahimi     row_pitch = width * get_pixel_size(imageInfo->format);
2378*6467f958SSadaf Ebrahimi     slice_pitch = row_pitch * height;
2379*6467f958SSadaf Ebrahimi 
2380*6467f958SSadaf Ebrahimi     for (size_t z = 0; z < depth; z++)
2381*6467f958SSadaf Ebrahimi     {
2382*6467f958SSadaf Ebrahimi         for (size_t y = 0; y < height; y++)
2383*6467f958SSadaf Ebrahimi         {
2384*6467f958SSadaf Ebrahimi             for (size_t x = 0; x < width; x++)
2385*6467f958SSadaf Ebrahimi             {
2386*6467f958SSadaf Ebrahimi                 if (memcmp(iPtr, vectorToFind, vectorSize) == 0)
2387*6467f958SSadaf Ebrahimi                 {
2388*6467f958SSadaf Ebrahimi                     if (foundCount == 0)
2389*6467f958SSadaf Ebrahimi                     {
2390*6467f958SSadaf Ebrahimi                         *outX = (int)x;
2391*6467f958SSadaf Ebrahimi                         if (outY != NULL) *outY = (int)y;
2392*6467f958SSadaf Ebrahimi                         if (outZ != NULL) *outZ = (int)z;
2393*6467f958SSadaf Ebrahimi                     }
2394*6467f958SSadaf Ebrahimi                     foundCount++;
2395*6467f958SSadaf Ebrahimi                 }
2396*6467f958SSadaf Ebrahimi                 iPtr += vectorSize;
2397*6467f958SSadaf Ebrahimi             }
2398*6467f958SSadaf Ebrahimi             iPtr += row_pitch - (width * vectorSize);
2399*6467f958SSadaf Ebrahimi         }
2400*6467f958SSadaf Ebrahimi         iPtr += slice_pitch - (height * row_pitch);
2401*6467f958SSadaf Ebrahimi     }
2402*6467f958SSadaf Ebrahimi     return foundCount;
2403*6467f958SSadaf Ebrahimi }
2404*6467f958SSadaf Ebrahimi 
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,unsigned int * valuesToFind,int * outX,int * outY,int * outZ,int lod)2405*6467f958SSadaf Ebrahimi int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2406*6467f958SSadaf Ebrahimi                               unsigned int *valuesToFind, int *outX, int *outY,
2407*6467f958SSadaf Ebrahimi                               int *outZ, int lod)
2408*6467f958SSadaf Ebrahimi {
2409*6467f958SSadaf Ebrahimi     char vectorToFind[4 * 4];
2410*6467f958SSadaf Ebrahimi     size_t vectorSize = get_format_channel_count(imageInfo->format);
2411*6467f958SSadaf Ebrahimi 
2412*6467f958SSadaf Ebrahimi 
2413*6467f958SSadaf Ebrahimi     if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8)
2414*6467f958SSadaf Ebrahimi     {
2415*6467f958SSadaf Ebrahimi         unsigned char *p = (unsigned char *)vectorToFind;
2416*6467f958SSadaf Ebrahimi         for (unsigned int i = 0; i < vectorSize; i++)
2417*6467f958SSadaf Ebrahimi             p[i] = (unsigned char)valuesToFind[i];
2418*6467f958SSadaf Ebrahimi     }
2419*6467f958SSadaf Ebrahimi     else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16)
2420*6467f958SSadaf Ebrahimi     {
2421*6467f958SSadaf Ebrahimi         unsigned short *p = (unsigned short *)vectorToFind;
2422*6467f958SSadaf Ebrahimi         for (unsigned int i = 0; i < vectorSize; i++)
2423*6467f958SSadaf Ebrahimi             p[i] = (unsigned short)valuesToFind[i];
2424*6467f958SSadaf Ebrahimi         vectorSize *= 2;
2425*6467f958SSadaf Ebrahimi     }
2426*6467f958SSadaf Ebrahimi     else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32)
2427*6467f958SSadaf Ebrahimi     {
2428*6467f958SSadaf Ebrahimi         unsigned int *p = (unsigned int *)vectorToFind;
2429*6467f958SSadaf Ebrahimi         for (unsigned int i = 0; i < vectorSize; i++)
2430*6467f958SSadaf Ebrahimi             p[i] = (unsigned int)valuesToFind[i];
2431*6467f958SSadaf Ebrahimi         vectorSize *= 4;
2432*6467f958SSadaf Ebrahimi     }
2433*6467f958SSadaf Ebrahimi     else
2434*6467f958SSadaf Ebrahimi     {
2435*6467f958SSadaf Ebrahimi         log_info("WARNING: Unable to search for debug pixel: invalid image "
2436*6467f958SSadaf Ebrahimi                  "format\n");
2437*6467f958SSadaf Ebrahimi         return false;
2438*6467f958SSadaf Ebrahimi     }
2439*6467f958SSadaf Ebrahimi     return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2440*6467f958SSadaf Ebrahimi                                       vectorSize, outX, outY, outZ, lod);
2441*6467f958SSadaf Ebrahimi }
2442*6467f958SSadaf Ebrahimi 
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,int * valuesToFind,int * outX,int * outY,int * outZ,int lod)2443*6467f958SSadaf Ebrahimi int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2444*6467f958SSadaf Ebrahimi                               int *valuesToFind, int *outX, int *outY,
2445*6467f958SSadaf Ebrahimi                               int *outZ, int lod)
2446*6467f958SSadaf Ebrahimi {
2447*6467f958SSadaf Ebrahimi     char vectorToFind[4 * 4];
2448*6467f958SSadaf Ebrahimi     size_t vectorSize = get_format_channel_count(imageInfo->format);
2449*6467f958SSadaf Ebrahimi 
2450*6467f958SSadaf Ebrahimi     if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT8)
2451*6467f958SSadaf Ebrahimi     {
2452*6467f958SSadaf Ebrahimi         char *p = (char *)vectorToFind;
2453*6467f958SSadaf Ebrahimi         for (unsigned int i = 0; i < vectorSize; i++)
2454*6467f958SSadaf Ebrahimi             p[i] = (char)valuesToFind[i];
2455*6467f958SSadaf Ebrahimi     }
2456*6467f958SSadaf Ebrahimi     else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT16)
2457*6467f958SSadaf Ebrahimi     {
2458*6467f958SSadaf Ebrahimi         short *p = (short *)vectorToFind;
2459*6467f958SSadaf Ebrahimi         for (unsigned int i = 0; i < vectorSize; i++)
2460*6467f958SSadaf Ebrahimi             p[i] = (short)valuesToFind[i];
2461*6467f958SSadaf Ebrahimi         vectorSize *= 2;
2462*6467f958SSadaf Ebrahimi     }
2463*6467f958SSadaf Ebrahimi     else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT32)
2464*6467f958SSadaf Ebrahimi     {
2465*6467f958SSadaf Ebrahimi         int *p = (int *)vectorToFind;
2466*6467f958SSadaf Ebrahimi         for (unsigned int i = 0; i < vectorSize; i++)
2467*6467f958SSadaf Ebrahimi             p[i] = (int)valuesToFind[i];
2468*6467f958SSadaf Ebrahimi         vectorSize *= 4;
2469*6467f958SSadaf Ebrahimi     }
2470*6467f958SSadaf Ebrahimi     else
2471*6467f958SSadaf Ebrahimi     {
2472*6467f958SSadaf Ebrahimi         log_info("WARNING: Unable to search for debug pixel: invalid image "
2473*6467f958SSadaf Ebrahimi                  "format\n");
2474*6467f958SSadaf Ebrahimi         return false;
2475*6467f958SSadaf Ebrahimi     }
2476*6467f958SSadaf Ebrahimi     return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2477*6467f958SSadaf Ebrahimi                                       vectorSize, outX, outY, outZ, lod);
2478*6467f958SSadaf Ebrahimi }
2479*6467f958SSadaf Ebrahimi 
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,float * valuesToFind,int * outX,int * outY,int * outZ,int lod)2480*6467f958SSadaf Ebrahimi int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2481*6467f958SSadaf Ebrahimi                               float *valuesToFind, int *outX, int *outY,
2482*6467f958SSadaf Ebrahimi                               int *outZ, int lod)
2483*6467f958SSadaf Ebrahimi {
2484*6467f958SSadaf Ebrahimi     char vectorToFind[4 * 4];
2485*6467f958SSadaf Ebrahimi     float swizzled[4];
2486*6467f958SSadaf Ebrahimi     memcpy(swizzled, valuesToFind, sizeof(swizzled));
2487*6467f958SSadaf Ebrahimi     size_t vectorSize = get_pixel_size(imageInfo->format);
2488*6467f958SSadaf Ebrahimi     pack_image_pixel(swizzled, imageInfo->format, vectorToFind);
2489*6467f958SSadaf Ebrahimi     return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2490*6467f958SSadaf Ebrahimi                                       vectorSize, outX, outY, outZ, lod);
2491*6467f958SSadaf Ebrahimi }
2492*6467f958SSadaf Ebrahimi 
2493*6467f958SSadaf Ebrahimi template <class T>
swizzle_vector_for_image(T * srcVector,const cl_image_format * imageFormat)2494*6467f958SSadaf Ebrahimi void swizzle_vector_for_image(T *srcVector, const cl_image_format *imageFormat)
2495*6467f958SSadaf Ebrahimi {
2496*6467f958SSadaf Ebrahimi     T temp;
2497*6467f958SSadaf Ebrahimi     switch (imageFormat->image_channel_order)
2498*6467f958SSadaf Ebrahimi     {
2499*6467f958SSadaf Ebrahimi         case CL_A: srcVector[0] = srcVector[3]; break;
2500*6467f958SSadaf Ebrahimi         case CL_R:
2501*6467f958SSadaf Ebrahimi         case CL_Rx:
2502*6467f958SSadaf Ebrahimi         case CL_RG:
2503*6467f958SSadaf Ebrahimi         case CL_RGx:
2504*6467f958SSadaf Ebrahimi         case CL_RGB:
2505*6467f958SSadaf Ebrahimi         case CL_RGBx:
2506*6467f958SSadaf Ebrahimi         case CL_RGBA:
2507*6467f958SSadaf Ebrahimi         case CL_sRGB:
2508*6467f958SSadaf Ebrahimi         case CL_sRGBx:
2509*6467f958SSadaf Ebrahimi         case CL_sRGBA: break;
2510*6467f958SSadaf Ebrahimi         case CL_RA: srcVector[1] = srcVector[3]; break;
2511*6467f958SSadaf Ebrahimi         case CL_ARGB:
2512*6467f958SSadaf Ebrahimi             temp = srcVector[3];
2513*6467f958SSadaf Ebrahimi             srcVector[3] = srcVector[2];
2514*6467f958SSadaf Ebrahimi             srcVector[2] = srcVector[1];
2515*6467f958SSadaf Ebrahimi             srcVector[1] = srcVector[0];
2516*6467f958SSadaf Ebrahimi             srcVector[0] = temp;
2517*6467f958SSadaf Ebrahimi             break;
2518*6467f958SSadaf Ebrahimi         case CL_ABGR:
2519*6467f958SSadaf Ebrahimi             temp = srcVector[3];
2520*6467f958SSadaf Ebrahimi             srcVector[3] = srcVector[0];
2521*6467f958SSadaf Ebrahimi             srcVector[0] = temp;
2522*6467f958SSadaf Ebrahimi             temp = srcVector[2];
2523*6467f958SSadaf Ebrahimi             srcVector[2] = srcVector[1];
2524*6467f958SSadaf Ebrahimi             srcVector[1] = temp;
2525*6467f958SSadaf Ebrahimi             break;
2526*6467f958SSadaf Ebrahimi         case CL_BGRA:
2527*6467f958SSadaf Ebrahimi         case CL_sBGRA:
2528*6467f958SSadaf Ebrahimi             temp = srcVector[0];
2529*6467f958SSadaf Ebrahimi             srcVector[0] = srcVector[2];
2530*6467f958SSadaf Ebrahimi             srcVector[2] = temp;
2531*6467f958SSadaf Ebrahimi             break;
2532*6467f958SSadaf Ebrahimi         case CL_INTENSITY:
2533*6467f958SSadaf Ebrahimi             srcVector[3] = srcVector[0];
2534*6467f958SSadaf Ebrahimi             srcVector[2] = srcVector[0];
2535*6467f958SSadaf Ebrahimi             srcVector[1] = srcVector[0];
2536*6467f958SSadaf Ebrahimi             break;
2537*6467f958SSadaf Ebrahimi         case CL_LUMINANCE:
2538*6467f958SSadaf Ebrahimi             srcVector[2] = srcVector[0];
2539*6467f958SSadaf Ebrahimi             srcVector[1] = srcVector[0];
2540*6467f958SSadaf Ebrahimi             break;
2541*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
2542*6467f958SSadaf Ebrahimi         case CL_1RGB_APPLE:
2543*6467f958SSadaf Ebrahimi             temp = srcVector[3];
2544*6467f958SSadaf Ebrahimi             srcVector[3] = srcVector[2];
2545*6467f958SSadaf Ebrahimi             srcVector[2] = srcVector[1];
2546*6467f958SSadaf Ebrahimi             srcVector[1] = srcVector[0];
2547*6467f958SSadaf Ebrahimi             srcVector[0] = temp;
2548*6467f958SSadaf Ebrahimi             break;
2549*6467f958SSadaf Ebrahimi #endif
2550*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
2551*6467f958SSadaf Ebrahimi         case CL_BGR1_APPLE:
2552*6467f958SSadaf Ebrahimi             temp = srcVector[0];
2553*6467f958SSadaf Ebrahimi             srcVector[0] = srcVector[2];
2554*6467f958SSadaf Ebrahimi             srcVector[2] = temp;
2555*6467f958SSadaf Ebrahimi             break;
2556*6467f958SSadaf Ebrahimi #endif
2557*6467f958SSadaf Ebrahimi     }
2558*6467f958SSadaf Ebrahimi }
2559*6467f958SSadaf Ebrahimi 
// Clamps v to the inclusive range [min, max].
// Every argument is parenthesized so that operator expressions passed as
// arguments keep their intended grouping. Note that v may be evaluated up to
// three times, so it must not have side effects.
#define SATURATE(v, min, max)                                                  \
    ((v) < (min) ? (min) : ((v) > (max) ? (max) : (v)))
2561*6467f958SSadaf Ebrahimi 
pack_image_pixel(unsigned int * srcVector,const cl_image_format * imageFormat,void * outData)2562*6467f958SSadaf Ebrahimi void pack_image_pixel(unsigned int *srcVector,
2563*6467f958SSadaf Ebrahimi                       const cl_image_format *imageFormat, void *outData)
2564*6467f958SSadaf Ebrahimi {
2565*6467f958SSadaf Ebrahimi     swizzle_vector_for_image<unsigned int>(srcVector, imageFormat);
2566*6467f958SSadaf Ebrahimi     size_t channelCount = get_format_channel_count(imageFormat);
2567*6467f958SSadaf Ebrahimi 
2568*6467f958SSadaf Ebrahimi     switch (imageFormat->image_channel_data_type)
2569*6467f958SSadaf Ebrahimi     {
2570*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: {
2571*6467f958SSadaf Ebrahimi             unsigned char *ptr = (unsigned char *)outData;
2572*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2573*6467f958SSadaf Ebrahimi                 ptr[i] = (unsigned char)SATURATE(srcVector[i], 0, 255);
2574*6467f958SSadaf Ebrahimi             break;
2575*6467f958SSadaf Ebrahimi         }
2576*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16: {
2577*6467f958SSadaf Ebrahimi             unsigned short *ptr = (unsigned short *)outData;
2578*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2579*6467f958SSadaf Ebrahimi                 ptr[i] = (unsigned short)SATURATE(srcVector[i], 0, 65535);
2580*6467f958SSadaf Ebrahimi             break;
2581*6467f958SSadaf Ebrahimi         }
2582*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: {
2583*6467f958SSadaf Ebrahimi             unsigned int *ptr = (unsigned int *)outData;
2584*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2585*6467f958SSadaf Ebrahimi                 ptr[i] = (unsigned int)srcVector[i];
2586*6467f958SSadaf Ebrahimi             break;
2587*6467f958SSadaf Ebrahimi         }
2588*6467f958SSadaf Ebrahimi         default: break;
2589*6467f958SSadaf Ebrahimi     }
2590*6467f958SSadaf Ebrahimi }
2591*6467f958SSadaf Ebrahimi 
pack_image_pixel(int * srcVector,const cl_image_format * imageFormat,void * outData)2592*6467f958SSadaf Ebrahimi void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
2593*6467f958SSadaf Ebrahimi                       void *outData)
2594*6467f958SSadaf Ebrahimi {
2595*6467f958SSadaf Ebrahimi     swizzle_vector_for_image<int>(srcVector, imageFormat);
2596*6467f958SSadaf Ebrahimi     size_t chanelCount = get_format_channel_count(imageFormat);
2597*6467f958SSadaf Ebrahimi 
2598*6467f958SSadaf Ebrahimi     switch (imageFormat->image_channel_data_type)
2599*6467f958SSadaf Ebrahimi     {
2600*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8: {
2601*6467f958SSadaf Ebrahimi             char *ptr = (char *)outData;
2602*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < chanelCount; i++)
2603*6467f958SSadaf Ebrahimi                 ptr[i] = (char)SATURATE(srcVector[i], -128, 127);
2604*6467f958SSadaf Ebrahimi             break;
2605*6467f958SSadaf Ebrahimi         }
2606*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16: {
2607*6467f958SSadaf Ebrahimi             short *ptr = (short *)outData;
2608*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < chanelCount; i++)
2609*6467f958SSadaf Ebrahimi                 ptr[i] = (short)SATURATE(srcVector[i], -32768, 32767);
2610*6467f958SSadaf Ebrahimi             break;
2611*6467f958SSadaf Ebrahimi         }
2612*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32: {
2613*6467f958SSadaf Ebrahimi             int *ptr = (int *)outData;
2614*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < chanelCount; i++)
2615*6467f958SSadaf Ebrahimi                 ptr[i] = (int)srcVector[i];
2616*6467f958SSadaf Ebrahimi             break;
2617*6467f958SSadaf Ebrahimi         }
2618*6467f958SSadaf Ebrahimi         default: break;
2619*6467f958SSadaf Ebrahimi     }
2620*6467f958SSadaf Ebrahimi }
2621*6467f958SSadaf Ebrahimi 
round_to_even(float v)2622*6467f958SSadaf Ebrahimi cl_int round_to_even(float v)
2623*6467f958SSadaf Ebrahimi {
2624*6467f958SSadaf Ebrahimi     // clamp overflow
2625*6467f958SSadaf Ebrahimi     if (v >= -(float)CL_INT_MIN) return CL_INT_MAX;
2626*6467f958SSadaf Ebrahimi     if (v <= (float)CL_INT_MIN) return CL_INT_MIN;
2627*6467f958SSadaf Ebrahimi 
2628*6467f958SSadaf Ebrahimi     // round fractional values to integer value
2629*6467f958SSadaf Ebrahimi     if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23))
2630*6467f958SSadaf Ebrahimi     {
2631*6467f958SSadaf Ebrahimi         static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23),
2632*6467f958SSadaf Ebrahimi                                         MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
2633*6467f958SSadaf Ebrahimi         float magicVal = magic[v < 0.0f];
2634*6467f958SSadaf Ebrahimi         v += magicVal;
2635*6467f958SSadaf Ebrahimi         v -= magicVal;
2636*6467f958SSadaf Ebrahimi     }
2637*6467f958SSadaf Ebrahimi 
2638*6467f958SSadaf Ebrahimi     return (cl_int)v;
2639*6467f958SSadaf Ebrahimi }
2640*6467f958SSadaf Ebrahimi 
pack_image_pixel(float * srcVector,const cl_image_format * imageFormat,void * outData)2641*6467f958SSadaf Ebrahimi void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat,
2642*6467f958SSadaf Ebrahimi                       void *outData)
2643*6467f958SSadaf Ebrahimi {
2644*6467f958SSadaf Ebrahimi     swizzle_vector_for_image<float>(srcVector, imageFormat);
2645*6467f958SSadaf Ebrahimi     size_t channelCount = get_format_channel_count(imageFormat);
2646*6467f958SSadaf Ebrahimi     switch (imageFormat->image_channel_data_type)
2647*6467f958SSadaf Ebrahimi     {
2648*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT: {
2649*6467f958SSadaf Ebrahimi             cl_half *ptr = (cl_half *)outData;
2650*6467f958SSadaf Ebrahimi 
2651*6467f958SSadaf Ebrahimi             switch (gFloatToHalfRoundingMode)
2652*6467f958SSadaf Ebrahimi             {
2653*6467f958SSadaf Ebrahimi                 case kRoundToNearestEven:
2654*6467f958SSadaf Ebrahimi                     for (unsigned int i = 0; i < channelCount; i++)
2655*6467f958SSadaf Ebrahimi                         ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTE);
2656*6467f958SSadaf Ebrahimi                     break;
2657*6467f958SSadaf Ebrahimi                 case kRoundTowardZero:
2658*6467f958SSadaf Ebrahimi                     for (unsigned int i = 0; i < channelCount; i++)
2659*6467f958SSadaf Ebrahimi                         ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTZ);
2660*6467f958SSadaf Ebrahimi                     break;
2661*6467f958SSadaf Ebrahimi                 default:
2662*6467f958SSadaf Ebrahimi                     log_error("ERROR: Test internal error -- unhandled or "
2663*6467f958SSadaf Ebrahimi                               "unknown float->half rounding mode.\n");
2664*6467f958SSadaf Ebrahimi                     exit(-1);
2665*6467f958SSadaf Ebrahimi                     break;
2666*6467f958SSadaf Ebrahimi             }
2667*6467f958SSadaf Ebrahimi             break;
2668*6467f958SSadaf Ebrahimi         }
2669*6467f958SSadaf Ebrahimi 
2670*6467f958SSadaf Ebrahimi         case CL_FLOAT: {
2671*6467f958SSadaf Ebrahimi             cl_float *ptr = (cl_float *)outData;
2672*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2673*6467f958SSadaf Ebrahimi                 ptr[i] = srcVector[i];
2674*6467f958SSadaf Ebrahimi             break;
2675*6467f958SSadaf Ebrahimi         }
2676*6467f958SSadaf Ebrahimi 
2677*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8: {
2678*6467f958SSadaf Ebrahimi             cl_char *ptr = (cl_char *)outData;
2679*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2680*6467f958SSadaf Ebrahimi                 ptr[i] =
2681*6467f958SSadaf Ebrahimi                     (cl_char)NORMALIZE_SIGNED(srcVector[i], -127.0f, 127.f);
2682*6467f958SSadaf Ebrahimi             break;
2683*6467f958SSadaf Ebrahimi         }
2684*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16: {
2685*6467f958SSadaf Ebrahimi             cl_short *ptr = (cl_short *)outData;
2686*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2687*6467f958SSadaf Ebrahimi                 ptr[i] =
2688*6467f958SSadaf Ebrahimi                     (short)NORMALIZE_SIGNED(srcVector[i], -32767.f, 32767.f);
2689*6467f958SSadaf Ebrahimi             break;
2690*6467f958SSadaf Ebrahimi         }
2691*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8: {
2692*6467f958SSadaf Ebrahimi             cl_uchar *ptr = (cl_uchar *)outData;
2693*6467f958SSadaf Ebrahimi             if (is_sRGBA_order(imageFormat->image_channel_order))
2694*6467f958SSadaf Ebrahimi             {
2695*6467f958SSadaf Ebrahimi                 ptr[0] = (unsigned char)(sRGBmap(srcVector[0]) + 0.5);
2696*6467f958SSadaf Ebrahimi                 ptr[1] = (unsigned char)(sRGBmap(srcVector[1]) + 0.5);
2697*6467f958SSadaf Ebrahimi                 ptr[2] = (unsigned char)(sRGBmap(srcVector[2]) + 0.5);
2698*6467f958SSadaf Ebrahimi                 if (channelCount == 4)
2699*6467f958SSadaf Ebrahimi                     ptr[3] = (unsigned char)NORMALIZE(srcVector[3], 255.f);
2700*6467f958SSadaf Ebrahimi             }
2701*6467f958SSadaf Ebrahimi             else
2702*6467f958SSadaf Ebrahimi             {
2703*6467f958SSadaf Ebrahimi                 for (unsigned int i = 0; i < channelCount; i++)
2704*6467f958SSadaf Ebrahimi                     ptr[i] = (unsigned char)NORMALIZE(srcVector[i], 255.f);
2705*6467f958SSadaf Ebrahimi             }
2706*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
2707*6467f958SSadaf Ebrahimi             if (imageFormat->image_channel_order == CL_1RGB_APPLE)
2708*6467f958SSadaf Ebrahimi                 ptr[0] = 255.0f;
2709*6467f958SSadaf Ebrahimi #endif
2710*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
2711*6467f958SSadaf Ebrahimi             if (imageFormat->image_channel_order == CL_BGR1_APPLE)
2712*6467f958SSadaf Ebrahimi                 ptr[3] = 255.0f;
2713*6467f958SSadaf Ebrahimi #endif
2714*6467f958SSadaf Ebrahimi             break;
2715*6467f958SSadaf Ebrahimi         }
2716*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16: {
2717*6467f958SSadaf Ebrahimi             cl_ushort *ptr = (cl_ushort *)outData;
2718*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2719*6467f958SSadaf Ebrahimi                 ptr[i] = (unsigned short)NORMALIZE(srcVector[i], 65535.f);
2720*6467f958SSadaf Ebrahimi             break;
2721*6467f958SSadaf Ebrahimi         }
2722*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555: {
2723*6467f958SSadaf Ebrahimi             cl_ushort *ptr = (cl_ushort *)outData;
2724*6467f958SSadaf Ebrahimi             ptr[0] =
2725*6467f958SSadaf Ebrahimi                 (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 10)
2726*6467f958SSadaf Ebrahimi                 | (((unsigned short)NORMALIZE(srcVector[1], 31.f) & 31) << 5)
2727*6467f958SSadaf Ebrahimi                 | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
2728*6467f958SSadaf Ebrahimi             break;
2729*6467f958SSadaf Ebrahimi         }
2730*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565: {
2731*6467f958SSadaf Ebrahimi             cl_ushort *ptr = (cl_ushort *)outData;
2732*6467f958SSadaf Ebrahimi             ptr[0] =
2733*6467f958SSadaf Ebrahimi                 (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 11)
2734*6467f958SSadaf Ebrahimi                 | (((unsigned short)NORMALIZE(srcVector[1], 63.f) & 63) << 5)
2735*6467f958SSadaf Ebrahimi                 | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
2736*6467f958SSadaf Ebrahimi             break;
2737*6467f958SSadaf Ebrahimi         }
2738*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010: {
2739*6467f958SSadaf Ebrahimi             cl_uint *ptr = (cl_uint *)outData;
2740*6467f958SSadaf Ebrahimi             ptr[0] =
2741*6467f958SSadaf Ebrahimi                 (((unsigned int)NORMALIZE(srcVector[0], 1023.f) & 1023) << 20)
2742*6467f958SSadaf Ebrahimi                 | (((unsigned int)NORMALIZE(srcVector[1], 1023.f) & 1023) << 10)
2743*6467f958SSadaf Ebrahimi                 | (((unsigned int)NORMALIZE(srcVector[2], 1023.f) & 1023) << 0);
2744*6467f958SSadaf Ebrahimi             break;
2745*6467f958SSadaf Ebrahimi         }
2746*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8: {
2747*6467f958SSadaf Ebrahimi             cl_char *ptr = (cl_char *)outData;
2748*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2749*6467f958SSadaf Ebrahimi                 ptr[i] =
2750*6467f958SSadaf Ebrahimi                     (cl_char)CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
2751*6467f958SSadaf Ebrahimi             break;
2752*6467f958SSadaf Ebrahimi         }
2753*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16: {
2754*6467f958SSadaf Ebrahimi             cl_short *ptr = (cl_short *)outData;
2755*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2756*6467f958SSadaf Ebrahimi                 ptr[i] =
2757*6467f958SSadaf Ebrahimi                     (short)CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
2758*6467f958SSadaf Ebrahimi             break;
2759*6467f958SSadaf Ebrahimi         }
2760*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32: {
2761*6467f958SSadaf Ebrahimi             cl_int *ptr = (cl_int *)outData;
2762*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2763*6467f958SSadaf Ebrahimi                 ptr[i] = round_to_even(srcVector[i]);
2764*6467f958SSadaf Ebrahimi             break;
2765*6467f958SSadaf Ebrahimi         }
2766*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: {
2767*6467f958SSadaf Ebrahimi             cl_uchar *ptr = (cl_uchar *)outData;
2768*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2769*6467f958SSadaf Ebrahimi                 ptr[i] =
2770*6467f958SSadaf Ebrahimi                     (cl_uchar)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
2771*6467f958SSadaf Ebrahimi             break;
2772*6467f958SSadaf Ebrahimi         }
2773*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16: {
2774*6467f958SSadaf Ebrahimi             cl_ushort *ptr = (cl_ushort *)outData;
2775*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2776*6467f958SSadaf Ebrahimi                 ptr[i] = (cl_ushort)CONVERT_UINT(srcVector[i], 32767.f,
2777*6467f958SSadaf Ebrahimi                                                  CL_USHRT_MAX);
2778*6467f958SSadaf Ebrahimi             break;
2779*6467f958SSadaf Ebrahimi         }
2780*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: {
2781*6467f958SSadaf Ebrahimi             cl_uint *ptr = (cl_uint *)outData;
2782*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2783*6467f958SSadaf Ebrahimi                 ptr[i] = (cl_uint)CONVERT_UINT(
2784*6467f958SSadaf Ebrahimi                     srcVector[i],
2785*6467f958SSadaf Ebrahimi                     MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
2786*6467f958SSadaf Ebrahimi                     CL_UINT_MAX);
2787*6467f958SSadaf Ebrahimi             break;
2788*6467f958SSadaf Ebrahimi         }
2789*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
2790*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE: {
2791*6467f958SSadaf Ebrahimi             cl_ushort *ptr = (cl_ushort *)outData;
2792*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2793*6467f958SSadaf Ebrahimi             {
2794*6467f958SSadaf Ebrahimi                 cl_float f = fmaxf(srcVector[i], -1.0f);
2795*6467f958SSadaf Ebrahimi                 f = fminf(f, 3.0f);
2796*6467f958SSadaf Ebrahimi                 cl_int d = rintf(f * 0x1.0p14f);
2797*6467f958SSadaf Ebrahimi                 d += 16384;
2798*6467f958SSadaf Ebrahimi                 if (d > CL_USHRT_MAX) d = CL_USHRT_MAX;
2799*6467f958SSadaf Ebrahimi                 ptr[i] = d;
2800*6467f958SSadaf Ebrahimi             }
2801*6467f958SSadaf Ebrahimi             break;
2802*6467f958SSadaf Ebrahimi         }
2803*6467f958SSadaf Ebrahimi #endif
2804*6467f958SSadaf Ebrahimi         default:
2805*6467f958SSadaf Ebrahimi             log_error("INTERNAL ERROR: unknown format (%d)\n",
2806*6467f958SSadaf Ebrahimi                       imageFormat->image_channel_data_type);
2807*6467f958SSadaf Ebrahimi             exit(-1);
2808*6467f958SSadaf Ebrahimi             break;
2809*6467f958SSadaf Ebrahimi     }
2810*6467f958SSadaf Ebrahimi }
2811*6467f958SSadaf Ebrahimi 
pack_image_pixel_error(const float * srcVector,const cl_image_format * imageFormat,const void * results,float * errors)2812*6467f958SSadaf Ebrahimi void pack_image_pixel_error(const float *srcVector,
2813*6467f958SSadaf Ebrahimi                             const cl_image_format *imageFormat,
2814*6467f958SSadaf Ebrahimi                             const void *results, float *errors)
2815*6467f958SSadaf Ebrahimi {
2816*6467f958SSadaf Ebrahimi     size_t channelCount = get_format_channel_count(imageFormat);
2817*6467f958SSadaf Ebrahimi     switch (imageFormat->image_channel_data_type)
2818*6467f958SSadaf Ebrahimi     {
2819*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT: {
2820*6467f958SSadaf Ebrahimi             const cl_half *ptr = (const cl_half *)results;
2821*6467f958SSadaf Ebrahimi 
2822*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2823*6467f958SSadaf Ebrahimi                 errors[i] = Ulp_Error_Half(ptr[i], srcVector[i]);
2824*6467f958SSadaf Ebrahimi 
2825*6467f958SSadaf Ebrahimi             break;
2826*6467f958SSadaf Ebrahimi         }
2827*6467f958SSadaf Ebrahimi 
2828*6467f958SSadaf Ebrahimi         case CL_FLOAT: {
2829*6467f958SSadaf Ebrahimi             const cl_ushort *ptr = (const cl_ushort *)results;
2830*6467f958SSadaf Ebrahimi 
2831*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2832*6467f958SSadaf Ebrahimi                 errors[i] = Ulp_Error(ptr[i], srcVector[i]);
2833*6467f958SSadaf Ebrahimi 
2834*6467f958SSadaf Ebrahimi             break;
2835*6467f958SSadaf Ebrahimi         }
2836*6467f958SSadaf Ebrahimi 
2837*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8: {
2838*6467f958SSadaf Ebrahimi             const cl_char *ptr = (const cl_char *)results;
2839*6467f958SSadaf Ebrahimi 
2840*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2841*6467f958SSadaf Ebrahimi                 errors[i] = ptr[i]
2842*6467f958SSadaf Ebrahimi                     - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -127.0f, 127.f);
2843*6467f958SSadaf Ebrahimi 
2844*6467f958SSadaf Ebrahimi             break;
2845*6467f958SSadaf Ebrahimi         }
2846*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16: {
2847*6467f958SSadaf Ebrahimi             const cl_short *ptr = (const cl_short *)results;
2848*6467f958SSadaf Ebrahimi 
2849*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2850*6467f958SSadaf Ebrahimi                 errors[i] = ptr[i]
2851*6467f958SSadaf Ebrahimi                     - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -32767.f,
2852*6467f958SSadaf Ebrahimi                                                  32767.f);
2853*6467f958SSadaf Ebrahimi 
2854*6467f958SSadaf Ebrahimi             break;
2855*6467f958SSadaf Ebrahimi         }
2856*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8: {
2857*6467f958SSadaf Ebrahimi             const cl_uchar *ptr = (const cl_uchar *)results;
2858*6467f958SSadaf Ebrahimi 
2859*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2860*6467f958SSadaf Ebrahimi                 errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 255.f);
2861*6467f958SSadaf Ebrahimi 
2862*6467f958SSadaf Ebrahimi             break;
2863*6467f958SSadaf Ebrahimi         }
2864*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16: {
2865*6467f958SSadaf Ebrahimi             const cl_ushort *ptr = (const cl_ushort *)results;
2866*6467f958SSadaf Ebrahimi 
2867*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2868*6467f958SSadaf Ebrahimi                 errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 65535.f);
2869*6467f958SSadaf Ebrahimi 
2870*6467f958SSadaf Ebrahimi             break;
2871*6467f958SSadaf Ebrahimi         }
2872*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555: {
2873*6467f958SSadaf Ebrahimi             const cl_ushort *ptr = (const cl_ushort *)results;
2874*6467f958SSadaf Ebrahimi 
2875*6467f958SSadaf Ebrahimi             errors[0] =
2876*6467f958SSadaf Ebrahimi                 ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
2877*6467f958SSadaf Ebrahimi             errors[1] =
2878*6467f958SSadaf Ebrahimi                 ((ptr[0] >> 5) & 31) - NORMALIZE_UNROUNDED(srcVector[1], 31.f);
2879*6467f958SSadaf Ebrahimi             errors[2] =
2880*6467f958SSadaf Ebrahimi                 ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
2881*6467f958SSadaf Ebrahimi 
2882*6467f958SSadaf Ebrahimi             break;
2883*6467f958SSadaf Ebrahimi         }
2884*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565: {
2885*6467f958SSadaf Ebrahimi             const cl_ushort *ptr = (const cl_ushort *)results;
2886*6467f958SSadaf Ebrahimi 
2887*6467f958SSadaf Ebrahimi             errors[0] =
2888*6467f958SSadaf Ebrahimi                 ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
2889*6467f958SSadaf Ebrahimi             errors[1] =
2890*6467f958SSadaf Ebrahimi                 ((ptr[0] >> 5) & 63) - NORMALIZE_UNROUNDED(srcVector[1], 63.f);
2891*6467f958SSadaf Ebrahimi             errors[2] =
2892*6467f958SSadaf Ebrahimi                 ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
2893*6467f958SSadaf Ebrahimi 
2894*6467f958SSadaf Ebrahimi             break;
2895*6467f958SSadaf Ebrahimi         }
2896*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010: {
2897*6467f958SSadaf Ebrahimi             const cl_uint *ptr = (const cl_uint *)results;
2898*6467f958SSadaf Ebrahimi 
2899*6467f958SSadaf Ebrahimi             errors[0] = ((ptr[0] >> 20) & 1023)
2900*6467f958SSadaf Ebrahimi                 - NORMALIZE_UNROUNDED(srcVector[0], 1023.f);
2901*6467f958SSadaf Ebrahimi             errors[1] = ((ptr[0] >> 10) & 1023)
2902*6467f958SSadaf Ebrahimi                 - NORMALIZE_UNROUNDED(srcVector[1], 1023.f);
2903*6467f958SSadaf Ebrahimi             errors[2] = ((ptr[0] >> 0) & 1023)
2904*6467f958SSadaf Ebrahimi                 - NORMALIZE_UNROUNDED(srcVector[2], 1023.f);
2905*6467f958SSadaf Ebrahimi 
2906*6467f958SSadaf Ebrahimi             break;
2907*6467f958SSadaf Ebrahimi         }
2908*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8: {
2909*6467f958SSadaf Ebrahimi             const cl_char *ptr = (const cl_char *)results;
2910*6467f958SSadaf Ebrahimi 
2911*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2912*6467f958SSadaf Ebrahimi                 errors[i] =
2913*6467f958SSadaf Ebrahimi                     ptr[i] - CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
2914*6467f958SSadaf Ebrahimi 
2915*6467f958SSadaf Ebrahimi             break;
2916*6467f958SSadaf Ebrahimi         }
2917*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16: {
2918*6467f958SSadaf Ebrahimi             const cl_short *ptr = (const cl_short *)results;
2919*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2920*6467f958SSadaf Ebrahimi                 errors[i] = ptr[i]
2921*6467f958SSadaf Ebrahimi                     - CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
2922*6467f958SSadaf Ebrahimi             break;
2923*6467f958SSadaf Ebrahimi         }
2924*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32: {
2925*6467f958SSadaf Ebrahimi             const cl_int *ptr = (const cl_int *)results;
2926*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2927*6467f958SSadaf Ebrahimi                 errors[i] = (cl_float)((cl_long)ptr[i]
2928*6467f958SSadaf Ebrahimi                                        - (cl_long)round_to_even(srcVector[i]));
2929*6467f958SSadaf Ebrahimi             break;
2930*6467f958SSadaf Ebrahimi         }
2931*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: {
2932*6467f958SSadaf Ebrahimi             const cl_uchar *ptr = (const cl_uchar *)results;
2933*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2934*6467f958SSadaf Ebrahimi                 errors[i] = static_cast<float>(
2935*6467f958SSadaf Ebrahimi                     (cl_int)ptr[i]
2936*6467f958SSadaf Ebrahimi                     - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX));
2937*6467f958SSadaf Ebrahimi             break;
2938*6467f958SSadaf Ebrahimi         }
2939*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16: {
2940*6467f958SSadaf Ebrahimi             const cl_ushort *ptr = (const cl_ushort *)results;
2941*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2942*6467f958SSadaf Ebrahimi                 errors[i] = static_cast<float>(
2943*6467f958SSadaf Ebrahimi                     (cl_int)ptr[i]
2944*6467f958SSadaf Ebrahimi                     - (cl_int)CONVERT_UINT(srcVector[i], 32767.f,
2945*6467f958SSadaf Ebrahimi                                            CL_USHRT_MAX));
2946*6467f958SSadaf Ebrahimi             break;
2947*6467f958SSadaf Ebrahimi         }
2948*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32: {
2949*6467f958SSadaf Ebrahimi             const cl_uint *ptr = (const cl_uint *)results;
2950*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2951*6467f958SSadaf Ebrahimi                 errors[i] = (cl_float)(
2952*6467f958SSadaf Ebrahimi                     (cl_long)ptr[i]
2953*6467f958SSadaf Ebrahimi                     - (cl_long)CONVERT_UINT(
2954*6467f958SSadaf Ebrahimi                         srcVector[i],
2955*6467f958SSadaf Ebrahimi                         MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
2956*6467f958SSadaf Ebrahimi                         CL_UINT_MAX));
2957*6467f958SSadaf Ebrahimi             break;
2958*6467f958SSadaf Ebrahimi         }
2959*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
2960*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE: {
2961*6467f958SSadaf Ebrahimi             const cl_ushort *ptr = (const cl_ushort *)results;
2962*6467f958SSadaf Ebrahimi 
2963*6467f958SSadaf Ebrahimi             for (unsigned int i = 0; i < channelCount; i++)
2964*6467f958SSadaf Ebrahimi                 errors[i] = ptr[i]
2965*6467f958SSadaf Ebrahimi                     - NORMALIZE_SIGNED_UNROUNDED(((int)srcVector[i] - 16384),
2966*6467f958SSadaf Ebrahimi                                                  -16384.f, 49151.f);
2967*6467f958SSadaf Ebrahimi 
2968*6467f958SSadaf Ebrahimi             break;
2969*6467f958SSadaf Ebrahimi         }
2970*6467f958SSadaf Ebrahimi #endif
2971*6467f958SSadaf Ebrahimi         default:
2972*6467f958SSadaf Ebrahimi             log_error("INTERNAL ERROR: unknown format (%d)\n",
2973*6467f958SSadaf Ebrahimi                       imageFormat->image_channel_data_type);
2974*6467f958SSadaf Ebrahimi             exit(-1);
2975*6467f958SSadaf Ebrahimi             break;
2976*6467f958SSadaf Ebrahimi     }
2977*6467f958SSadaf Ebrahimi }
2978*6467f958SSadaf Ebrahimi 
2979*6467f958SSadaf Ebrahimi 
2980*6467f958SSadaf Ebrahimi //
2981*6467f958SSadaf Ebrahimi //  Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT
2982*6467f958SSadaf Ebrahimi //  This should be called lazily before attempting to verify image writes,
2983*6467f958SSadaf Ebrahimi //  otherwise an error will occur.
2984*6467f958SSadaf Ebrahimi //
DetectFloatToHalfRoundingMode(cl_command_queue q)2985*6467f958SSadaf Ebrahimi int DetectFloatToHalfRoundingMode(
2986*6467f958SSadaf Ebrahimi     cl_command_queue q) // Returns CL_SUCCESS on success
2987*6467f958SSadaf Ebrahimi {
2988*6467f958SSadaf Ebrahimi     cl_int err = CL_SUCCESS;
2989*6467f958SSadaf Ebrahimi 
2990*6467f958SSadaf Ebrahimi     if (gFloatToHalfRoundingMode == kDefaultRoundingMode)
2991*6467f958SSadaf Ebrahimi     {
2992*6467f958SSadaf Ebrahimi         // Some numbers near 0.5f, that we look at to see how the values are
2993*6467f958SSadaf Ebrahimi         // rounded.
2994*6467f958SSadaf Ebrahimi         static const cl_uint inData[4 * 4] = {
2995*6467f958SSadaf Ebrahimi             0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U,
2996*6467f958SSadaf Ebrahimi             0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
2997*6467f958SSadaf Ebrahimi             0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U,
2998*6467f958SSadaf Ebrahimi             0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U
2999*6467f958SSadaf Ebrahimi         };
3000*6467f958SSadaf Ebrahimi         static const size_t count = sizeof(inData) / (4 * sizeof(inData[0]));
3001*6467f958SSadaf Ebrahimi         const float *inp = (const float *)inData;
3002*6467f958SSadaf Ebrahimi         cl_context context = NULL;
3003*6467f958SSadaf Ebrahimi 
3004*6467f958SSadaf Ebrahimi         // Create an input buffer
3005*6467f958SSadaf Ebrahimi         err = clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(context),
3006*6467f958SSadaf Ebrahimi                                     &context, NULL);
3007*6467f958SSadaf Ebrahimi         if (err)
3008*6467f958SSadaf Ebrahimi         {
3009*6467f958SSadaf Ebrahimi             log_error("Error:  could not get context from command queue in "
3010*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode  (%d)",
3011*6467f958SSadaf Ebrahimi                       err);
3012*6467f958SSadaf Ebrahimi             return err;
3013*6467f958SSadaf Ebrahimi         }
3014*6467f958SSadaf Ebrahimi 
3015*6467f958SSadaf Ebrahimi         cl_mem inBuf = clCreateBuffer(context,
3016*6467f958SSadaf Ebrahimi                                       CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR
3017*6467f958SSadaf Ebrahimi                                           | CL_MEM_ALLOC_HOST_PTR,
3018*6467f958SSadaf Ebrahimi                                       sizeof(inData), (void *)inData, &err);
3019*6467f958SSadaf Ebrahimi         if (NULL == inBuf || err)
3020*6467f958SSadaf Ebrahimi         {
3021*6467f958SSadaf Ebrahimi             log_error("Error:  could not create input buffer in "
3022*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode  (err: %d)",
3023*6467f958SSadaf Ebrahimi                       err);
3024*6467f958SSadaf Ebrahimi             return err;
3025*6467f958SSadaf Ebrahimi         }
3026*6467f958SSadaf Ebrahimi 
3027*6467f958SSadaf Ebrahimi         // Create a small output image
3028*6467f958SSadaf Ebrahimi         cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
3029*6467f958SSadaf Ebrahimi         cl_mem outImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &fmt,
3030*6467f958SSadaf Ebrahimi                                           count, 1, 0, NULL, &err);
3031*6467f958SSadaf Ebrahimi         if (NULL == outImage || err)
3032*6467f958SSadaf Ebrahimi         {
3033*6467f958SSadaf Ebrahimi             log_error("Error:  could not create half float out image in "
3034*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode  (err: %d)",
3035*6467f958SSadaf Ebrahimi                       err);
3036*6467f958SSadaf Ebrahimi             clReleaseMemObject(inBuf);
3037*6467f958SSadaf Ebrahimi             return err;
3038*6467f958SSadaf Ebrahimi         }
3039*6467f958SSadaf Ebrahimi 
3040*6467f958SSadaf Ebrahimi         // Create our program, and a kernel
3041*6467f958SSadaf Ebrahimi         const char *kernelSource[1] = {
3042*6467f958SSadaf Ebrahimi             "kernel void detect_round( global float4 *in, write_only image2d_t "
3043*6467f958SSadaf Ebrahimi             "out )\n"
3044*6467f958SSadaf Ebrahimi             "{\n"
3045*6467f958SSadaf Ebrahimi             "   write_imagef( out, (int2)(get_global_id(0),0), "
3046*6467f958SSadaf Ebrahimi             "in[get_global_id(0)] );\n"
3047*6467f958SSadaf Ebrahimi             "}\n"
3048*6467f958SSadaf Ebrahimi         };
3049*6467f958SSadaf Ebrahimi 
3050*6467f958SSadaf Ebrahimi         clProgramWrapper program;
3051*6467f958SSadaf Ebrahimi         clKernelWrapper kernel;
3052*6467f958SSadaf Ebrahimi         err = create_single_kernel_helper(context, &program, &kernel, 1,
3053*6467f958SSadaf Ebrahimi                                           kernelSource, "detect_round");
3054*6467f958SSadaf Ebrahimi 
3055*6467f958SSadaf Ebrahimi         if (NULL == program || err)
3056*6467f958SSadaf Ebrahimi         {
3057*6467f958SSadaf Ebrahimi             log_error("Error:  could not create program in "
3058*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode (err: %d)",
3059*6467f958SSadaf Ebrahimi                       err);
3060*6467f958SSadaf Ebrahimi             clReleaseMemObject(inBuf);
3061*6467f958SSadaf Ebrahimi             clReleaseMemObject(outImage);
3062*6467f958SSadaf Ebrahimi             return err;
3063*6467f958SSadaf Ebrahimi         }
3064*6467f958SSadaf Ebrahimi 
3065*6467f958SSadaf Ebrahimi         cl_device_id device = NULL;
3066*6467f958SSadaf Ebrahimi         err = clGetCommandQueueInfo(q, CL_QUEUE_DEVICE, sizeof(device), &device,
3067*6467f958SSadaf Ebrahimi                                     NULL);
3068*6467f958SSadaf Ebrahimi         if (err)
3069*6467f958SSadaf Ebrahimi         {
3070*6467f958SSadaf Ebrahimi             log_error("Error:  could not get device from command queue in "
3071*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode  (%d)",
3072*6467f958SSadaf Ebrahimi                       err);
3073*6467f958SSadaf Ebrahimi             clReleaseMemObject(inBuf);
3074*6467f958SSadaf Ebrahimi             clReleaseMemObject(outImage);
3075*6467f958SSadaf Ebrahimi             return err;
3076*6467f958SSadaf Ebrahimi         }
3077*6467f958SSadaf Ebrahimi 
3078*6467f958SSadaf Ebrahimi         err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inBuf);
3079*6467f958SSadaf Ebrahimi         if (err)
3080*6467f958SSadaf Ebrahimi         {
3081*6467f958SSadaf Ebrahimi             log_error("Error: could not set argument 0 of kernel in "
3082*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode (%d)",
3083*6467f958SSadaf Ebrahimi                       err);
3084*6467f958SSadaf Ebrahimi             clReleaseMemObject(inBuf);
3085*6467f958SSadaf Ebrahimi             clReleaseMemObject(outImage);
3086*6467f958SSadaf Ebrahimi             return err;
3087*6467f958SSadaf Ebrahimi         }
3088*6467f958SSadaf Ebrahimi 
3089*6467f958SSadaf Ebrahimi         err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outImage);
3090*6467f958SSadaf Ebrahimi         if (err)
3091*6467f958SSadaf Ebrahimi         {
3092*6467f958SSadaf Ebrahimi             log_error("Error: could not set argument 1 of kernel in "
3093*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode (%d)",
3094*6467f958SSadaf Ebrahimi                       err);
3095*6467f958SSadaf Ebrahimi             clReleaseMemObject(inBuf);
3096*6467f958SSadaf Ebrahimi             clReleaseMemObject(outImage);
3097*6467f958SSadaf Ebrahimi             return err;
3098*6467f958SSadaf Ebrahimi         }
3099*6467f958SSadaf Ebrahimi 
3100*6467f958SSadaf Ebrahimi         // Run the kernel
3101*6467f958SSadaf Ebrahimi         size_t global_work_size = count;
3102*6467f958SSadaf Ebrahimi         err = clEnqueueNDRangeKernel(q, kernel, 1, NULL, &global_work_size,
3103*6467f958SSadaf Ebrahimi                                      NULL, 0, NULL, NULL);
3104*6467f958SSadaf Ebrahimi         if (err)
3105*6467f958SSadaf Ebrahimi         {
3106*6467f958SSadaf Ebrahimi             log_error("Error: could not enqueue kernel in "
3107*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode (%d)",
3108*6467f958SSadaf Ebrahimi                       err);
3109*6467f958SSadaf Ebrahimi             clReleaseMemObject(inBuf);
3110*6467f958SSadaf Ebrahimi             clReleaseMemObject(outImage);
3111*6467f958SSadaf Ebrahimi             return err;
3112*6467f958SSadaf Ebrahimi         }
3113*6467f958SSadaf Ebrahimi 
3114*6467f958SSadaf Ebrahimi         // read the results
3115*6467f958SSadaf Ebrahimi         cl_half outBuf[count * 4];
3116*6467f958SSadaf Ebrahimi         memset(outBuf, -1, sizeof(outBuf));
3117*6467f958SSadaf Ebrahimi         size_t origin[3] = { 0, 0, 0 };
3118*6467f958SSadaf Ebrahimi         size_t region[3] = { count, 1, 1 };
3119*6467f958SSadaf Ebrahimi         err = clEnqueueReadImage(q, outImage, CL_TRUE, origin, region, 0, 0,
3120*6467f958SSadaf Ebrahimi                                  outBuf, 0, NULL, NULL);
3121*6467f958SSadaf Ebrahimi         if (err)
3122*6467f958SSadaf Ebrahimi         {
3123*6467f958SSadaf Ebrahimi             log_error("Error: could not read output image in "
3124*6467f958SSadaf Ebrahimi                       "DetectFloatToHalfRoundingMode (%d)",
3125*6467f958SSadaf Ebrahimi                       err);
3126*6467f958SSadaf Ebrahimi             clReleaseMemObject(inBuf);
3127*6467f958SSadaf Ebrahimi             clReleaseMemObject(outImage);
3128*6467f958SSadaf Ebrahimi             return err;
3129*6467f958SSadaf Ebrahimi         }
3130*6467f958SSadaf Ebrahimi 
3131*6467f958SSadaf Ebrahimi         // Generate our list of reference results
3132*6467f958SSadaf Ebrahimi         cl_half rte_ref[count * 4];
3133*6467f958SSadaf Ebrahimi         cl_half rtz_ref[count * 4];
3134*6467f958SSadaf Ebrahimi         for (size_t i = 0; i < 4 * count; i++)
3135*6467f958SSadaf Ebrahimi         {
3136*6467f958SSadaf Ebrahimi             rte_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTE);
3137*6467f958SSadaf Ebrahimi             rtz_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTZ);
3138*6467f958SSadaf Ebrahimi         }
3139*6467f958SSadaf Ebrahimi 
3140*6467f958SSadaf Ebrahimi         // Verify that we got something in either rtz or rte mode
3141*6467f958SSadaf Ebrahimi         if (0 == memcmp(rte_ref, outBuf, sizeof(rte_ref)))
3142*6467f958SSadaf Ebrahimi         {
3143*6467f958SSadaf Ebrahimi             log_info("Autodetected float->half rounding mode to be rte\n");
3144*6467f958SSadaf Ebrahimi             gFloatToHalfRoundingMode = kRoundToNearestEven;
3145*6467f958SSadaf Ebrahimi         }
3146*6467f958SSadaf Ebrahimi         else if (0 == memcmp(rtz_ref, outBuf, sizeof(rtz_ref)))
3147*6467f958SSadaf Ebrahimi         {
3148*6467f958SSadaf Ebrahimi             log_info("Autodetected float->half rounding mode to be rtz\n");
3149*6467f958SSadaf Ebrahimi             gFloatToHalfRoundingMode = kRoundTowardZero;
3150*6467f958SSadaf Ebrahimi         }
3151*6467f958SSadaf Ebrahimi         else
3152*6467f958SSadaf Ebrahimi         {
3153*6467f958SSadaf Ebrahimi             log_error("ERROR: float to half conversions proceed with invalid "
3154*6467f958SSadaf Ebrahimi                       "rounding mode!\n");
3155*6467f958SSadaf Ebrahimi             log_info("\nfor:");
3156*6467f958SSadaf Ebrahimi             for (size_t i = 0; i < count; i++)
3157*6467f958SSadaf Ebrahimi                 log_info(" {%a, %a, %a, %a},", inp[4 * i], inp[4 * i + 1],
3158*6467f958SSadaf Ebrahimi                          inp[4 * i + 2], inp[4 * i + 3]);
3159*6467f958SSadaf Ebrahimi             log_info("\ngot:");
3160*6467f958SSadaf Ebrahimi             for (size_t i = 0; i < count; i++)
3161*6467f958SSadaf Ebrahimi                 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3162*6467f958SSadaf Ebrahimi                          outBuf[4 * i], outBuf[4 * i + 1], outBuf[4 * i + 2],
3163*6467f958SSadaf Ebrahimi                          outBuf[4 * i + 3]);
3164*6467f958SSadaf Ebrahimi             log_info("\nrte:");
3165*6467f958SSadaf Ebrahimi             for (size_t i = 0; i < count; i++)
3166*6467f958SSadaf Ebrahimi                 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3167*6467f958SSadaf Ebrahimi                          rte_ref[4 * i], rte_ref[4 * i + 1], rte_ref[4 * i + 2],
3168*6467f958SSadaf Ebrahimi                          rte_ref[4 * i + 3]);
3169*6467f958SSadaf Ebrahimi             log_info("\nrtz:");
3170*6467f958SSadaf Ebrahimi             for (size_t i = 0; i < count; i++)
3171*6467f958SSadaf Ebrahimi                 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3172*6467f958SSadaf Ebrahimi                          rtz_ref[4 * i], rtz_ref[4 * i + 1], rtz_ref[4 * i + 2],
3173*6467f958SSadaf Ebrahimi                          rtz_ref[4 * i + 3]);
3174*6467f958SSadaf Ebrahimi             log_info("\n");
3175*6467f958SSadaf Ebrahimi             err = -1;
3176*6467f958SSadaf Ebrahimi             gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
3177*6467f958SSadaf Ebrahimi         }
3178*6467f958SSadaf Ebrahimi 
3179*6467f958SSadaf Ebrahimi         // clean up
3180*6467f958SSadaf Ebrahimi         clReleaseMemObject(inBuf);
3181*6467f958SSadaf Ebrahimi         clReleaseMemObject(outImage);
3182*6467f958SSadaf Ebrahimi         return err;
3183*6467f958SSadaf Ebrahimi     }
3184*6467f958SSadaf Ebrahimi 
3185*6467f958SSadaf Ebrahimi     // Make sure that the rounding mode was successfully detected, if we checked
3186*6467f958SSadaf Ebrahimi     // earlier
3187*6467f958SSadaf Ebrahimi     if (gFloatToHalfRoundingMode != kRoundToNearestEven
3188*6467f958SSadaf Ebrahimi         && gFloatToHalfRoundingMode != kRoundTowardZero)
3189*6467f958SSadaf Ebrahimi         return -2;
3190*6467f958SSadaf Ebrahimi 
3191*6467f958SSadaf Ebrahimi     return err;
3192*6467f958SSadaf Ebrahimi }
3193*6467f958SSadaf Ebrahimi 
create_random_image_data(ExplicitType dataType,image_descriptor * imageInfo,BufferOwningPtr<char> & P,MTdata d,bool image2DFromBuffer)3194*6467f958SSadaf Ebrahimi char *create_random_image_data(ExplicitType dataType,
3195*6467f958SSadaf Ebrahimi                                image_descriptor *imageInfo,
3196*6467f958SSadaf Ebrahimi                                BufferOwningPtr<char> &P, MTdata d,
3197*6467f958SSadaf Ebrahimi                                bool image2DFromBuffer)
3198*6467f958SSadaf Ebrahimi {
3199*6467f958SSadaf Ebrahimi     size_t allocSize, numPixels;
3200*6467f958SSadaf Ebrahimi     if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
3201*6467f958SSadaf Ebrahimi     {
3202*6467f958SSadaf Ebrahimi         allocSize = (size_t)(compute_mipmapped_image_size(*imageInfo) * 4
3203*6467f958SSadaf Ebrahimi                              * get_explicit_type_size(dataType))
3204*6467f958SSadaf Ebrahimi             / get_pixel_size(imageInfo->format);
3205*6467f958SSadaf Ebrahimi         numPixels = allocSize / (get_explicit_type_size(dataType) * 4);
3206*6467f958SSadaf Ebrahimi     }
3207*6467f958SSadaf Ebrahimi     else
3208*6467f958SSadaf Ebrahimi     {
3209*6467f958SSadaf Ebrahimi         numPixels = (image2DFromBuffer ? imageInfo->rowPitch : imageInfo->width)
3210*6467f958SSadaf Ebrahimi             * imageInfo->height * (imageInfo->depth ? imageInfo->depth : 1)
3211*6467f958SSadaf Ebrahimi             * (imageInfo->arraySize ? imageInfo->arraySize : 1);
3212*6467f958SSadaf Ebrahimi         allocSize = numPixels * 4 * get_explicit_type_size(dataType);
3213*6467f958SSadaf Ebrahimi     }
3214*6467f958SSadaf Ebrahimi 
3215*6467f958SSadaf Ebrahimi #if 0 // DEBUG
3216*6467f958SSadaf Ebrahimi     {
3217*6467f958SSadaf Ebrahimi       fprintf(stderr,"--- create_random_image_data:\n");
3218*6467f958SSadaf Ebrahimi       fprintf(stderr,"allocSize = %zu\n",allocSize);
3219*6467f958SSadaf Ebrahimi       fprintf(stderr,"numPixels = %zu\n",numPixels);
3220*6467f958SSadaf Ebrahimi       fprintf(stderr,"width = %zu\n",imageInfo->width);
3221*6467f958SSadaf Ebrahimi       fprintf(stderr,"height = %zu\n",imageInfo->height);
3222*6467f958SSadaf Ebrahimi       fprintf(stderr,"depth = %zu\n",imageInfo->depth);
3223*6467f958SSadaf Ebrahimi       fprintf(stderr,"rowPitch = %zu\n",imageInfo->rowPitch);
3224*6467f958SSadaf Ebrahimi       fprintf(stderr,"slicePitch = %zu\n",imageInfo->slicePitch);
3225*6467f958SSadaf Ebrahimi       fprintf(stderr,"arraySize = %zu\n",imageInfo->arraySize);
3226*6467f958SSadaf Ebrahimi       fprintf(stderr,"explicit_type_size = %zu\n",get_explicit_type_size(dataType));
3227*6467f958SSadaf Ebrahimi     }
3228*6467f958SSadaf Ebrahimi #endif
3229*6467f958SSadaf Ebrahimi 
3230*6467f958SSadaf Ebrahimi #if defined(__APPLE__)
3231*6467f958SSadaf Ebrahimi     char *data = NULL;
3232*6467f958SSadaf Ebrahimi     if (gDeviceType == CL_DEVICE_TYPE_CPU)
3233*6467f958SSadaf Ebrahimi     {
3234*6467f958SSadaf Ebrahimi         size_t mapSize =
3235*6467f958SSadaf Ebrahimi             ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.
3236*6467f958SSadaf Ebrahimi 
3237*6467f958SSadaf Ebrahimi         void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
3238*6467f958SSadaf Ebrahimi                          MAP_ANON | MAP_PRIVATE, 0, 0);
3239*6467f958SSadaf Ebrahimi         if (map == MAP_FAILED)
3240*6467f958SSadaf Ebrahimi         {
3241*6467f958SSadaf Ebrahimi             perror("create_random_image_data: mmap");
3242*6467f958SSadaf Ebrahimi             log_error("%s:%d: mmap failed, mapSize = %zu\n", __FILE__, __LINE__,
3243*6467f958SSadaf Ebrahimi                       mapSize);
3244*6467f958SSadaf Ebrahimi         }
3245*6467f958SSadaf Ebrahimi         intptr_t data_end = (intptr_t)map + mapSize - 4096;
3246*6467f958SSadaf Ebrahimi         data = (char *)(data_end - (intptr_t)allocSize);
3247*6467f958SSadaf Ebrahimi 
3248*6467f958SSadaf Ebrahimi         mprotect(map, 4096, PROT_NONE);
3249*6467f958SSadaf Ebrahimi         mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
3250*6467f958SSadaf Ebrahimi         P.reset(data, map, mapSize);
3251*6467f958SSadaf Ebrahimi     }
3252*6467f958SSadaf Ebrahimi     else
3253*6467f958SSadaf Ebrahimi     {
3254*6467f958SSadaf Ebrahimi         data = (char *)malloc(allocSize);
3255*6467f958SSadaf Ebrahimi         P.reset(data);
3256*6467f958SSadaf Ebrahimi     }
3257*6467f958SSadaf Ebrahimi #else
3258*6467f958SSadaf Ebrahimi     char *data =
3259*6467f958SSadaf Ebrahimi         (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
3260*6467f958SSadaf Ebrahimi     P.reset(data, NULL, 0, allocSize, true);
3261*6467f958SSadaf Ebrahimi #endif
3262*6467f958SSadaf Ebrahimi 
3263*6467f958SSadaf Ebrahimi     if (data == NULL)
3264*6467f958SSadaf Ebrahimi     {
3265*6467f958SSadaf Ebrahimi         log_error(
3266*6467f958SSadaf Ebrahimi             "ERROR: Unable to malloc %zu bytes for create_random_image_data\n",
3267*6467f958SSadaf Ebrahimi             allocSize);
3268*6467f958SSadaf Ebrahimi         return NULL;
3269*6467f958SSadaf Ebrahimi     }
3270*6467f958SSadaf Ebrahimi 
3271*6467f958SSadaf Ebrahimi     switch (dataType)
3272*6467f958SSadaf Ebrahimi     {
3273*6467f958SSadaf Ebrahimi         case kFloat: {
3274*6467f958SSadaf Ebrahimi             float *inputValues = (float *)data;
3275*6467f958SSadaf Ebrahimi             switch (imageInfo->format->image_channel_data_type)
3276*6467f958SSadaf Ebrahimi             {
3277*6467f958SSadaf Ebrahimi                 case CL_HALF_FLOAT: {
3278*6467f958SSadaf Ebrahimi                     // Generate data that is (mostly) inside the range of a half
3279*6467f958SSadaf Ebrahimi                     // float const float HALF_MIN = 5.96046448e-08f;
3280*6467f958SSadaf Ebrahimi                     const float HALF_MAX = 65504.0f;
3281*6467f958SSadaf Ebrahimi 
3282*6467f958SSadaf Ebrahimi                     size_t i = 0;
3283*6467f958SSadaf Ebrahimi                     inputValues[i++] = 0.f;
3284*6467f958SSadaf Ebrahimi                     inputValues[i++] = 1.f;
3285*6467f958SSadaf Ebrahimi                     inputValues[i++] = -1.f;
3286*6467f958SSadaf Ebrahimi                     inputValues[i++] = 2.f;
3287*6467f958SSadaf Ebrahimi                     for (; i < numPixels * 4; i++)
3288*6467f958SSadaf Ebrahimi                         inputValues[i] = get_random_float(-HALF_MAX - 2.f,
3289*6467f958SSadaf Ebrahimi                                                           HALF_MAX + 2.f, d);
3290*6467f958SSadaf Ebrahimi                 }
3291*6467f958SSadaf Ebrahimi                 break;
3292*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
3293*6467f958SSadaf Ebrahimi                 case CL_SFIXED14_APPLE: {
3294*6467f958SSadaf Ebrahimi                     size_t i = 0;
3295*6467f958SSadaf Ebrahimi                     if (numPixels * 4 >= 8)
3296*6467f958SSadaf Ebrahimi                     {
3297*6467f958SSadaf Ebrahimi                         inputValues[i++] = INFINITY;
3298*6467f958SSadaf Ebrahimi                         inputValues[i++] = 0x1.0p14f;
3299*6467f958SSadaf Ebrahimi                         inputValues[i++] = 0x1.0p31f;
3300*6467f958SSadaf Ebrahimi                         inputValues[i++] = 0x1.0p32f;
3301*6467f958SSadaf Ebrahimi                         inputValues[i++] = -INFINITY;
3302*6467f958SSadaf Ebrahimi                         inputValues[i++] = -0x1.0p14f;
3303*6467f958SSadaf Ebrahimi                         inputValues[i++] = -0x1.0p31f;
3304*6467f958SSadaf Ebrahimi                         inputValues[i++] = -0x1.1p31f;
3305*6467f958SSadaf Ebrahimi                     }
3306*6467f958SSadaf Ebrahimi                     for (; i < numPixels * 4; i++)
3307*6467f958SSadaf Ebrahimi                         inputValues[i] = get_random_float(-1.1f, 3.1f, d);
3308*6467f958SSadaf Ebrahimi                 }
3309*6467f958SSadaf Ebrahimi                 break;
3310*6467f958SSadaf Ebrahimi #endif
3311*6467f958SSadaf Ebrahimi                 case CL_FLOAT: {
3312*6467f958SSadaf Ebrahimi                     size_t i = 0;
3313*6467f958SSadaf Ebrahimi                     inputValues[i++] = INFINITY;
3314*6467f958SSadaf Ebrahimi                     inputValues[i++] = -INFINITY;
3315*6467f958SSadaf Ebrahimi                     inputValues[i++] = 0.0f;
3316*6467f958SSadaf Ebrahimi                     inputValues[i++] = 0.0f;
3317*6467f958SSadaf Ebrahimi                     cl_uint *p = (cl_uint *)data;
3318*6467f958SSadaf Ebrahimi                     for (; i < numPixels * 4; i++) p[i] = genrand_int32(d);
3319*6467f958SSadaf Ebrahimi                 }
3320*6467f958SSadaf Ebrahimi                 break;
3321*6467f958SSadaf Ebrahimi 
3322*6467f958SSadaf Ebrahimi                 default:
3323*6467f958SSadaf Ebrahimi                     size_t i = 0;
3324*6467f958SSadaf Ebrahimi                     if (numPixels * 4 >= 36)
3325*6467f958SSadaf Ebrahimi                     {
3326*6467f958SSadaf Ebrahimi                         inputValues[i++] = 0.0f;
3327*6467f958SSadaf Ebrahimi                         inputValues[i++] = 0.5f;
3328*6467f958SSadaf Ebrahimi                         inputValues[i++] = 31.5f;
3329*6467f958SSadaf Ebrahimi                         inputValues[i++] = 32.0f;
3330*6467f958SSadaf Ebrahimi                         inputValues[i++] = 127.5f;
3331*6467f958SSadaf Ebrahimi                         inputValues[i++] = 128.0f;
3332*6467f958SSadaf Ebrahimi                         inputValues[i++] = 255.5f;
3333*6467f958SSadaf Ebrahimi                         inputValues[i++] = 256.0f;
3334*6467f958SSadaf Ebrahimi                         inputValues[i++] = 1023.5f;
3335*6467f958SSadaf Ebrahimi                         inputValues[i++] = 1024.0f;
3336*6467f958SSadaf Ebrahimi                         inputValues[i++] = 32767.5f;
3337*6467f958SSadaf Ebrahimi                         inputValues[i++] = 32768.0f;
3338*6467f958SSadaf Ebrahimi                         inputValues[i++] = 65535.5f;
3339*6467f958SSadaf Ebrahimi                         inputValues[i++] = 65536.0f;
3340*6467f958SSadaf Ebrahimi                         inputValues[i++] = 2147483648.0f;
3341*6467f958SSadaf Ebrahimi                         inputValues[i++] = 4294967296.0f;
3342*6467f958SSadaf Ebrahimi                         inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
3343*6467f958SSadaf Ebrahimi                         inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
3344*6467f958SSadaf Ebrahimi                         inputValues[i++] = -0.0f;
3345*6467f958SSadaf Ebrahimi                         inputValues[i++] = -0.5f;
3346*6467f958SSadaf Ebrahimi                         inputValues[i++] = -31.5f;
3347*6467f958SSadaf Ebrahimi                         inputValues[i++] = -32.0f;
3348*6467f958SSadaf Ebrahimi                         inputValues[i++] = -127.5f;
3349*6467f958SSadaf Ebrahimi                         inputValues[i++] = -128.0f;
3350*6467f958SSadaf Ebrahimi                         inputValues[i++] = -255.5f;
3351*6467f958SSadaf Ebrahimi                         inputValues[i++] = -256.0f;
3352*6467f958SSadaf Ebrahimi                         inputValues[i++] = -1023.5f;
3353*6467f958SSadaf Ebrahimi                         inputValues[i++] = -1024.0f;
3354*6467f958SSadaf Ebrahimi                         inputValues[i++] = -32767.5f;
3355*6467f958SSadaf Ebrahimi                         inputValues[i++] = -32768.0f;
3356*6467f958SSadaf Ebrahimi                         inputValues[i++] = -65535.5f;
3357*6467f958SSadaf Ebrahimi                         inputValues[i++] = -65536.0f;
3358*6467f958SSadaf Ebrahimi                         inputValues[i++] = -2147483648.0f;
3359*6467f958SSadaf Ebrahimi                         inputValues[i++] = -4294967296.0f;
3360*6467f958SSadaf Ebrahimi                         inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
3361*6467f958SSadaf Ebrahimi                         inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
3362*6467f958SSadaf Ebrahimi                     }
3363*6467f958SSadaf Ebrahimi                     if (is_format_signed(imageInfo->format))
3364*6467f958SSadaf Ebrahimi                     {
3365*6467f958SSadaf Ebrahimi                         for (; i < numPixels * 4; i++)
3366*6467f958SSadaf Ebrahimi                             inputValues[i] = get_random_float(-1.1f, 1.1f, d);
3367*6467f958SSadaf Ebrahimi                     }
3368*6467f958SSadaf Ebrahimi                     else
3369*6467f958SSadaf Ebrahimi                     {
3370*6467f958SSadaf Ebrahimi                         for (; i < numPixels * 4; i++)
3371*6467f958SSadaf Ebrahimi                             inputValues[i] = get_random_float(-0.1f, 1.1f, d);
3372*6467f958SSadaf Ebrahimi                     }
3373*6467f958SSadaf Ebrahimi                     break;
3374*6467f958SSadaf Ebrahimi             }
3375*6467f958SSadaf Ebrahimi             break;
3376*6467f958SSadaf Ebrahimi         }
3377*6467f958SSadaf Ebrahimi 
3378*6467f958SSadaf Ebrahimi         case kInt: {
3379*6467f958SSadaf Ebrahimi             int *imageData = (int *)data;
3380*6467f958SSadaf Ebrahimi 
3381*6467f958SSadaf Ebrahimi             // We want to generate ints (mostly) in range of the target format
3382*6467f958SSadaf Ebrahimi             int formatMin = get_format_min_int(imageInfo->format);
3383*6467f958SSadaf Ebrahimi             size_t formatMax = get_format_max_int(imageInfo->format);
3384*6467f958SSadaf Ebrahimi             if (formatMin == 0)
3385*6467f958SSadaf Ebrahimi             {
3386*6467f958SSadaf Ebrahimi                 // Unsigned values, but we are only an int, so cap the actual
3387*6467f958SSadaf Ebrahimi                 // max at the max of signed ints
3388*6467f958SSadaf Ebrahimi                 if (formatMax > 2147483647L) formatMax = 2147483647L;
3389*6467f958SSadaf Ebrahimi             }
3390*6467f958SSadaf Ebrahimi             // If the final format is small enough, give us a bit of room for
3391*6467f958SSadaf Ebrahimi             // out-of-range values to test
3392*6467f958SSadaf Ebrahimi             if (formatMax < 2147483647L) formatMax += 2;
3393*6467f958SSadaf Ebrahimi             if (formatMin > -2147483648LL) formatMin -= 2;
3394*6467f958SSadaf Ebrahimi 
3395*6467f958SSadaf Ebrahimi             // Now gen
3396*6467f958SSadaf Ebrahimi             for (size_t i = 0; i < numPixels * 4; i++)
3397*6467f958SSadaf Ebrahimi             {
3398*6467f958SSadaf Ebrahimi                 imageData[i] = random_in_range(formatMin, (int)formatMax, d);
3399*6467f958SSadaf Ebrahimi             }
3400*6467f958SSadaf Ebrahimi             break;
3401*6467f958SSadaf Ebrahimi         }
3402*6467f958SSadaf Ebrahimi 
3403*6467f958SSadaf Ebrahimi         case kUInt:
3404*6467f958SSadaf Ebrahimi         case kUnsignedInt: {
3405*6467f958SSadaf Ebrahimi             unsigned int *imageData = (unsigned int *)data;
3406*6467f958SSadaf Ebrahimi 
3407*6467f958SSadaf Ebrahimi             // We want to generate ints (mostly) in range of the target format
3408*6467f958SSadaf Ebrahimi             int formatMin = get_format_min_int(imageInfo->format);
3409*6467f958SSadaf Ebrahimi             size_t formatMax = get_format_max_int(imageInfo->format);
3410*6467f958SSadaf Ebrahimi             if (formatMin < 0) formatMin = 0;
3411*6467f958SSadaf Ebrahimi             // If the final format is small enough, give us a bit of room for
3412*6467f958SSadaf Ebrahimi             // out-of-range values to test
3413*6467f958SSadaf Ebrahimi             if (formatMax < 4294967295LL) formatMax += 2;
3414*6467f958SSadaf Ebrahimi 
3415*6467f958SSadaf Ebrahimi             // Now gen
3416*6467f958SSadaf Ebrahimi             for (size_t i = 0; i < numPixels * 4; i++)
3417*6467f958SSadaf Ebrahimi             {
3418*6467f958SSadaf Ebrahimi                 imageData[i] = random_in_range(formatMin, (int)formatMax, d);
3419*6467f958SSadaf Ebrahimi             }
3420*6467f958SSadaf Ebrahimi             break;
3421*6467f958SSadaf Ebrahimi         }
3422*6467f958SSadaf Ebrahimi         default:
3423*6467f958SSadaf Ebrahimi             // Unsupported source format
3424*6467f958SSadaf Ebrahimi             delete[] data;
3425*6467f958SSadaf Ebrahimi             return NULL;
3426*6467f958SSadaf Ebrahimi     }
3427*6467f958SSadaf Ebrahimi 
3428*6467f958SSadaf Ebrahimi     return data;
3429*6467f958SSadaf Ebrahimi }
3430*6467f958SSadaf Ebrahimi 
3431*6467f958SSadaf Ebrahimi /*
3432*6467f958SSadaf Ebrahimi     deprecated
3433*6467f958SSadaf Ebrahimi bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t
3434*6467f958SSadaf Ebrahimi max, int &outValue )
3435*6467f958SSadaf Ebrahimi {
3436*6467f958SSadaf Ebrahimi     int v = (int)value;
3437*6467f958SSadaf Ebrahimi 
3438*6467f958SSadaf Ebrahimi     switch(imageSampler->addressing_mode)
3439*6467f958SSadaf Ebrahimi     {
3440*6467f958SSadaf Ebrahimi         case CL_ADDRESS_REPEAT:
3441*6467f958SSadaf Ebrahimi             outValue = v;
3442*6467f958SSadaf Ebrahimi             while( v < 0 )
3443*6467f958SSadaf Ebrahimi                 v += (int)max;
3444*6467f958SSadaf Ebrahimi             while( v >= (int)max )
3445*6467f958SSadaf Ebrahimi                 v -= (int)max;
3446*6467f958SSadaf Ebrahimi             if( v != outValue )
3447*6467f958SSadaf Ebrahimi             {
3448*6467f958SSadaf Ebrahimi                 outValue = v;
3449*6467f958SSadaf Ebrahimi                 return true;
3450*6467f958SSadaf Ebrahimi             }
3451*6467f958SSadaf Ebrahimi             return false;
3452*6467f958SSadaf Ebrahimi 
3453*6467f958SSadaf Ebrahimi         case CL_ADDRESS_MIRRORED_REPEAT:
3454*6467f958SSadaf Ebrahimi             log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do
3455*6467f958SSadaf Ebrahimi we ever use this? exit(-1);
3456*6467f958SSadaf Ebrahimi 
3457*6467f958SSadaf Ebrahimi         default:
3458*6467f958SSadaf Ebrahimi             if( v < 0 )
3459*6467f958SSadaf Ebrahimi             {
3460*6467f958SSadaf Ebrahimi                 outValue = 0;
3461*6467f958SSadaf Ebrahimi                 return true;
3462*6467f958SSadaf Ebrahimi             }
3463*6467f958SSadaf Ebrahimi             if( v >= (int)max )
3464*6467f958SSadaf Ebrahimi             {
3465*6467f958SSadaf Ebrahimi                 outValue = (int)max - 1;
3466*6467f958SSadaf Ebrahimi                 return true;
3467*6467f958SSadaf Ebrahimi             }
3468*6467f958SSadaf Ebrahimi             outValue = v;
3469*6467f958SSadaf Ebrahimi             return false;
3470*6467f958SSadaf Ebrahimi     }
3471*6467f958SSadaf Ebrahimi 
3472*6467f958SSadaf Ebrahimi }
3473*6467f958SSadaf Ebrahimi */
3474*6467f958SSadaf Ebrahimi 
get_sampler_kernel_code(image_sampler_data * imageSampler,char * outLine)3475*6467f958SSadaf Ebrahimi void get_sampler_kernel_code(image_sampler_data *imageSampler, char *outLine)
3476*6467f958SSadaf Ebrahimi {
3477*6467f958SSadaf Ebrahimi     const char *normalized;
3478*6467f958SSadaf Ebrahimi     const char *addressMode;
3479*6467f958SSadaf Ebrahimi     const char *filterMode;
3480*6467f958SSadaf Ebrahimi 
3481*6467f958SSadaf Ebrahimi     if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP)
3482*6467f958SSadaf Ebrahimi         addressMode = "CLK_ADDRESS_CLAMP";
3483*6467f958SSadaf Ebrahimi     else if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
3484*6467f958SSadaf Ebrahimi         addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE";
3485*6467f958SSadaf Ebrahimi     else if (imageSampler->addressing_mode == CL_ADDRESS_REPEAT)
3486*6467f958SSadaf Ebrahimi         addressMode = "CLK_ADDRESS_REPEAT";
3487*6467f958SSadaf Ebrahimi     else if (imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
3488*6467f958SSadaf Ebrahimi         addressMode = "CLK_ADDRESS_MIRRORED_REPEAT";
3489*6467f958SSadaf Ebrahimi     else if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
3490*6467f958SSadaf Ebrahimi         addressMode = "CLK_ADDRESS_NONE";
3491*6467f958SSadaf Ebrahimi     else
3492*6467f958SSadaf Ebrahimi     {
3493*6467f958SSadaf Ebrahimi         log_error("**Error: Unknown addressing mode! Aborting...\n");
3494*6467f958SSadaf Ebrahimi         abort();
3495*6467f958SSadaf Ebrahimi     }
3496*6467f958SSadaf Ebrahimi 
3497*6467f958SSadaf Ebrahimi     if (imageSampler->normalized_coords)
3498*6467f958SSadaf Ebrahimi         normalized = "CLK_NORMALIZED_COORDS_TRUE";
3499*6467f958SSadaf Ebrahimi     else
3500*6467f958SSadaf Ebrahimi         normalized = "CLK_NORMALIZED_COORDS_FALSE";
3501*6467f958SSadaf Ebrahimi 
3502*6467f958SSadaf Ebrahimi     if (imageSampler->filter_mode == CL_FILTER_LINEAR)
3503*6467f958SSadaf Ebrahimi         filterMode = "CLK_FILTER_LINEAR";
3504*6467f958SSadaf Ebrahimi     else
3505*6467f958SSadaf Ebrahimi         filterMode = "CLK_FILTER_NEAREST";
3506*6467f958SSadaf Ebrahimi 
3507*6467f958SSadaf Ebrahimi     sprintf(outLine, "    const sampler_t imageSampler = %s | %s | %s;\n",
3508*6467f958SSadaf Ebrahimi             addressMode, filterMode, normalized);
3509*6467f958SSadaf Ebrahimi }
3510*6467f958SSadaf Ebrahimi 
copy_image_data(image_descriptor * srcImageInfo,image_descriptor * dstImageInfo,void * imageValues,void * destImageValues,const size_t sourcePos[],const size_t destPos[],const size_t regionSize[])3511*6467f958SSadaf Ebrahimi void copy_image_data(image_descriptor *srcImageInfo,
3512*6467f958SSadaf Ebrahimi                      image_descriptor *dstImageInfo, void *imageValues,
3513*6467f958SSadaf Ebrahimi                      void *destImageValues, const size_t sourcePos[],
3514*6467f958SSadaf Ebrahimi                      const size_t destPos[], const size_t regionSize[])
3515*6467f958SSadaf Ebrahimi {
3516*6467f958SSadaf Ebrahimi     //  assert( srcImageInfo->format == dstImageInfo->format );
3517*6467f958SSadaf Ebrahimi 
3518*6467f958SSadaf Ebrahimi     size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
3519*6467f958SSadaf Ebrahimi     size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
3520*6467f958SSadaf Ebrahimi     size_t src_row_pitch_lod, src_slice_pitch_lod;
3521*6467f958SSadaf Ebrahimi     size_t dst_row_pitch_lod, dst_slice_pitch_lod;
3522*6467f958SSadaf Ebrahimi 
3523*6467f958SSadaf Ebrahimi     size_t pixelSize = get_pixel_size(srcImageInfo->format);
3524*6467f958SSadaf Ebrahimi 
3525*6467f958SSadaf Ebrahimi     sourcePos_lod[0] = sourcePos[0];
3526*6467f958SSadaf Ebrahimi     sourcePos_lod[1] = sourcePos[1];
3527*6467f958SSadaf Ebrahimi     sourcePos_lod[2] = sourcePos[2];
3528*6467f958SSadaf Ebrahimi     destPos_lod[0] = destPos[0];
3529*6467f958SSadaf Ebrahimi     destPos_lod[1] = destPos[1];
3530*6467f958SSadaf Ebrahimi     destPos_lod[2] = destPos[2];
3531*6467f958SSadaf Ebrahimi     src_row_pitch_lod = srcImageInfo->rowPitch;
3532*6467f958SSadaf Ebrahimi     dst_row_pitch_lod = dstImageInfo->rowPitch;
3533*6467f958SSadaf Ebrahimi     src_slice_pitch_lod = srcImageInfo->slicePitch;
3534*6467f958SSadaf Ebrahimi     dst_slice_pitch_lod = dstImageInfo->slicePitch;
3535*6467f958SSadaf Ebrahimi 
3536*6467f958SSadaf Ebrahimi     if (srcImageInfo->num_mip_levels > 1)
3537*6467f958SSadaf Ebrahimi     {
3538*6467f958SSadaf Ebrahimi         size_t src_width_lod = 1 /*srcImageInfo->width*/;
3539*6467f958SSadaf Ebrahimi         size_t src_height_lod = 1 /*srcImageInfo->height*/;
3540*6467f958SSadaf Ebrahimi 
3541*6467f958SSadaf Ebrahimi         switch (srcImageInfo->type)
3542*6467f958SSadaf Ebrahimi         {
3543*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D:
3544*6467f958SSadaf Ebrahimi                 src_lod = sourcePos[1];
3545*6467f958SSadaf Ebrahimi                 sourcePos_lod[1] = sourcePos_lod[2] = 0;
3546*6467f958SSadaf Ebrahimi                 src_width_lod = (srcImageInfo->width >> src_lod)
3547*6467f958SSadaf Ebrahimi                     ? (srcImageInfo->width >> src_lod)
3548*6467f958SSadaf Ebrahimi                     : 1;
3549*6467f958SSadaf Ebrahimi                 break;
3550*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
3551*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
3552*6467f958SSadaf Ebrahimi                 src_lod = sourcePos[2];
3553*6467f958SSadaf Ebrahimi                 sourcePos_lod[1] = sourcePos[1];
3554*6467f958SSadaf Ebrahimi                 sourcePos_lod[2] = 0;
3555*6467f958SSadaf Ebrahimi                 src_width_lod = (srcImageInfo->width >> src_lod)
3556*6467f958SSadaf Ebrahimi                     ? (srcImageInfo->width >> src_lod)
3557*6467f958SSadaf Ebrahimi                     : 1;
3558*6467f958SSadaf Ebrahimi                 if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
3559*6467f958SSadaf Ebrahimi                     src_height_lod = (srcImageInfo->height >> src_lod)
3560*6467f958SSadaf Ebrahimi                         ? (srcImageInfo->height >> src_lod)
3561*6467f958SSadaf Ebrahimi                         : 1;
3562*6467f958SSadaf Ebrahimi                 break;
3563*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
3564*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
3565*6467f958SSadaf Ebrahimi                 src_lod = sourcePos[3];
3566*6467f958SSadaf Ebrahimi                 sourcePos_lod[1] = sourcePos[1];
3567*6467f958SSadaf Ebrahimi                 sourcePos_lod[2] = sourcePos[2];
3568*6467f958SSadaf Ebrahimi                 src_width_lod = (srcImageInfo->width >> src_lod)
3569*6467f958SSadaf Ebrahimi                     ? (srcImageInfo->width >> src_lod)
3570*6467f958SSadaf Ebrahimi                     : 1;
3571*6467f958SSadaf Ebrahimi                 src_height_lod = (srcImageInfo->height >> src_lod)
3572*6467f958SSadaf Ebrahimi                     ? (srcImageInfo->height >> src_lod)
3573*6467f958SSadaf Ebrahimi                     : 1;
3574*6467f958SSadaf Ebrahimi                 break;
3575*6467f958SSadaf Ebrahimi         }
3576*6467f958SSadaf Ebrahimi         src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
3577*6467f958SSadaf Ebrahimi         src_row_pitch_lod =
3578*6467f958SSadaf Ebrahimi             src_width_lod * get_pixel_size(srcImageInfo->format);
3579*6467f958SSadaf Ebrahimi         src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
3580*6467f958SSadaf Ebrahimi     }
3581*6467f958SSadaf Ebrahimi 
3582*6467f958SSadaf Ebrahimi     if (dstImageInfo->num_mip_levels > 1)
3583*6467f958SSadaf Ebrahimi     {
3584*6467f958SSadaf Ebrahimi         size_t dst_width_lod = 1 /*dstImageInfo->width*/;
3585*6467f958SSadaf Ebrahimi         size_t dst_height_lod = 1 /*dstImageInfo->height*/;
3586*6467f958SSadaf Ebrahimi         switch (dstImageInfo->type)
3587*6467f958SSadaf Ebrahimi         {
3588*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D:
3589*6467f958SSadaf Ebrahimi                 dst_lod = destPos[1];
3590*6467f958SSadaf Ebrahimi                 destPos_lod[1] = destPos_lod[2] = 0;
3591*6467f958SSadaf Ebrahimi                 dst_width_lod = (dstImageInfo->width >> dst_lod)
3592*6467f958SSadaf Ebrahimi                     ? (dstImageInfo->width >> dst_lod)
3593*6467f958SSadaf Ebrahimi                     : 1;
3594*6467f958SSadaf Ebrahimi                 break;
3595*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
3596*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
3597*6467f958SSadaf Ebrahimi                 dst_lod = destPos[2];
3598*6467f958SSadaf Ebrahimi                 destPos_lod[1] = destPos[1];
3599*6467f958SSadaf Ebrahimi                 destPos_lod[2] = 0;
3600*6467f958SSadaf Ebrahimi                 dst_width_lod = (dstImageInfo->width >> dst_lod)
3601*6467f958SSadaf Ebrahimi                     ? (dstImageInfo->width >> dst_lod)
3602*6467f958SSadaf Ebrahimi                     : 1;
3603*6467f958SSadaf Ebrahimi                 if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
3604*6467f958SSadaf Ebrahimi                     dst_height_lod = (dstImageInfo->height >> dst_lod)
3605*6467f958SSadaf Ebrahimi                         ? (dstImageInfo->height >> dst_lod)
3606*6467f958SSadaf Ebrahimi                         : 1;
3607*6467f958SSadaf Ebrahimi                 break;
3608*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
3609*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
3610*6467f958SSadaf Ebrahimi                 dst_lod = destPos[3];
3611*6467f958SSadaf Ebrahimi                 destPos_lod[1] = destPos[1];
3612*6467f958SSadaf Ebrahimi                 destPos_lod[2] = destPos[2];
3613*6467f958SSadaf Ebrahimi                 dst_width_lod = (dstImageInfo->width >> dst_lod)
3614*6467f958SSadaf Ebrahimi                     ? (dstImageInfo->width >> dst_lod)
3615*6467f958SSadaf Ebrahimi                     : 1;
3616*6467f958SSadaf Ebrahimi                 dst_height_lod = (dstImageInfo->height >> dst_lod)
3617*6467f958SSadaf Ebrahimi                     ? (dstImageInfo->height >> dst_lod)
3618*6467f958SSadaf Ebrahimi                     : 1;
3619*6467f958SSadaf Ebrahimi                 break;
3620*6467f958SSadaf Ebrahimi         }
3621*6467f958SSadaf Ebrahimi         dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
3622*6467f958SSadaf Ebrahimi         dst_row_pitch_lod =
3623*6467f958SSadaf Ebrahimi             dst_width_lod * get_pixel_size(dstImageInfo->format);
3624*6467f958SSadaf Ebrahimi         dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
3625*6467f958SSadaf Ebrahimi     }
3626*6467f958SSadaf Ebrahimi 
3627*6467f958SSadaf Ebrahimi     // Get initial pointers
3628*6467f958SSadaf Ebrahimi     char *sourcePtr = (char *)imageValues
3629*6467f958SSadaf Ebrahimi         + sourcePos_lod[2] * src_slice_pitch_lod
3630*6467f958SSadaf Ebrahimi         + sourcePos_lod[1] * src_row_pitch_lod + pixelSize * sourcePos_lod[0]
3631*6467f958SSadaf Ebrahimi         + src_mip_level_offset;
3632*6467f958SSadaf Ebrahimi     char *destPtr = (char *)destImageValues
3633*6467f958SSadaf Ebrahimi         + destPos_lod[2] * dst_slice_pitch_lod
3634*6467f958SSadaf Ebrahimi         + destPos_lod[1] * dst_row_pitch_lod + pixelSize * destPos_lod[0]
3635*6467f958SSadaf Ebrahimi         + dst_mip_level_offset;
3636*6467f958SSadaf Ebrahimi 
3637*6467f958SSadaf Ebrahimi     for (size_t z = 0; z < (regionSize[2] > 0 ? regionSize[2] : 1); z++)
3638*6467f958SSadaf Ebrahimi     {
3639*6467f958SSadaf Ebrahimi         char *rowSourcePtr = sourcePtr;
3640*6467f958SSadaf Ebrahimi         char *rowDestPtr = destPtr;
3641*6467f958SSadaf Ebrahimi         for (size_t y = 0; y < regionSize[1]; y++)
3642*6467f958SSadaf Ebrahimi         {
3643*6467f958SSadaf Ebrahimi             memcpy(rowDestPtr, rowSourcePtr, pixelSize * regionSize[0]);
3644*6467f958SSadaf Ebrahimi             rowSourcePtr += src_row_pitch_lod;
3645*6467f958SSadaf Ebrahimi             rowDestPtr += dst_row_pitch_lod;
3646*6467f958SSadaf Ebrahimi         }
3647*6467f958SSadaf Ebrahimi 
3648*6467f958SSadaf Ebrahimi         sourcePtr += src_slice_pitch_lod;
3649*6467f958SSadaf Ebrahimi         destPtr += dst_slice_pitch_lod;
3650*6467f958SSadaf Ebrahimi     }
3651*6467f958SSadaf Ebrahimi }
3652*6467f958SSadaf Ebrahimi 
random_float(float low,float high,MTdata d)3653*6467f958SSadaf Ebrahimi float random_float(float low, float high, MTdata d)
3654*6467f958SSadaf Ebrahimi {
3655*6467f958SSadaf Ebrahimi     float t = (float)genrand_real1(d);
3656*6467f958SSadaf Ebrahimi     return (1.0f - t) * low + t * high;
3657*6467f958SSadaf Ebrahimi }
3658*6467f958SSadaf Ebrahimi 
CoordWalker(void * coords,bool useFloats,size_t vecSize)3659*6467f958SSadaf Ebrahimi CoordWalker::CoordWalker(void *coords, bool useFloats, size_t vecSize)
3660*6467f958SSadaf Ebrahimi {
3661*6467f958SSadaf Ebrahimi     if (useFloats)
3662*6467f958SSadaf Ebrahimi     {
3663*6467f958SSadaf Ebrahimi         mFloatCoords = (cl_float *)coords;
3664*6467f958SSadaf Ebrahimi         mIntCoords = NULL;
3665*6467f958SSadaf Ebrahimi     }
3666*6467f958SSadaf Ebrahimi     else
3667*6467f958SSadaf Ebrahimi     {
3668*6467f958SSadaf Ebrahimi         mFloatCoords = NULL;
3669*6467f958SSadaf Ebrahimi         mIntCoords = (cl_int *)coords;
3670*6467f958SSadaf Ebrahimi     }
3671*6467f958SSadaf Ebrahimi     mVecSize = vecSize;
3672*6467f958SSadaf Ebrahimi }
3673*6467f958SSadaf Ebrahimi 
~CoordWalker()3674*6467f958SSadaf Ebrahimi CoordWalker::~CoordWalker() {}
3675*6467f958SSadaf Ebrahimi 
Get(size_t idx,size_t el)3676*6467f958SSadaf Ebrahimi cl_float CoordWalker::Get(size_t idx, size_t el)
3677*6467f958SSadaf Ebrahimi {
3678*6467f958SSadaf Ebrahimi     if (mIntCoords != NULL)
3679*6467f958SSadaf Ebrahimi         return (cl_float)mIntCoords[idx * mVecSize + el];
3680*6467f958SSadaf Ebrahimi     else
3681*6467f958SSadaf Ebrahimi         return mFloatCoords[idx * mVecSize + el];
3682*6467f958SSadaf Ebrahimi }
3683*6467f958SSadaf Ebrahimi 
3684*6467f958SSadaf Ebrahimi 
print_read_header(const cl_image_format * format,image_sampler_data * sampler,bool err,int t)3685*6467f958SSadaf Ebrahimi void print_read_header(const cl_image_format *format,
3686*6467f958SSadaf Ebrahimi                        image_sampler_data *sampler, bool err, int t)
3687*6467f958SSadaf Ebrahimi {
3688*6467f958SSadaf Ebrahimi     const char *addressMode = NULL;
3689*6467f958SSadaf Ebrahimi     const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" };
3690*6467f958SSadaf Ebrahimi 
3691*6467f958SSadaf Ebrahimi     if (sampler->addressing_mode == CL_ADDRESS_CLAMP)
3692*6467f958SSadaf Ebrahimi         addressMode = "CL_ADDRESS_CLAMP";
3693*6467f958SSadaf Ebrahimi     else if (sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
3694*6467f958SSadaf Ebrahimi         addressMode = "CL_ADDRESS_CLAMP_TO_EDGE";
3695*6467f958SSadaf Ebrahimi     else if (sampler->addressing_mode == CL_ADDRESS_REPEAT)
3696*6467f958SSadaf Ebrahimi         addressMode = "CL_ADDRESS_REPEAT";
3697*6467f958SSadaf Ebrahimi     else if (sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
3698*6467f958SSadaf Ebrahimi         addressMode = "CL_ADDRESS_MIRRORED_REPEAT";
3699*6467f958SSadaf Ebrahimi     else
3700*6467f958SSadaf Ebrahimi         addressMode = "CL_ADDRESS_NONE";
3701*6467f958SSadaf Ebrahimi 
3702*6467f958SSadaf Ebrahimi     if (t)
3703*6467f958SSadaf Ebrahimi     {
3704*6467f958SSadaf Ebrahimi         if (err)
3705*6467f958SSadaf Ebrahimi             log_error("[%-7s %-24s %d] - %s - %s - %s - %s\n",
3706*6467f958SSadaf Ebrahimi                       GetChannelOrderName(format->image_channel_order),
3707*6467f958SSadaf Ebrahimi                       GetChannelTypeName(format->image_channel_data_type),
3708*6467f958SSadaf Ebrahimi                       (int)get_format_channel_count(format),
3709*6467f958SSadaf Ebrahimi                       sampler->filter_mode == CL_FILTER_NEAREST
3710*6467f958SSadaf Ebrahimi                           ? "CL_FILTER_NEAREST"
3711*6467f958SSadaf Ebrahimi                           : "CL_FILTER_LINEAR",
3712*6467f958SSadaf Ebrahimi                       addressMode,
3713*6467f958SSadaf Ebrahimi                       normalizedNames[sampler->normalized_coords ? 1 : 0],
3714*6467f958SSadaf Ebrahimi                       t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
3715*6467f958SSadaf Ebrahimi         else
3716*6467f958SSadaf Ebrahimi             log_info("[%-7s %-24s %d] - %s - %s - %s - %s\n",
3717*6467f958SSadaf Ebrahimi                      GetChannelOrderName(format->image_channel_order),
3718*6467f958SSadaf Ebrahimi                      GetChannelTypeName(format->image_channel_data_type),
3719*6467f958SSadaf Ebrahimi                      (int)get_format_channel_count(format),
3720*6467f958SSadaf Ebrahimi                      sampler->filter_mode == CL_FILTER_NEAREST
3721*6467f958SSadaf Ebrahimi                          ? "CL_FILTER_NEAREST"
3722*6467f958SSadaf Ebrahimi                          : "CL_FILTER_LINEAR",
3723*6467f958SSadaf Ebrahimi                      addressMode,
3724*6467f958SSadaf Ebrahimi                      normalizedNames[sampler->normalized_coords ? 1 : 0],
3725*6467f958SSadaf Ebrahimi                      t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
3726*6467f958SSadaf Ebrahimi     }
3727*6467f958SSadaf Ebrahimi     else
3728*6467f958SSadaf Ebrahimi     {
3729*6467f958SSadaf Ebrahimi         if (err)
3730*6467f958SSadaf Ebrahimi             log_error("[%-7s %-24s %d] - %s - %s - %s\n",
3731*6467f958SSadaf Ebrahimi                       GetChannelOrderName(format->image_channel_order),
3732*6467f958SSadaf Ebrahimi                       GetChannelTypeName(format->image_channel_data_type),
3733*6467f958SSadaf Ebrahimi                       (int)get_format_channel_count(format),
3734*6467f958SSadaf Ebrahimi                       sampler->filter_mode == CL_FILTER_NEAREST
3735*6467f958SSadaf Ebrahimi                           ? "CL_FILTER_NEAREST"
3736*6467f958SSadaf Ebrahimi                           : "CL_FILTER_LINEAR",
3737*6467f958SSadaf Ebrahimi                       addressMode,
3738*6467f958SSadaf Ebrahimi                       normalizedNames[sampler->normalized_coords ? 1 : 0]);
3739*6467f958SSadaf Ebrahimi         else
3740*6467f958SSadaf Ebrahimi             log_info("[%-7s %-24s %d] - %s - %s - %s\n",
3741*6467f958SSadaf Ebrahimi                      GetChannelOrderName(format->image_channel_order),
3742*6467f958SSadaf Ebrahimi                      GetChannelTypeName(format->image_channel_data_type),
3743*6467f958SSadaf Ebrahimi                      (int)get_format_channel_count(format),
3744*6467f958SSadaf Ebrahimi                      sampler->filter_mode == CL_FILTER_NEAREST
3745*6467f958SSadaf Ebrahimi                          ? "CL_FILTER_NEAREST"
3746*6467f958SSadaf Ebrahimi                          : "CL_FILTER_LINEAR",
3747*6467f958SSadaf Ebrahimi                      addressMode,
3748*6467f958SSadaf Ebrahimi                      normalizedNames[sampler->normalized_coords ? 1 : 0]);
3749*6467f958SSadaf Ebrahimi     }
3750*6467f958SSadaf Ebrahimi }
3751*6467f958SSadaf Ebrahimi 
print_write_header(const cl_image_format * format,bool err=false)3752*6467f958SSadaf Ebrahimi void print_write_header(const cl_image_format *format, bool err = false)
3753*6467f958SSadaf Ebrahimi {
3754*6467f958SSadaf Ebrahimi     if (err)
3755*6467f958SSadaf Ebrahimi         log_error("[%-7s %-24s %d]\n",
3756*6467f958SSadaf Ebrahimi                   GetChannelOrderName(format->image_channel_order),
3757*6467f958SSadaf Ebrahimi                   GetChannelTypeName(format->image_channel_data_type),
3758*6467f958SSadaf Ebrahimi                   (int)get_format_channel_count(format));
3759*6467f958SSadaf Ebrahimi     else
3760*6467f958SSadaf Ebrahimi         log_info("[%-7s %-24s %d]\n",
3761*6467f958SSadaf Ebrahimi                  GetChannelOrderName(format->image_channel_order),
3762*6467f958SSadaf Ebrahimi                  GetChannelTypeName(format->image_channel_data_type),
3763*6467f958SSadaf Ebrahimi                  (int)get_format_channel_count(format));
3764*6467f958SSadaf Ebrahimi }
3765*6467f958SSadaf Ebrahimi 
3766*6467f958SSadaf Ebrahimi 
print_header(const cl_image_format * format,bool err=false)3767*6467f958SSadaf Ebrahimi void print_header(const cl_image_format *format, bool err = false)
3768*6467f958SSadaf Ebrahimi {
3769*6467f958SSadaf Ebrahimi     if (err)
3770*6467f958SSadaf Ebrahimi     {
3771*6467f958SSadaf Ebrahimi         log_error("[%-7s %-24s %d]\n",
3772*6467f958SSadaf Ebrahimi                   GetChannelOrderName(format->image_channel_order),
3773*6467f958SSadaf Ebrahimi                   GetChannelTypeName(format->image_channel_data_type),
3774*6467f958SSadaf Ebrahimi                   (int)get_format_channel_count(format));
3775*6467f958SSadaf Ebrahimi     }
3776*6467f958SSadaf Ebrahimi     else
3777*6467f958SSadaf Ebrahimi     {
3778*6467f958SSadaf Ebrahimi         log_info("[%-7s %-24s %d]\n",
3779*6467f958SSadaf Ebrahimi                  GetChannelOrderName(format->image_channel_order),
3780*6467f958SSadaf Ebrahimi                  GetChannelTypeName(format->image_channel_data_type),
3781*6467f958SSadaf Ebrahimi                  (int)get_format_channel_count(format));
3782*6467f958SSadaf Ebrahimi     }
3783*6467f958SSadaf Ebrahimi }
3784*6467f958SSadaf Ebrahimi 
find_format(cl_image_format * formatList,unsigned int numFormats,cl_image_format * formatToFind)3785*6467f958SSadaf Ebrahimi bool find_format(cl_image_format *formatList, unsigned int numFormats,
3786*6467f958SSadaf Ebrahimi                  cl_image_format *formatToFind)
3787*6467f958SSadaf Ebrahimi {
3788*6467f958SSadaf Ebrahimi     for (unsigned int i = 0; i < numFormats; i++)
3789*6467f958SSadaf Ebrahimi     {
3790*6467f958SSadaf Ebrahimi         if (formatList[i].image_channel_order
3791*6467f958SSadaf Ebrahimi                 == formatToFind->image_channel_order
3792*6467f958SSadaf Ebrahimi             && formatList[i].image_channel_data_type
3793*6467f958SSadaf Ebrahimi                 == formatToFind->image_channel_data_type)
3794*6467f958SSadaf Ebrahimi             return true;
3795*6467f958SSadaf Ebrahimi     }
3796*6467f958SSadaf Ebrahimi     return false;
3797*6467f958SSadaf Ebrahimi }
3798*6467f958SSadaf Ebrahimi 
build_required_image_formats(cl_mem_flags flags,cl_mem_object_type image_type,cl_device_id device,std::vector<cl_image_format> & formatsToSupport)3799*6467f958SSadaf Ebrahimi void build_required_image_formats(
3800*6467f958SSadaf Ebrahimi     cl_mem_flags flags, cl_mem_object_type image_type, cl_device_id device,
3801*6467f958SSadaf Ebrahimi     std::vector<cl_image_format> &formatsToSupport)
3802*6467f958SSadaf Ebrahimi {
3803*6467f958SSadaf Ebrahimi     formatsToSupport.clear();
3804*6467f958SSadaf Ebrahimi 
3805*6467f958SSadaf Ebrahimi     // Minimum list of supported image formats for reading or writing (embedded
3806*6467f958SSadaf Ebrahimi     // profile)
3807*6467f958SSadaf Ebrahimi     static std::vector<cl_image_format> embeddedProfile_readOrWrite{
3808*6467f958SSadaf Ebrahimi         // clang-format off
3809*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNORM_INT8 },
3810*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNORM_INT16 },
3811*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT8 },
3812*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT16 },
3813*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT32 },
3814*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT8 },
3815*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT16 },
3816*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT32 },
3817*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_HALF_FLOAT },
3818*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_FLOAT },
3819*6467f958SSadaf Ebrahimi         // clang-format on
3820*6467f958SSadaf Ebrahimi     };
3821*6467f958SSadaf Ebrahimi 
3822*6467f958SSadaf Ebrahimi     // Minimum list of required image formats for reading or writing
3823*6467f958SSadaf Ebrahimi     // num_channels, for all image types.
3824*6467f958SSadaf Ebrahimi     static std::vector<cl_image_format> fullProfile_readOrWrite{
3825*6467f958SSadaf Ebrahimi         // clang-format off
3826*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNORM_INT8 },
3827*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNORM_INT16 },
3828*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT8 },
3829*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT16 },
3830*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT32 },
3831*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT8 },
3832*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT16 },
3833*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT32 },
3834*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_HALF_FLOAT },
3835*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_FLOAT },
3836*6467f958SSadaf Ebrahimi         { CL_BGRA, CL_UNORM_INT8 },
3837*6467f958SSadaf Ebrahimi         // clang-format on
3838*6467f958SSadaf Ebrahimi     };
3839*6467f958SSadaf Ebrahimi 
3840*6467f958SSadaf Ebrahimi     // Minimum list of supported image formats for reading or writing
3841*6467f958SSadaf Ebrahimi     // (OpenCL 2.0, 2.1, or 2.2), for all image types.
3842*6467f958SSadaf Ebrahimi     static std::vector<cl_image_format> fullProfile_2x_readOrWrite{
3843*6467f958SSadaf Ebrahimi         // clang-format off
3844*6467f958SSadaf Ebrahimi         { CL_R, CL_UNORM_INT8 },
3845*6467f958SSadaf Ebrahimi         { CL_R, CL_UNORM_INT16 },
3846*6467f958SSadaf Ebrahimi         { CL_R, CL_SNORM_INT8 },
3847*6467f958SSadaf Ebrahimi         { CL_R, CL_SNORM_INT16 },
3848*6467f958SSadaf Ebrahimi         { CL_R, CL_SIGNED_INT8 },
3849*6467f958SSadaf Ebrahimi         { CL_R, CL_SIGNED_INT16 },
3850*6467f958SSadaf Ebrahimi         { CL_R, CL_SIGNED_INT32 },
3851*6467f958SSadaf Ebrahimi         { CL_R, CL_UNSIGNED_INT8 },
3852*6467f958SSadaf Ebrahimi         { CL_R, CL_UNSIGNED_INT16 },
3853*6467f958SSadaf Ebrahimi         { CL_R, CL_UNSIGNED_INT32 },
3854*6467f958SSadaf Ebrahimi         { CL_R, CL_HALF_FLOAT },
3855*6467f958SSadaf Ebrahimi         { CL_R, CL_FLOAT },
3856*6467f958SSadaf Ebrahimi         { CL_RG, CL_UNORM_INT8 },
3857*6467f958SSadaf Ebrahimi         { CL_RG, CL_UNORM_INT16 },
3858*6467f958SSadaf Ebrahimi         { CL_RG, CL_SNORM_INT8 },
3859*6467f958SSadaf Ebrahimi         { CL_RG, CL_SNORM_INT16 },
3860*6467f958SSadaf Ebrahimi         { CL_RG, CL_SIGNED_INT8 },
3861*6467f958SSadaf Ebrahimi         { CL_RG, CL_SIGNED_INT16 },
3862*6467f958SSadaf Ebrahimi         { CL_RG, CL_SIGNED_INT32 },
3863*6467f958SSadaf Ebrahimi         { CL_RG, CL_UNSIGNED_INT8 },
3864*6467f958SSadaf Ebrahimi         { CL_RG, CL_UNSIGNED_INT16 },
3865*6467f958SSadaf Ebrahimi         { CL_RG, CL_UNSIGNED_INT32 },
3866*6467f958SSadaf Ebrahimi         { CL_RG, CL_HALF_FLOAT },
3867*6467f958SSadaf Ebrahimi         { CL_RG, CL_FLOAT },
3868*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNORM_INT8 },
3869*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNORM_INT16 },
3870*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SNORM_INT8 },
3871*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SNORM_INT16 },
3872*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT8 },
3873*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT16 },
3874*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT32 },
3875*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT8 },
3876*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT16 },
3877*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT32 },
3878*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_HALF_FLOAT },
3879*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_FLOAT },
3880*6467f958SSadaf Ebrahimi         { CL_BGRA, CL_UNORM_INT8 },
3881*6467f958SSadaf Ebrahimi         // clang-format on
3882*6467f958SSadaf Ebrahimi     };
3883*6467f958SSadaf Ebrahimi 
3884*6467f958SSadaf Ebrahimi     // Conditional addition to the 2x readOrWrite table:
3885*6467f958SSadaf Ebrahimi     // Support for the CL_DEPTH image channel order is required only for 2D
3886*6467f958SSadaf Ebrahimi     // images and 2D image arrays.
3887*6467f958SSadaf Ebrahimi     static std::vector<cl_image_format> fullProfile_2x_readOrWrite_Depth{
3888*6467f958SSadaf Ebrahimi         // clang-format off
3889*6467f958SSadaf Ebrahimi         { CL_DEPTH, CL_UNORM_INT16 },
3890*6467f958SSadaf Ebrahimi         { CL_DEPTH, CL_FLOAT },
3891*6467f958SSadaf Ebrahimi         // clang-format on
3892*6467f958SSadaf Ebrahimi     };
3893*6467f958SSadaf Ebrahimi 
3894*6467f958SSadaf Ebrahimi     // Conditional addition to the 2x readOrWrite table:
3895*6467f958SSadaf Ebrahimi     // Support for reading from the CL_sRGBA image channel order is optional for
3896*6467f958SSadaf Ebrahimi     // 1D image buffers. Support for writing to the CL_sRGBA image channel order
3897*6467f958SSadaf Ebrahimi     // is optional for all image types.
3898*6467f958SSadaf Ebrahimi     static std::vector<cl_image_format> fullProfile_2x_readOrWrite_srgb{
3899*6467f958SSadaf Ebrahimi         { CL_sRGBA, CL_UNORM_INT8 },
3900*6467f958SSadaf Ebrahimi     };
3901*6467f958SSadaf Ebrahimi 
3902*6467f958SSadaf Ebrahimi     // Minimum list of required image formats for reading and writing.
3903*6467f958SSadaf Ebrahimi     static std::vector<cl_image_format> fullProfile_readAndWrite{
3904*6467f958SSadaf Ebrahimi         // clang-format off
3905*6467f958SSadaf Ebrahimi         { CL_R, CL_UNORM_INT8 },
3906*6467f958SSadaf Ebrahimi         { CL_R, CL_SIGNED_INT8 },
3907*6467f958SSadaf Ebrahimi         { CL_R, CL_SIGNED_INT16 },
3908*6467f958SSadaf Ebrahimi         { CL_R, CL_SIGNED_INT32 },
3909*6467f958SSadaf Ebrahimi         { CL_R, CL_UNSIGNED_INT8 },
3910*6467f958SSadaf Ebrahimi         { CL_R, CL_UNSIGNED_INT16 },
3911*6467f958SSadaf Ebrahimi         { CL_R, CL_UNSIGNED_INT32 },
3912*6467f958SSadaf Ebrahimi         { CL_R, CL_HALF_FLOAT },
3913*6467f958SSadaf Ebrahimi         { CL_R, CL_FLOAT },
3914*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNORM_INT8 },
3915*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT8 },
3916*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT16 },
3917*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_SIGNED_INT32 },
3918*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT8 },
3919*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT16 },
3920*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_UNSIGNED_INT32 },
3921*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_HALF_FLOAT },
3922*6467f958SSadaf Ebrahimi         { CL_RGBA, CL_FLOAT },
3923*6467f958SSadaf Ebrahimi         // clang-format on
3924*6467f958SSadaf Ebrahimi     };
3925*6467f958SSadaf Ebrahimi 
3926*6467f958SSadaf Ebrahimi     // Embedded profile
3927*6467f958SSadaf Ebrahimi     if (gIsEmbedded)
3928*6467f958SSadaf Ebrahimi     {
3929*6467f958SSadaf Ebrahimi         copy(embeddedProfile_readOrWrite.begin(),
3930*6467f958SSadaf Ebrahimi              embeddedProfile_readOrWrite.end(),
3931*6467f958SSadaf Ebrahimi              back_inserter(formatsToSupport));
3932*6467f958SSadaf Ebrahimi     }
3933*6467f958SSadaf Ebrahimi     // Full profile
3934*6467f958SSadaf Ebrahimi     else
3935*6467f958SSadaf Ebrahimi     {
3936*6467f958SSadaf Ebrahimi         Version version = get_device_cl_version(device);
3937*6467f958SSadaf Ebrahimi         if (version < Version(2, 0) || version >= Version(3, 0))
3938*6467f958SSadaf Ebrahimi         {
3939*6467f958SSadaf Ebrahimi             // Full profile, OpenCL 1.2 or 3.0.
3940*6467f958SSadaf Ebrahimi             if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
3941*6467f958SSadaf Ebrahimi             {
3942*6467f958SSadaf Ebrahimi                 // Note: assumes that read-write images are supported!
3943*6467f958SSadaf Ebrahimi                 copy(fullProfile_readAndWrite.begin(),
3944*6467f958SSadaf Ebrahimi                      fullProfile_readAndWrite.end(),
3945*6467f958SSadaf Ebrahimi                      back_inserter(formatsToSupport));
3946*6467f958SSadaf Ebrahimi             }
3947*6467f958SSadaf Ebrahimi             else
3948*6467f958SSadaf Ebrahimi             {
3949*6467f958SSadaf Ebrahimi                 copy(fullProfile_readOrWrite.begin(),
3950*6467f958SSadaf Ebrahimi                      fullProfile_readOrWrite.end(),
3951*6467f958SSadaf Ebrahimi                      back_inserter(formatsToSupport));
3952*6467f958SSadaf Ebrahimi             }
3953*6467f958SSadaf Ebrahimi         }
3954*6467f958SSadaf Ebrahimi         else
3955*6467f958SSadaf Ebrahimi         {
3956*6467f958SSadaf Ebrahimi             // Full profile, OpenCL 2.0, 2.1, 2.2.
3957*6467f958SSadaf Ebrahimi             if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
3958*6467f958SSadaf Ebrahimi             {
3959*6467f958SSadaf Ebrahimi                 copy(fullProfile_readAndWrite.begin(),
3960*6467f958SSadaf Ebrahimi                      fullProfile_readAndWrite.end(),
3961*6467f958SSadaf Ebrahimi                      back_inserter(formatsToSupport));
3962*6467f958SSadaf Ebrahimi             }
3963*6467f958SSadaf Ebrahimi             else
3964*6467f958SSadaf Ebrahimi             {
3965*6467f958SSadaf Ebrahimi                 copy(fullProfile_2x_readOrWrite.begin(),
3966*6467f958SSadaf Ebrahimi                      fullProfile_2x_readOrWrite.end(),
3967*6467f958SSadaf Ebrahimi                      back_inserter(formatsToSupport));
3968*6467f958SSadaf Ebrahimi 
3969*6467f958SSadaf Ebrahimi                 // Support for the CL_DEPTH image channel order is required only
3970*6467f958SSadaf Ebrahimi                 // for 2D images and 2D image arrays.
3971*6467f958SSadaf Ebrahimi                 if (image_type == CL_MEM_OBJECT_IMAGE2D
3972*6467f958SSadaf Ebrahimi                     || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
3973*6467f958SSadaf Ebrahimi                 {
3974*6467f958SSadaf Ebrahimi                     copy(fullProfile_2x_readOrWrite_Depth.begin(),
3975*6467f958SSadaf Ebrahimi                          fullProfile_2x_readOrWrite_Depth.end(),
3976*6467f958SSadaf Ebrahimi                          back_inserter(formatsToSupport));
3977*6467f958SSadaf Ebrahimi                 }
3978*6467f958SSadaf Ebrahimi 
3979*6467f958SSadaf Ebrahimi                 // Support for reading from the CL_sRGBA image channel order is
3980*6467f958SSadaf Ebrahimi                 // optional for 1D image buffers. Support for writing to the
3981*6467f958SSadaf Ebrahimi                 // CL_sRGBA image channel order is optional for all image types.
3982*6467f958SSadaf Ebrahimi                 if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER
3983*6467f958SSadaf Ebrahimi                     && flags == CL_MEM_READ_ONLY)
3984*6467f958SSadaf Ebrahimi                 {
3985*6467f958SSadaf Ebrahimi                     copy(fullProfile_2x_readOrWrite_srgb.begin(),
3986*6467f958SSadaf Ebrahimi                          fullProfile_2x_readOrWrite_srgb.end(),
3987*6467f958SSadaf Ebrahimi                          back_inserter(formatsToSupport));
3988*6467f958SSadaf Ebrahimi                 }
3989*6467f958SSadaf Ebrahimi             }
3990*6467f958SSadaf Ebrahimi         }
3991*6467f958SSadaf Ebrahimi     }
3992*6467f958SSadaf Ebrahimi }
3993*6467f958SSadaf Ebrahimi 
is_image_format_required(cl_image_format format,cl_mem_flags flags,cl_mem_object_type image_type,cl_device_id device)3994*6467f958SSadaf Ebrahimi bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
3995*6467f958SSadaf Ebrahimi                               cl_mem_object_type image_type,
3996*6467f958SSadaf Ebrahimi                               cl_device_id device)
3997*6467f958SSadaf Ebrahimi {
3998*6467f958SSadaf Ebrahimi     std::vector<cl_image_format> formatsToSupport;
3999*6467f958SSadaf Ebrahimi     build_required_image_formats(flags, image_type, device, formatsToSupport);
4000*6467f958SSadaf Ebrahimi 
4001*6467f958SSadaf Ebrahimi     for (auto &formatItr : formatsToSupport)
4002*6467f958SSadaf Ebrahimi     {
4003*6467f958SSadaf Ebrahimi         if (formatItr.image_channel_order == format.image_channel_order
4004*6467f958SSadaf Ebrahimi             && formatItr.image_channel_data_type
4005*6467f958SSadaf Ebrahimi                 == format.image_channel_data_type)
4006*6467f958SSadaf Ebrahimi         {
4007*6467f958SSadaf Ebrahimi             return true;
4008*6467f958SSadaf Ebrahimi         }
4009*6467f958SSadaf Ebrahimi     }
4010*6467f958SSadaf Ebrahimi 
4011*6467f958SSadaf Ebrahimi     return false;
4012*6467f958SSadaf Ebrahimi }
4013*6467f958SSadaf Ebrahimi 
compute_max_mip_levels(size_t width,size_t height,size_t depth)4014*6467f958SSadaf Ebrahimi cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth)
4015*6467f958SSadaf Ebrahimi {
4016*6467f958SSadaf Ebrahimi     cl_uint retMaxMipLevels = 0;
4017*6467f958SSadaf Ebrahimi     size_t max_dim = 0;
4018*6467f958SSadaf Ebrahimi 
4019*6467f958SSadaf Ebrahimi     max_dim = width;
4020*6467f958SSadaf Ebrahimi     max_dim = height > max_dim ? height : max_dim;
4021*6467f958SSadaf Ebrahimi     max_dim = depth > max_dim ? depth : max_dim;
4022*6467f958SSadaf Ebrahimi 
4023*6467f958SSadaf Ebrahimi     while (max_dim)
4024*6467f958SSadaf Ebrahimi     {
4025*6467f958SSadaf Ebrahimi         retMaxMipLevels++;
4026*6467f958SSadaf Ebrahimi         max_dim >>= 1;
4027*6467f958SSadaf Ebrahimi     }
4028*6467f958SSadaf Ebrahimi     return retMaxMipLevels;
4029*6467f958SSadaf Ebrahimi }
4030*6467f958SSadaf Ebrahimi 
compute_mipmapped_image_size(image_descriptor imageInfo)4031*6467f958SSadaf Ebrahimi cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo)
4032*6467f958SSadaf Ebrahimi {
4033*6467f958SSadaf Ebrahimi     cl_ulong retSize = 0;
4034*6467f958SSadaf Ebrahimi     size_t curr_width, curr_height, curr_depth, curr_array_size;
4035*6467f958SSadaf Ebrahimi     curr_width = imageInfo.width;
4036*6467f958SSadaf Ebrahimi     curr_height = imageInfo.height;
4037*6467f958SSadaf Ebrahimi     curr_depth = imageInfo.depth;
4038*6467f958SSadaf Ebrahimi     curr_array_size = imageInfo.arraySize;
4039*6467f958SSadaf Ebrahimi 
4040*6467f958SSadaf Ebrahimi     for (int i = 0; i < (int)imageInfo.num_mip_levels; i++)
4041*6467f958SSadaf Ebrahimi     {
4042*6467f958SSadaf Ebrahimi         switch (imageInfo.type)
4043*6467f958SSadaf Ebrahimi         {
4044*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
4045*6467f958SSadaf Ebrahimi                 retSize += (cl_ulong)curr_width * curr_height * curr_depth
4046*6467f958SSadaf Ebrahimi                     * get_pixel_size(imageInfo.format);
4047*6467f958SSadaf Ebrahimi                 break;
4048*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
4049*6467f958SSadaf Ebrahimi                 retSize += (cl_ulong)curr_width * curr_height
4050*6467f958SSadaf Ebrahimi                     * get_pixel_size(imageInfo.format);
4051*6467f958SSadaf Ebrahimi                 break;
4052*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D:
4053*6467f958SSadaf Ebrahimi                 retSize +=
4054*6467f958SSadaf Ebrahimi                     (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
4055*6467f958SSadaf Ebrahimi                 break;
4056*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4057*6467f958SSadaf Ebrahimi                 retSize += (cl_ulong)curr_width * curr_array_size
4058*6467f958SSadaf Ebrahimi                     * get_pixel_size(imageInfo.format);
4059*6467f958SSadaf Ebrahimi                 break;
4060*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4061*6467f958SSadaf Ebrahimi                 retSize += (cl_ulong)curr_width * curr_height * curr_array_size
4062*6467f958SSadaf Ebrahimi                     * get_pixel_size(imageInfo.format);
4063*6467f958SSadaf Ebrahimi                 break;
4064*6467f958SSadaf Ebrahimi         }
4065*6467f958SSadaf Ebrahimi 
4066*6467f958SSadaf Ebrahimi         switch (imageInfo.type)
4067*6467f958SSadaf Ebrahimi         {
4068*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
4069*6467f958SSadaf Ebrahimi                 curr_depth = curr_depth >> 1 ? curr_depth >> 1 : 1;
4070*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
4071*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4072*6467f958SSadaf Ebrahimi                 curr_height = curr_height >> 1 ? curr_height >> 1 : 1;
4073*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D:
4074*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4075*6467f958SSadaf Ebrahimi                 curr_width = curr_width >> 1 ? curr_width >> 1 : 1;
4076*6467f958SSadaf Ebrahimi         }
4077*6467f958SSadaf Ebrahimi     }
4078*6467f958SSadaf Ebrahimi 
4079*6467f958SSadaf Ebrahimi     return retSize;
4080*6467f958SSadaf Ebrahimi }
4081*6467f958SSadaf Ebrahimi 
compute_mip_level_offset(image_descriptor * imageInfo,size_t lod)4082*6467f958SSadaf Ebrahimi size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod)
4083*6467f958SSadaf Ebrahimi {
4084*6467f958SSadaf Ebrahimi     size_t retOffset = 0;
4085*6467f958SSadaf Ebrahimi     size_t width, height, depth;
4086*6467f958SSadaf Ebrahimi     width = imageInfo->width;
4087*6467f958SSadaf Ebrahimi     height = imageInfo->height;
4088*6467f958SSadaf Ebrahimi     depth = imageInfo->depth;
4089*6467f958SSadaf Ebrahimi 
4090*6467f958SSadaf Ebrahimi     for (size_t i = 0; i < lod; i++)
4091*6467f958SSadaf Ebrahimi     {
4092*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
4093*6467f958SSadaf Ebrahimi         {
4094*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4095*6467f958SSadaf Ebrahimi                 retOffset += (size_t)width * height * imageInfo->arraySize
4096*6467f958SSadaf Ebrahimi                     * get_pixel_size(imageInfo->format);
4097*6467f958SSadaf Ebrahimi                 break;
4098*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D:
4099*6467f958SSadaf Ebrahimi                 retOffset += (size_t)width * height * depth
4100*6467f958SSadaf Ebrahimi                     * get_pixel_size(imageInfo->format);
4101*6467f958SSadaf Ebrahimi                 break;
4102*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4103*6467f958SSadaf Ebrahimi                 retOffset += (size_t)width * imageInfo->arraySize
4104*6467f958SSadaf Ebrahimi                     * get_pixel_size(imageInfo->format);
4105*6467f958SSadaf Ebrahimi                 break;
4106*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
4107*6467f958SSadaf Ebrahimi                 retOffset +=
4108*6467f958SSadaf Ebrahimi                     (size_t)width * height * get_pixel_size(imageInfo->format);
4109*6467f958SSadaf Ebrahimi                 break;
4110*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D:
4111*6467f958SSadaf Ebrahimi                 retOffset += (size_t)width * get_pixel_size(imageInfo->format);
4112*6467f958SSadaf Ebrahimi                 break;
4113*6467f958SSadaf Ebrahimi         }
4114*6467f958SSadaf Ebrahimi 
4115*6467f958SSadaf Ebrahimi         // Compute next lod dimensions
4116*6467f958SSadaf Ebrahimi         switch (imageInfo->type)
4117*6467f958SSadaf Ebrahimi         {
4118*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE3D: depth = (depth >> 1) ? (depth >> 1) : 1;
4119*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D:
4120*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4121*6467f958SSadaf Ebrahimi                 height = (height >> 1) ? (height >> 1) : 1;
4122*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4123*6467f958SSadaf Ebrahimi             case CL_MEM_OBJECT_IMAGE1D: width = (width >> 1) ? (width >> 1) : 1;
4124*6467f958SSadaf Ebrahimi         }
4125*6467f958SSadaf Ebrahimi     }
4126*6467f958SSadaf Ebrahimi     return retOffset;
4127*6467f958SSadaf Ebrahimi }
4128*6467f958SSadaf Ebrahimi 
convert_image_type_to_string(cl_mem_object_type image_type)4129*6467f958SSadaf Ebrahimi const char *convert_image_type_to_string(cl_mem_object_type image_type)
4130*6467f958SSadaf Ebrahimi {
4131*6467f958SSadaf Ebrahimi     switch (image_type)
4132*6467f958SSadaf Ebrahimi     {
4133*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE1D: return "1D";
4134*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE2D: return "2D";
4135*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE3D: return "3D";
4136*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE1D_ARRAY: return "1D array";
4137*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE2D_ARRAY: return "2D array";
4138*6467f958SSadaf Ebrahimi         case CL_MEM_OBJECT_IMAGE1D_BUFFER: return "1D image buffer";
4139*6467f958SSadaf Ebrahimi         default: return "unrecognized object type";
4140*6467f958SSadaf Ebrahimi     }
4141*6467f958SSadaf Ebrahimi }
4142