1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17 #include "test_common.h"
18 
19 #if !defined(_WIN32)
20 #include <sys/mman.h>
21 #endif
22 
23 extern cl_mem_flags gMemFlagsToUse;
24 extern int gtestTypesToRun;
25 
26 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
27 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
28 
29 const char *readwrite1DArrayKernelSourcePattern =
30     "%s\n"
31     "__kernel void sample_kernel( __global %s4 *input, read_write "
32     "image1d_array_t output %s)\n"
33     "{\n"
34     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
35     "%s"
36     "   write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n"
37     "}";
38 
39 const char *write1DArrayKernelSourcePattern =
40     "%s\n"
41     "__kernel void sample_kernel( __global %s4 *input, write_only "
42     "image1d_array_t output %s)\n"
43     "{\n"
44     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
45     "%s"
46     "   write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
47     "}";
48 
49 const char *offset1DArraySource =
50 "   int offset = tidY*get_image_width(output) + tidX;\n";
51 
52 const char *offset1DArrayLodSource =
53 "   int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
54 "   int offset = tidY*width_lod + tidX;\n";
55 
test_write_image_1D_array(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)56 int test_write_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
57                      image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
58 {
59     int                 totalErrors = 0;
60     size_t              num_flags   = 0;
61     const cl_mem_flags  *mem_flag_types = NULL;
62     const char *        *mem_flag_names = NULL;
63     const cl_mem_flags  write_only_mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
64     const char *        write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
65     const cl_mem_flags  read_write_mem_flag_types[1] = {  CL_MEM_READ_WRITE};
66     const char *        read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
67 
68     if(gtestTypesToRun & kWriteTests)
69     {
70         mem_flag_types = write_only_mem_flag_types;
71         mem_flag_names = write_only_mem_flag_names;
72         num_flags      = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
73     }
74     else
75     {
76         mem_flag_types = read_write_mem_flag_types;
77         mem_flag_names = read_write_mem_flag_names;
78         num_flags      = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
79     }
80 
81     size_t pixelSize = get_pixel_size( imageInfo->format );
82 
83     for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
84     {
85         int error;
86         size_t threads[2];
87         bool verifyRounding = false;
88         int forceCorrectlyRoundedWrites = 0;
89 
90 #if defined( __APPLE__ )
91         // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
92         if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
93             forceCorrectlyRoundedWrites = 1;
94 #endif
95 
96         if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
97             if( DetectFloatToHalfRoundingMode(queue) )
98                 return 1;
99 
100         BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
101 
102         create_random_image_data( inputType, imageInfo, imageValues, d );
103 
104         if(!gTestMipmaps)
105         {
106             if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
107             {
108                 /* Pilot data for sRGB images */
109                 if(is_sRGBA_order(imageInfo->format->image_channel_order))
110                 {
111                     // We want to generate ints (mostly) in range of the target format which should be [0,255]
112                     // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
113                     // it can test some out-of-range data points
114                     const unsigned int test_range_ext = 16;
115                     int formatMin = 0 - test_range_ext;
116                     int formatMax = 255 + test_range_ext;
117                     int pixel_value = 0;
118 
119                     // First, fill with arbitrary floats
120                     for( size_t y = 0; y < imageInfo->arraySize; y++ )
121                     {
122                         float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4;
123                         for( size_t i = 0; i < imageInfo->width * 4; i++ )
124                         {
125                             pixel_value = random_in_range( formatMin, (int)formatMax, d );
126                             inputValues[ i ] = (float)(pixel_value/255.0f);
127                         }
128                     }
129 
130                     // Throw a few extra test values in there
131                     float *inputValues = (float *)(char*)imageValues;
132                     size_t i = 0;
133 
134                     // Piloting some debug inputs.
135                     inputValues[ i++ ] = -0.5f;
136                     inputValues[ i++ ] = 0.5f;
137                     inputValues[ i++ ] = 2.f;
138                     inputValues[ i++ ] = 0.5f;
139 
140                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
141                     // is correct
142                     if( imageInfo->width > 12 )
143                     {
144                         float formatMax = (float)get_format_max_int( imageInfo->format );
145                         inputValues[ i++ ] = 4.0f / formatMax;
146                         inputValues[ i++ ] = 4.3f / formatMax;
147                         inputValues[ i++ ] = 4.5f / formatMax;
148                         inputValues[ i++ ] = 4.7f / formatMax;
149                         inputValues[ i++ ] = 5.0f / formatMax;
150                         inputValues[ i++ ] = 5.3f / formatMax;
151                         inputValues[ i++ ] = 5.5f / formatMax;
152                         inputValues[ i++ ] = 5.7f / formatMax;
153                     }
154                 }
155                 else
156                 {
157                     // First, fill with arbitrary floats
158                     for( size_t y = 0; y < imageInfo->arraySize; y++ )
159                     {
160                         float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4;
161                         for( size_t i = 0; i < imageInfo->width * 4; i++ )
162                             inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
163                     }
164 
165                     // Throw a few extra test values in there
166                     float *inputValues = (float *)(char*)imageValues;
167                     size_t i = 0;
168                     inputValues[ i++ ] = -0.0000000000009f;
169                     inputValues[ i++ ] = 1.f;
170                     inputValues[ i++ ] = -1.f;
171                     inputValues[ i++ ] = 2.f;
172 
173                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
174                     // is correct
175                     if( imageInfo->width > 12 )
176                     {
177                         float formatMax = (float)get_format_max_int( imageInfo->format );
178                         inputValues[ i++ ] = 4.0f / formatMax;
179                         inputValues[ i++ ] = 4.3f / formatMax;
180                         inputValues[ i++ ] = 4.5f / formatMax;
181                         inputValues[ i++ ] = 4.7f / formatMax;
182                         inputValues[ i++ ] = 5.0f / formatMax;
183                         inputValues[ i++ ] = 5.3f / formatMax;
184                         inputValues[ i++ ] = 5.5f / formatMax;
185                         inputValues[ i++ ] = 5.7f / formatMax;
186                         verifyRounding = true;
187                     }
188                 }
189             }
190             else if( inputType == kUInt )
191             {
192                 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
193                 size_t i = 0;
194                 inputValues[ i++ ] = 0;
195                 inputValues[ i++ ] = 65535;
196                 inputValues[ i++ ] = 7271820;
197                 inputValues[ i++ ] = 0;
198             }
199         }
200 
201         // Construct testing sources
202         clProtectedImage protImage;
203         clMemWrapper unprotImage;
204         cl_mem image;
205 
206         if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
207         {
208             // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
209             // Do not use protected images for max image size test since it rounds the row size to a page size
210             if (gTestMaxImages) {
211                 create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
212 
213                 unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
214                                               imageInfo->width, imageInfo->arraySize, 0, 0,
215                                               maxImageUseHostPtrBackingStore, &error );
216             } else {
217                 error = protImage.Create( context, (cl_mem_object_type)CL_MEM_OBJECT_IMAGE1D_ARRAY, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, 1, 1, imageInfo->arraySize );
218             }
219             if( error != CL_SUCCESS )
220             {
221                 log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize,
222                           imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
223                 return error;
224             }
225 
226             if (gTestMaxImages)
227                 image = (cl_mem)unprotImage;
228             else
229                 image = (cl_mem)protImage;
230         }
231         else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
232         {
233             // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
234             // it works just as if no flag is specified, so we just do the same thing either way
235             // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
236             if( gTestMipmaps )
237             {
238                 cl_image_desc image_desc = {0};
239                 image_desc.image_type = imageInfo->type;
240                 image_desc.num_mip_levels = imageInfo->num_mip_levels;
241                 image_desc.image_width = imageInfo->width;
242                 image_desc.image_array_size = imageInfo->arraySize;
243 
244                 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
245                                              imageInfo->format, &image_desc, NULL, &error);
246                 if( error != CL_SUCCESS )
247                 {
248                     log_error( "ERROR: Unable to create %d level 1D image array of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->arraySize,
249                                IGetErrorString( error ), mem_flag_names[mem_flag_index] );
250                     return error;
251                 }
252             }
253             else
254             {
255                 unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
256                                               imageInfo->width, imageInfo->arraySize, 0, 0,
257                                               imageValues, &error );
258                 if( error != CL_SUCCESS )
259                 {
260                     log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize,
261                               imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
262                     return error;
263                 }
264             }
265             image = unprotImage;
266         }
267 
268         error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
269         test_error( error, "Unable to set kernel arguments" );
270 
271         size_t width_lod = imageInfo->width, nextLevelOffset = 0;
272         size_t origin[ 3 ] = { 0, 0, 0 };
273         size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 };
274         size_t resultSize;
275 
276         for( int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
277         {
278             if(gTestMipmaps)
279             {
280                 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
281 
282             }
283             // Run the kernel
284             threads[0] = (size_t)width_lod;
285             threads[1] = (size_t)imageInfo->arraySize;
286 
287             clMemWrapper inputStream;
288 
289             char *imagePtrOffset = imageValues + nextLevelOffset;
290             inputStream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
291                                          get_explicit_type_size(inputType) * 4
292                                              * width_lod * imageInfo->arraySize,
293                                          imagePtrOffset, &error);
294             test_error( error, "Unable to create input buffer" );
295 
296             // Set arguments
297             error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
298             test_error( error, "Unable to set kernel arguments" );
299 
300             error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
301             test_error( error, "Unable to run kernel" );
302 
303             // Get results
304             if( gTestMipmaps )
305                 resultSize = width_lod * get_pixel_size(imageInfo->format) * imageInfo->arraySize;
306             else
307                 resultSize = imageInfo->rowPitch * imageInfo->arraySize;
308 
309             clProtectedArray PA(resultSize);
310             char *resultValues = (char *)((void *)PA);
311 
312             if( gDebugTrace )
313                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
314 
315 
316             origin[2] = lod;
317             region[0] = width_lod;
318             error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region,
319                                         gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
320             test_error( error, "Unable to read results from kernel" );
321             if( gDebugTrace )
322                 log_info( "    results read\n" );
323 
324             // Validate results element by element
325             char *imagePtr = imageValues + nextLevelOffset;
326             int numTries = 5;
327             for( size_t y = 0, i = 0; y < imageInfo->arraySize; y++ )
328             {
329                 char *resultPtr;
330                 if( gTestMipmaps )
331                     resultPtr = (char *)resultValues + y * width_lod * pixelSize;
332                 else
333                     resultPtr = (char*)resultValues + y * imageInfo->rowPitch;
334                 for( size_t x = 0; x < width_lod; x++, i++ )
335                 {
336                     char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
337 
338                     // Convert this pixel
339                     if( inputType == kFloat )
340                         pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
341                     else if( inputType == kInt )
342                         pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
343                     else // if( inputType == kUInt )
344                         pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
345 
346                     // Compare against the results
347                     if(is_sRGBA_order(imageInfo->format->image_channel_order))
348                     {
349                         // Compare sRGB-mapped values
350                         cl_float expected[4]    = {0};
351                         cl_float* input_values  = (float*)imagePtr;
352                         cl_uchar *actual        = (cl_uchar*)resultPtr;
353                         float max_err           = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
354                         float err[4]            = {0.0f};
355 
356                         for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
357                         {
358                             if(j < 3)
359                             {
360                                 expected[j] = sRGBmap(input_values[j]);
361                             }
362                             else // there is no sRGB conversion for alpha component if it exists
363                             {
364                                 expected[j] = NORMALIZE(input_values[j], 255.0f);
365                             }
366 
367                             err[j] = fabsf( expected[ j ] - actual[ j ] );
368                         }
369 
370                         if ((err[0] > max_err) ||
371                             (err[1] > max_err) ||
372                             (err[2] > max_err) ||
373                             (err[3] > 0)) // there is no conversion for alpha so the error should be zero
374                         {
375                             log_error( "       Error:     %g %g %g %g\n", err[0], err[1], err[2], err[3]);
376                             log_error( "       Input:     %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
377                             log_error( "       Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
378                             log_error( "       Actual:   %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
379                             return 1;
380                         }
381                     }
382                     else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
383                     {
384                         float *expected = (float *)resultBuffer;
385                         float *actual = (float *)resultPtr;
386 
387                         if( !validate_float_write_results( expected, actual, imageInfo ) )
388                         {
389                             unsigned int *e = (unsigned int *)resultBuffer;
390                             unsigned int *a = (unsigned int *)resultPtr;
391                             log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[ mem_flag_index ] );
392                             log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
393                             log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
394                             log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
395                             log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
396                             totalErrors++;
397                             if( ( --numTries ) == 0 )
398                                 return 1;
399                         }
400                     }
401                     else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
402                     {
403                         cl_half *e = (cl_half *)resultBuffer;
404                         cl_half *a = (cl_half *)resultPtr;
405                         if( !validate_half_write_results( e, a, imageInfo ) )
406                         {
407                             totalErrors++;
408                             log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[ mem_flag_index ] );
409                             log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
410                             log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
411                             if( inputType == kFloat )
412                             {
413                                 float *p = (float *)imagePtr;
414                                 log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
415                                 log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
416                             }
417                             if( ( --numTries ) == 0 )
418                                 return 1;
419                         }
420                     }
421                     else
422                     {
423 
424                         filter_undefined_bits(imageInfo, resultPtr);
425 
426                         // Exact result passes every time
427                         if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 )
428                         {
429                             // result is inexact.  Calculate error
430                             int failure = 1;
431                             float errors[4] = {NAN, NAN, NAN, NAN};
432                             pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
433 
434                             failure = filter_rounding_errors(
435                                 forceCorrectlyRoundedWrites, imageInfo, errors);
436 
437                             if( failure )
438                             {
439                                 totalErrors++;
440                                 // Is it our special rounding test?
441                                 if( verifyRounding && i >= 1 && i <= 2 )
442                                 {
443                                     // Try to guess what the rounding mode of the device really is based on what it returned
444                                     const char *deviceRounding = "unknown";
445                                     unsigned int deviceResults[8];
446                                     read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
447                                     read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
448 
449                                     if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
450                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
451                                         deviceRounding = "truncate";
452                                     else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
453                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
454                                         deviceRounding = "round to nearest";
455                                     else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
456                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
457                                         deviceRounding = "round to even";
458 
459                                     log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
460                                     log_error( "       Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
461                                               deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
462                                     log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
463                                     return 1;
464                                 }
465                                 log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
466                                 switch(imageInfo->format->image_channel_data_type)
467                                 {
468                                     case CL_UNORM_INT8:
469                                     case CL_SNORM_INT8:
470                                     case CL_UNSIGNED_INT8:
471                                     case CL_SIGNED_INT8:
472                                         log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
473                                         log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
474                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
475                                         break;
476                                     case CL_UNORM_SHORT_565: {
477                                         cl_uint *ref_value =
478                                             (cl_uint *)resultBuffer;
479                                         cl_uint *test_value =
480                                             (cl_uint *)resultPtr;
481 
482                                         log_error(" Expected: 0x%2.2x Actual: "
483                                                   "0x%2.2x \n",
484                                                   ref_value[0], test_value[0]);
485 
486                                         log_error("    Expected: 0x%2.2x "
487                                                   "0x%2.2x 0x%2.2x \n",
488                                                   ref_value[0] & 0x1F,
489                                                   (ref_value[0] >> 5) & 0x3F,
490                                                   (ref_value[0] >> 11) & 0x1F);
491                                         log_error("    Actual:   0x%2.2x "
492                                                   "0x%2.2x 0x%2.2x \n",
493                                                   test_value[0] & 0x1F,
494                                                   (test_value[0] >> 5) & 0x3F,
495                                                   (test_value[0] >> 11) & 0x1F);
496                                         log_error("    Error:    %f %f %f %f\n",
497                                                   errors[0], errors[1],
498                                                   errors[2]);
499                                         break;
500                                     }
501                                     case CL_UNORM_SHORT_555: {
502                                         cl_uint *ref_value =
503                                             (cl_uint *)resultBuffer;
504                                         cl_uint *test_value =
505                                             (cl_uint *)resultPtr;
506 
507                                         log_error(" Expected: 0x%2.2x Actual: "
508                                                   "0x%2.2x \n",
509                                                   ref_value[0], test_value[0]);
510 
511                                         log_error("    Expected: 0x%2.2x "
512                                                   "0x%2.2x 0x%2.2x \n",
513                                                   ref_value[0] & 0x1F,
514                                                   (ref_value[0] >> 5) & 0x1F,
515                                                   (ref_value[0] >> 10) & 0x1F);
516                                         log_error("    Actual:   0x%2.2x "
517                                                   "0x%2.2x 0x%2.2x \n",
518                                                   test_value[0] & 0x1F,
519                                                   (test_value[0] >> 5) & 0x1F,
520                                                   (test_value[0] >> 10) & 0x1F);
521                                         log_error("    Error:    %f %f %f %f\n",
522                                                   errors[0], errors[1],
523                                                   errors[2]);
524                                         break;
525                                     }
526                                     case CL_UNORM_INT16:
527                                     case CL_SNORM_INT16:
528                                     case CL_UNSIGNED_INT16:
529                                     case CL_SIGNED_INT16:
530 #ifdef CL_SFIXED14_APPLE
531                                     case CL_SFIXED14_APPLE:
532 #endif
533                                         log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
534                                         log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
535                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
536                                         break;
537                                     case CL_HALF_FLOAT:
538                                         log_error("    Expected: 0x%4.4x "
539                                                   "0x%4.4x 0x%4.4x 0x%4.4x\n",
540                                                   ((cl_half *)resultBuffer)[0],
541                                                   ((cl_half *)resultBuffer)[1],
542                                                   ((cl_half *)resultBuffer)[2],
543                                                   ((cl_half *)resultBuffer)[3]);
544                                         log_error("    Actual:   0x%4.4x "
545                                                   "0x%4.4x 0x%4.4x 0x%4.4x\n",
546                                                   ((cl_half *)resultPtr)[0],
547                                                   ((cl_half *)resultPtr)[1],
548                                                   ((cl_half *)resultPtr)[2],
549                                                   ((cl_half *)resultPtr)[3]);
550                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
551                                         break;
552                                     case CL_UNSIGNED_INT32:
553                                     case CL_SIGNED_INT32:
554                                         log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
555                                         log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
556                                         break;
557                                     case CL_FLOAT:
558                                         log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
559                                         log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
560                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
561                                         break;
562                                 }
563 
564                                 float *v = (float *)imagePtr;
565                                 log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
566                                 log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
567                                 log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
568 
569                                 if( ( --numTries ) == 0 )
570                                     return 1;
571                             }
572                         }
573                     }
574                     imagePtr += get_explicit_type_size( inputType ) * 4;
575                     resultPtr += pixelSize;
576                 }
577             }
578             {
579                 nextLevelOffset += width_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format);
580                 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
581             }
582         }
583     }
584 
585     // All done!
586     return totalErrors;
587 }
588 
589 
test_write_image_1D_array_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)590 int test_write_image_1D_array_set(cl_device_id device, cl_context context,
591                                   cl_command_queue queue,
592                                   const cl_image_format *format,
593                                   ExplicitType inputType, MTdata d)
594 {
595     char programSrc[10240];
596     const char *ptr;
597     const char *readFormat;
598     clProgramWrapper program;
599     clKernelWrapper kernel;
600     const char *KernelSourcePattern = NULL;
601     int error;
602 
603     // Get our operating parameters
604     size_t maxWidth, maxArraySize;
605     cl_ulong maxAllocSize, memSize;
606     size_t pixelSize;
607 
608     image_descriptor imageInfo = { 0x0 };
609 
610     imageInfo.format = format;
611     imageInfo.slicePitch = 0;
612     imageInfo.height = imageInfo.depth = 1;
613     imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
614     pixelSize = get_pixel_size( imageInfo.format );
615 
616     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
617     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
618     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
619     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
620     test_error( error, "Unable to get max image 2D size from device" );
621 
622     if (memSize > (cl_ulong)SIZE_MAX) {
623       memSize = (cl_ulong)SIZE_MAX;
624     }
625 
626     // Determine types
627     if( inputType == kInt )
628         readFormat = "i";
629     else if( inputType == kUInt )
630         readFormat = "ui";
631     else // kFloat
632         readFormat = "f";
633 
634     if(gtestTypesToRun & kWriteTests)
635     {
636         KernelSourcePattern = write1DArrayKernelSourcePattern;
637     }
638     else
639     {
640         KernelSourcePattern = readwrite1DArrayKernelSourcePattern;
641     }
642     // Construct the source
643     // Construct the source
644     sprintf(
645         programSrc, KernelSourcePattern,
646         gTestMipmaps
647             ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
648               "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
649             : "",
650         get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "",
651         gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource, readFormat,
652         gTestMipmaps ? ", lod" : "");
653 
654     ptr = programSrc;
655     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
656                                         "sample_kernel");
657     test_error( error, "Unable to create testing kernel" );
658 
659     // Run tests
660     if( gTestSmallImages )
661     {
662         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
663         {
664             imageInfo.rowPitch = imageInfo.width * pixelSize;
665             imageInfo.slicePitch = imageInfo.rowPitch;
666             for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ )
667             {
668                 if(gTestMipmaps)
669                     imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);
670 
671                 if( gDebugTrace )
672                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
673                 int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
674                 if( retCode )
675                     return retCode;
676             }
677         }
678     }
679     else if( gTestMaxImages )
680     {
681         // Try a specific set of maximum sizes
682         size_t numbeOfSizes;
683         size_t sizes[100][3];
684 
685         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format, CL_TRUE);
686 
687         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
688         {
689             imageInfo.width = sizes[ idx ][ 0 ];
690             imageInfo.arraySize = sizes[ idx ][ 2 ];
691             imageInfo.rowPitch = imageInfo.width * pixelSize;
692             imageInfo.slicePitch = imageInfo.rowPitch;
693             if(gTestMipmaps)
694                 imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);
695             log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize);
696             int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
697             if( retCode )
698                 return retCode;
699         }
700     }
701     else if( gTestRounding )
702     {
703         size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
704         imageInfo.arraySize = typeRange / 256;
705         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.arraySize );
706 
707         imageInfo.rowPitch = imageInfo.width * pixelSize;
708         imageInfo.slicePitch = imageInfo.rowPitch;
709         int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
710         if( retCode )
711             return retCode;
712     }
713     else
714     {
715         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
716         {
717             cl_ulong size;
718             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
719             // image, the result array, plus offset arrays, will fit in the global ram space
720             do
721             {
722                 imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
723                 imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d );
724 
725                 if( gTestMipmaps)
726                 {
727                     imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);
728                     size = (cl_ulong) compute_mipmapped_image_size(imageInfo) * 4;
729                 }
730                 else
731                 {
732                     imageInfo.rowPitch = imageInfo.width * pixelSize;
733                     if( gEnablePitch )
734                     {
735                         size_t extraWidth = (int)random_log_in_range( 0, 64, d );
736                         imageInfo.rowPitch += extraWidth * pixelSize;
737                     }
738                     imageInfo.slicePitch = imageInfo.rowPitch;
739 
740                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4;
741                 }
742             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
743 
744             if( gDebugTrace )
745                 log_info( "   at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );
746 
747             int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
748             if( retCode )
749                 return retCode;
750         }
751     }
752 
753     return 0;
754 }
755