1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi // http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #include "harness/compat.h"
17*6467f958SSadaf Ebrahimi
18*6467f958SSadaf Ebrahimi #include <stdio.h>
19*6467f958SSadaf Ebrahimi #include <string.h>
20*6467f958SSadaf Ebrahimi #include <time.h>
21*6467f958SSadaf Ebrahimi #include <sys/types.h>
22*6467f958SSadaf Ebrahimi #include <sys/stat.h>
23*6467f958SSadaf Ebrahimi
24*6467f958SSadaf Ebrahimi #include <algorithm>
25*6467f958SSadaf Ebrahimi
26*6467f958SSadaf Ebrahimi #include "procs.h"
27*6467f958SSadaf Ebrahimi #include "harness/testHarness.h"
28*6467f958SSadaf Ebrahimi #include "harness/errorHelpers.h"
29*6467f958SSadaf Ebrahimi
30*6467f958SSadaf Ebrahimi #ifndef uchar
31*6467f958SSadaf Ebrahimi typedef unsigned char uchar;
32*6467f958SSadaf Ebrahimi #endif
33*6467f958SSadaf Ebrahimi
34*6467f958SSadaf Ebrahimi //#define CREATE_OUTPUT 1
35*6467f958SSadaf Ebrahimi
36*6467f958SSadaf Ebrahimi extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
37*6467f958SSadaf Ebrahimi
38*6467f958SSadaf Ebrahimi
39*6467f958SSadaf Ebrahimi
40*6467f958SSadaf Ebrahimi //--- the code for kernel executables
41*6467f958SSadaf Ebrahimi static const char *image_filter_src =
42*6467f958SSadaf Ebrahimi "constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
43*6467f958SSadaf Ebrahimi "\n"
44*6467f958SSadaf Ebrahimi "__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
45*6467f958SSadaf Ebrahimi " read_only image2d_t src_image, write_only image2d_t dst_image )\n"
46*6467f958SSadaf Ebrahimi "{\n"
47*6467f958SSadaf Ebrahimi " int i, j;\n"
48*6467f958SSadaf Ebrahimi " int indx = 0;\n"
49*6467f958SSadaf Ebrahimi " int tid_x = get_global_id(0);\n"
50*6467f958SSadaf Ebrahimi " int tid_y = get_global_id(1);\n"
51*6467f958SSadaf Ebrahimi " float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
52*6467f958SSadaf Ebrahimi "\n"
53*6467f958SSadaf Ebrahimi " for (i=-m/2; i<(m+1)/2; i++){\n"
54*6467f958SSadaf Ebrahimi " for (j=-n/2; j<(n+1)/2; j++){\n"
55*6467f958SSadaf Ebrahimi " float w = filter_weights[indx++];\n"
56*6467f958SSadaf Ebrahimi "\n"
57*6467f958SSadaf Ebrahimi " if (w != 0.0f){\n"
58*6467f958SSadaf Ebrahimi " filter_result += w * read_imagef(src_image, sampler,\n"
59*6467f958SSadaf Ebrahimi " (int2)(tid_x + j, tid_y + i));\n"
60*6467f958SSadaf Ebrahimi " }\n"
61*6467f958SSadaf Ebrahimi " }\n"
62*6467f958SSadaf Ebrahimi " }\n"
63*6467f958SSadaf Ebrahimi "\n"
64*6467f958SSadaf Ebrahimi " write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
65*6467f958SSadaf Ebrahimi "}\n";
66*6467f958SSadaf Ebrahimi
67*6467f958SSadaf Ebrahimi
68*6467f958SSadaf Ebrahimi //--- equivalent non-kernel code
read_imagef(int x,int y,int w,int h,int nChannels,uchar * src,float * srcRgb)69*6467f958SSadaf Ebrahimi static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
70*6467f958SSadaf Ebrahimi {
71*6467f958SSadaf Ebrahimi // clamp the coords
72*6467f958SSadaf Ebrahimi int x0 = std::min(std::max(x, 0), w - 1);
73*6467f958SSadaf Ebrahimi int y0 = std::min(std::max(y, 0), h - 1);
74*6467f958SSadaf Ebrahimi
75*6467f958SSadaf Ebrahimi // get tine index
76*6467f958SSadaf Ebrahimi int indx = ( y0 * w + x0 ) * nChannels;
77*6467f958SSadaf Ebrahimi
78*6467f958SSadaf Ebrahimi // seed the return array
79*6467f958SSadaf Ebrahimi int i;
80*6467f958SSadaf Ebrahimi for( i = 0; i < nChannels; i++ ){
81*6467f958SSadaf Ebrahimi srcRgb[i] = (float)src[indx+i];
82*6467f958SSadaf Ebrahimi }
83*6467f958SSadaf Ebrahimi } // end read_imagef()
84*6467f958SSadaf Ebrahimi
85*6467f958SSadaf Ebrahimi
write_imagef(uchar * dst,int x,int y,int w,int h,int nChannels,float * dstRgb)86*6467f958SSadaf Ebrahimi static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
87*6467f958SSadaf Ebrahimi {
88*6467f958SSadaf Ebrahimi // get tine index
89*6467f958SSadaf Ebrahimi int indx = ( y * w + x ) * nChannels;
90*6467f958SSadaf Ebrahimi
91*6467f958SSadaf Ebrahimi // seed the return array
92*6467f958SSadaf Ebrahimi int i;
93*6467f958SSadaf Ebrahimi for( i = 0; i < nChannels; i++ ){
94*6467f958SSadaf Ebrahimi dst[indx+i] = (uchar)dstRgb[i];
95*6467f958SSadaf Ebrahimi }
96*6467f958SSadaf Ebrahimi } // end write_imagef()
97*6467f958SSadaf Ebrahimi
98*6467f958SSadaf Ebrahimi
basicFilterPixel(int x,int y,int n,int m,int xsize,int ysize,int nChannels,const float * filter_weights,uchar * src,uchar * dst)99*6467f958SSadaf Ebrahimi static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
100*6467f958SSadaf Ebrahimi {
101*6467f958SSadaf Ebrahimi int i, j, k;
102*6467f958SSadaf Ebrahimi int indx = 0;
103*6467f958SSadaf Ebrahimi float filter_result[] = { 0.f, 0.f, 0.f, 0.f };
104*6467f958SSadaf Ebrahimi float srcRgb[4];
105*6467f958SSadaf Ebrahimi
106*6467f958SSadaf Ebrahimi for( i = -m/2; i < (m+1)/2; i++ ){
107*6467f958SSadaf Ebrahimi for( j = -n/2; j < (n+1)/2; j++ ){
108*6467f958SSadaf Ebrahimi float w = filter_weights[indx++];
109*6467f958SSadaf Ebrahimi
110*6467f958SSadaf Ebrahimi if( w != 0 ){
111*6467f958SSadaf Ebrahimi read_imagef( x + j, y + i, xsize, ysize, nChannels, src, srcRgb );
112*6467f958SSadaf Ebrahimi for( k = 0; k < nChannels; k++ ){
113*6467f958SSadaf Ebrahimi filter_result[k] += w * srcRgb[k];
114*6467f958SSadaf Ebrahimi }
115*6467f958SSadaf Ebrahimi }
116*6467f958SSadaf Ebrahimi }
117*6467f958SSadaf Ebrahimi }
118*6467f958SSadaf Ebrahimi
119*6467f958SSadaf Ebrahimi write_imagef( dst, x, y, xsize, ysize, nChannels, filter_result );
120*6467f958SSadaf Ebrahimi
121*6467f958SSadaf Ebrahimi } // end basicFilterPixel()
122*6467f958SSadaf Ebrahimi
123*6467f958SSadaf Ebrahimi
124*6467f958SSadaf Ebrahimi //--- helper functions
createImage(int elements,MTdata d)125*6467f958SSadaf Ebrahimi static uchar *createImage( int elements, MTdata d)
126*6467f958SSadaf Ebrahimi {
127*6467f958SSadaf Ebrahimi int i;
128*6467f958SSadaf Ebrahimi uchar *ptr = (uchar *)malloc( elements * sizeof( cl_uchar ) );
129*6467f958SSadaf Ebrahimi if( ! ptr )
130*6467f958SSadaf Ebrahimi return NULL;
131*6467f958SSadaf Ebrahimi
132*6467f958SSadaf Ebrahimi for( i = 0; i < elements; i++ ){
133*6467f958SSadaf Ebrahimi ptr[i] = (uchar)genrand_int32(d);
134*6467f958SSadaf Ebrahimi }
135*6467f958SSadaf Ebrahimi
136*6467f958SSadaf Ebrahimi return ptr;
137*6467f958SSadaf Ebrahimi
138*6467f958SSadaf Ebrahimi } // end createImage()
139*6467f958SSadaf Ebrahimi
140*6467f958SSadaf Ebrahimi
verifyImages(uchar * ptr0,uchar * ptr1,uchar tolerance,int xsize,int ysize,int nChannels)141*6467f958SSadaf Ebrahimi static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
142*6467f958SSadaf Ebrahimi {
143*6467f958SSadaf Ebrahimi int x, y, z;
144*6467f958SSadaf Ebrahimi uchar *p0 = ptr0;
145*6467f958SSadaf Ebrahimi uchar *p1 = ptr1;
146*6467f958SSadaf Ebrahimi
147*6467f958SSadaf Ebrahimi for( y = 0; y < ysize; y++ ){
148*6467f958SSadaf Ebrahimi for( x = 0; x < xsize; x++ ){
149*6467f958SSadaf Ebrahimi for( z = 0; z < nChannels; z++ ){
150*6467f958SSadaf Ebrahimi if( (uchar)abs( (int)( *p0++ - *p1++ ) ) > tolerance ){
151*6467f958SSadaf Ebrahimi log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", x, y, z,
152*6467f958SSadaf Ebrahimi (int)p0[-1], (int)p1[-1] );
153*6467f958SSadaf Ebrahimi return -1;
154*6467f958SSadaf Ebrahimi }
155*6467f958SSadaf Ebrahimi }
156*6467f958SSadaf Ebrahimi }
157*6467f958SSadaf Ebrahimi }
158*6467f958SSadaf Ebrahimi
159*6467f958SSadaf Ebrahimi return 0;
160*6467f958SSadaf Ebrahimi
161*6467f958SSadaf Ebrahimi } // end verifyImages()
162*6467f958SSadaf Ebrahimi
163*6467f958SSadaf Ebrahimi
kernelFilter(cl_device_id device,cl_context context,cl_command_queue queue,int w,int h,int nChannels,uchar * inptr,uchar * outptr)164*6467f958SSadaf Ebrahimi static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
165*6467f958SSadaf Ebrahimi uchar *inptr, uchar *outptr )
166*6467f958SSadaf Ebrahimi {
167*6467f958SSadaf Ebrahimi cl_program program[1];
168*6467f958SSadaf Ebrahimi cl_kernel kernel[1];
169*6467f958SSadaf Ebrahimi cl_mem memobjs[3];
170*6467f958SSadaf Ebrahimi cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
171*6467f958SSadaf Ebrahimi cl_event executeEvent;
172*6467f958SSadaf Ebrahimi cl_ulong queueStart, submitStart, writeStart, writeEnd;
173*6467f958SSadaf Ebrahimi size_t threads[2];
174*6467f958SSadaf Ebrahimi float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
175*6467f958SSadaf Ebrahimi int filter_w = 3, filter_h = 3;
176*6467f958SSadaf Ebrahimi int err = 0;
177*6467f958SSadaf Ebrahimi
178*6467f958SSadaf Ebrahimi // set thread dimensions
179*6467f958SSadaf Ebrahimi threads[0] = w;
180*6467f958SSadaf Ebrahimi threads[1] = h;
181*6467f958SSadaf Ebrahimi
182*6467f958SSadaf Ebrahimi // allocate the input and output image memory objects
183*6467f958SSadaf Ebrahimi memobjs[0] =
184*6467f958SSadaf Ebrahimi create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
185*6467f958SSadaf Ebrahimi &image_format_desc, w, h, 0, inptr, &err);
186*6467f958SSadaf Ebrahimi if( memobjs[0] == (cl_mem)0 ){
187*6467f958SSadaf Ebrahimi log_error( " unable to create 2D image using create_image_2d\n" );
188*6467f958SSadaf Ebrahimi return -1;
189*6467f958SSadaf Ebrahimi }
190*6467f958SSadaf Ebrahimi
191*6467f958SSadaf Ebrahimi memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
192*6467f958SSadaf Ebrahimi if( memobjs[1] == (cl_mem)0 ){
193*6467f958SSadaf Ebrahimi log_error( " unable to create 2D image using create_image_2d\n" );
194*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
195*6467f958SSadaf Ebrahimi return -1;
196*6467f958SSadaf Ebrahimi }
197*6467f958SSadaf Ebrahimi
198*6467f958SSadaf Ebrahimi // allocate an array memory object to load the filter weights
199*6467f958SSadaf Ebrahimi memobjs[2] = clCreateBuffer(
200*6467f958SSadaf Ebrahimi context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
201*6467f958SSadaf Ebrahimi sizeof(cl_float) * filter_w * filter_h, &filter_weights, &err);
202*6467f958SSadaf Ebrahimi if( memobjs[2] == (cl_mem)0 ){
203*6467f958SSadaf Ebrahimi log_error( " unable to create array using clCreateBuffer\n" );
204*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
205*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
206*6467f958SSadaf Ebrahimi return -1;
207*6467f958SSadaf Ebrahimi }
208*6467f958SSadaf Ebrahimi
209*6467f958SSadaf Ebrahimi // create the compute program
210*6467f958SSadaf Ebrahimi err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &image_filter_src, "image_filter" );
211*6467f958SSadaf Ebrahimi if( err ){
212*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
213*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
214*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
215*6467f958SSadaf Ebrahimi return -1;
216*6467f958SSadaf Ebrahimi }
217*6467f958SSadaf Ebrahimi
218*6467f958SSadaf Ebrahimi
219*6467f958SSadaf Ebrahimi // create kernel args object and set arg values.
220*6467f958SSadaf Ebrahimi // set the args values
221*6467f958SSadaf Ebrahimi err = clSetKernelArg( kernel[0], 0, sizeof( cl_int ), (void *)&filter_w );
222*6467f958SSadaf Ebrahimi err |= clSetKernelArg( kernel[0], 1, sizeof( cl_int ), (void *)&filter_h );
223*6467f958SSadaf Ebrahimi err |= clSetKernelArg( kernel[0], 2, sizeof( cl_mem ), (void *)&memobjs[2] );
224*6467f958SSadaf Ebrahimi err |= clSetKernelArg( kernel[0], 3, sizeof( cl_mem ), (void *)&memobjs[0] );
225*6467f958SSadaf Ebrahimi err |= clSetKernelArg( kernel[0], 4, sizeof( cl_mem ), (void *)&memobjs[1] );
226*6467f958SSadaf Ebrahimi
227*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS ){
228*6467f958SSadaf Ebrahimi print_error( err, "clSetKernelArg failed\n" );
229*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
230*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
231*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
232*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
233*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
234*6467f958SSadaf Ebrahimi return -1;
235*6467f958SSadaf Ebrahimi }
236*6467f958SSadaf Ebrahimi
237*6467f958SSadaf Ebrahimi err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, &executeEvent );
238*6467f958SSadaf Ebrahimi
239*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS ){
240*6467f958SSadaf Ebrahimi print_error( err, "clEnqueueNDRangeKernel failed\n" );
241*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
242*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
243*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
244*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
245*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
246*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
247*6467f958SSadaf Ebrahimi return -1;
248*6467f958SSadaf Ebrahimi }
249*6467f958SSadaf Ebrahimi
250*6467f958SSadaf Ebrahimi // This synchronization point is needed in order to assume the data is valid.
251*6467f958SSadaf Ebrahimi // Getting profiling information is not a synchronization point.
252*6467f958SSadaf Ebrahimi err = clWaitForEvents( 1, &executeEvent );
253*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS )
254*6467f958SSadaf Ebrahimi {
255*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
256*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
257*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
258*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
259*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
260*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
261*6467f958SSadaf Ebrahimi return -1;
262*6467f958SSadaf Ebrahimi }
263*6467f958SSadaf Ebrahimi
264*6467f958SSadaf Ebrahimi // test profiling
265*6467f958SSadaf Ebrahimi while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
266*6467f958SSadaf Ebrahimi CL_PROFILING_INFO_NOT_AVAILABLE );
267*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS ){
268*6467f958SSadaf Ebrahimi print_error( err, "clGetEventProfilingInfo failed" );
269*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
270*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
271*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
272*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
273*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
274*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
275*6467f958SSadaf Ebrahimi return -1;
276*6467f958SSadaf Ebrahimi }
277*6467f958SSadaf Ebrahimi
278*6467f958SSadaf Ebrahimi while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
279*6467f958SSadaf Ebrahimi CL_PROFILING_INFO_NOT_AVAILABLE );
280*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS ){
281*6467f958SSadaf Ebrahimi print_error( err, "clGetEventProfilingInfo failed" );
282*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
283*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
284*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
285*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
286*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
287*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
288*6467f958SSadaf Ebrahimi return -1;
289*6467f958SSadaf Ebrahimi }
290*6467f958SSadaf Ebrahimi
291*6467f958SSadaf Ebrahimi err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
292*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS ){
293*6467f958SSadaf Ebrahimi print_error( err, "clGetEventProfilingInfo failed" );
294*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
295*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
296*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
297*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
298*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
299*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
300*6467f958SSadaf Ebrahimi return -1;
301*6467f958SSadaf Ebrahimi }
302*6467f958SSadaf Ebrahimi
303*6467f958SSadaf Ebrahimi err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
304*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS ){
305*6467f958SSadaf Ebrahimi print_error( err, "clGetEventProfilingInfo failed" );
306*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
307*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
308*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
309*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
310*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
311*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
312*6467f958SSadaf Ebrahimi return -1;
313*6467f958SSadaf Ebrahimi }
314*6467f958SSadaf Ebrahimi
315*6467f958SSadaf Ebrahimi // read output image
316*6467f958SSadaf Ebrahimi size_t origin[3] = { 0, 0, 0 };
317*6467f958SSadaf Ebrahimi size_t region[3] = { w, h, 1 };
318*6467f958SSadaf Ebrahimi err = clEnqueueReadImage( queue, memobjs[1], true, origin, region, 0, 0, outptr, 0, NULL, NULL);
319*6467f958SSadaf Ebrahimi if( err != CL_SUCCESS ){
320*6467f958SSadaf Ebrahimi print_error( err, "clReadImage failed\n" );
321*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
322*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
323*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
324*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
325*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
326*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
327*6467f958SSadaf Ebrahimi return -1;
328*6467f958SSadaf Ebrahimi }
329*6467f958SSadaf Ebrahimi
330*6467f958SSadaf Ebrahimi // release event, kernel, program, and memory objects
331*6467f958SSadaf Ebrahimi clReleaseEvent( executeEvent );
332*6467f958SSadaf Ebrahimi clReleaseKernel( kernel[0] );
333*6467f958SSadaf Ebrahimi clReleaseProgram( program[0] );
334*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[2] );
335*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[1] );
336*6467f958SSadaf Ebrahimi clReleaseMemObject( memobjs[0] );
337*6467f958SSadaf Ebrahimi
338*6467f958SSadaf Ebrahimi if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
339*6467f958SSadaf Ebrahimi err = -1;
340*6467f958SSadaf Ebrahimi
341*6467f958SSadaf Ebrahimi return err;
342*6467f958SSadaf Ebrahimi
343*6467f958SSadaf Ebrahimi } // end kernelFilter()
344*6467f958SSadaf Ebrahimi
345*6467f958SSadaf Ebrahimi
basicFilter(int w,int h,int nChannels,uchar * inptr,uchar * outptr)346*6467f958SSadaf Ebrahimi static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
347*6467f958SSadaf Ebrahimi {
348*6467f958SSadaf Ebrahimi const float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
349*6467f958SSadaf Ebrahimi int filter_w = 3, filter_h = 3;
350*6467f958SSadaf Ebrahimi int x, y;
351*6467f958SSadaf Ebrahimi
352*6467f958SSadaf Ebrahimi for( y = 0; y < h; y++ ){
353*6467f958SSadaf Ebrahimi for( x = 0; x < w; x++ ){
354*6467f958SSadaf Ebrahimi basicFilterPixel( x, y, filter_w, filter_h, w, h, nChannels, filter_weights, inptr, outptr );
355*6467f958SSadaf Ebrahimi }
356*6467f958SSadaf Ebrahimi }
357*6467f958SSadaf Ebrahimi
358*6467f958SSadaf Ebrahimi return 0;
359*6467f958SSadaf Ebrahimi
360*6467f958SSadaf Ebrahimi } // end of basicFilter()
361*6467f958SSadaf Ebrahimi
362*6467f958SSadaf Ebrahimi
test_execute(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)363*6467f958SSadaf Ebrahimi int test_execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
364*6467f958SSadaf Ebrahimi {
365*6467f958SSadaf Ebrahimi uchar *inptr;
366*6467f958SSadaf Ebrahimi uchar *outptr[2];
367*6467f958SSadaf Ebrahimi int w = 256, h = 256;
368*6467f958SSadaf Ebrahimi int nChannels = 4;
369*6467f958SSadaf Ebrahimi int nElements = w * h * nChannels;
370*6467f958SSadaf Ebrahimi int err = 0;
371*6467f958SSadaf Ebrahimi MTdata d;
372*6467f958SSadaf Ebrahimi
373*6467f958SSadaf Ebrahimi
374*6467f958SSadaf Ebrahimi PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
375*6467f958SSadaf Ebrahimi
376*6467f958SSadaf Ebrahimi d = init_genrand( gRandomSeed );
377*6467f958SSadaf Ebrahimi inptr = createImage( nElements, d );
378*6467f958SSadaf Ebrahimi free_mtdata( d); d = NULL;
379*6467f958SSadaf Ebrahimi
380*6467f958SSadaf Ebrahimi if( ! inptr ){
381*6467f958SSadaf Ebrahimi log_error( " unable to allocate %d bytes of memory for image\n", nElements );
382*6467f958SSadaf Ebrahimi return -1;
383*6467f958SSadaf Ebrahimi }
384*6467f958SSadaf Ebrahimi
385*6467f958SSadaf Ebrahimi outptr[0] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
386*6467f958SSadaf Ebrahimi if( ! outptr[0] ){
387*6467f958SSadaf Ebrahimi log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements );
388*6467f958SSadaf Ebrahimi free( (void *)inptr );
389*6467f958SSadaf Ebrahimi return -1;
390*6467f958SSadaf Ebrahimi }
391*6467f958SSadaf Ebrahimi
392*6467f958SSadaf Ebrahimi outptr[1] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
393*6467f958SSadaf Ebrahimi if( ! outptr[1] ){
394*6467f958SSadaf Ebrahimi log_error( " unable to allocate %d bytes of memory for output image #2\n", nElements );
395*6467f958SSadaf Ebrahimi free( (void *)outptr[0] );
396*6467f958SSadaf Ebrahimi free( (void *)inptr );
397*6467f958SSadaf Ebrahimi return -1;
398*6467f958SSadaf Ebrahimi }
399*6467f958SSadaf Ebrahimi
400*6467f958SSadaf Ebrahimi err = kernelFilter( device, context, queue, w, h, nChannels, inptr, outptr[0] );
401*6467f958SSadaf Ebrahimi
402*6467f958SSadaf Ebrahimi if( ! err ){
403*6467f958SSadaf Ebrahimi basicFilter( w, h, nChannels, inptr, outptr[1] );
404*6467f958SSadaf Ebrahimi
405*6467f958SSadaf Ebrahimi // verify that the images are the same
406*6467f958SSadaf Ebrahimi err = verifyImages( outptr[0], outptr[1], (uchar)0x1, w, h, nChannels );
407*6467f958SSadaf Ebrahimi if( err )
408*6467f958SSadaf Ebrahimi log_error( " images do not match\n" );
409*6467f958SSadaf Ebrahimi }
410*6467f958SSadaf Ebrahimi
411*6467f958SSadaf Ebrahimi // clean up
412*6467f958SSadaf Ebrahimi free( (void *)outptr[1] );
413*6467f958SSadaf Ebrahimi free( (void *)outptr[0] );
414*6467f958SSadaf Ebrahimi free( (void *)inptr );
415*6467f958SSadaf Ebrahimi
416*6467f958SSadaf Ebrahimi return err;
417*6467f958SSadaf Ebrahimi
418*6467f958SSadaf Ebrahimi } // end execute()
419*6467f958SSadaf Ebrahimi
420*6467f958SSadaf Ebrahimi
421*6467f958SSadaf Ebrahimi
422