xref: /aosp_15_r20/external/clpeak/src/compute_integer_fast.cpp (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1*1cd03ba3SJeremy Kemp #include <clpeak.h>
2*1cd03ba3SJeremy Kemp 
runComputeIntFast(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)3*1cd03ba3SJeremy Kemp int clPeak::runComputeIntFast(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
4*1cd03ba3SJeremy Kemp {
5*1cd03ba3SJeremy Kemp   float timed, gflops;
6*1cd03ba3SJeremy Kemp   cl_uint workPerWI;
7*1cd03ba3SJeremy Kemp   cl::NDRange globalSize, localSize;
8*1cd03ba3SJeremy Kemp   cl_int A = 4;
9*1cd03ba3SJeremy Kemp   uint iters = devInfo.computeIters;
10*1cd03ba3SJeremy Kemp 
11*1cd03ba3SJeremy Kemp   if (!isComputeIntFast)
12*1cd03ba3SJeremy Kemp     return 0;
13*1cd03ba3SJeremy Kemp 
14*1cd03ba3SJeremy Kemp   try
15*1cd03ba3SJeremy Kemp   {
16*1cd03ba3SJeremy Kemp     log->print(NEWLINE TAB TAB "Integer compute Fast 24bit (GIOPS)" NEWLINE);
17*1cd03ba3SJeremy Kemp     log->xmlOpenTag("integer_compute_fast");
18*1cd03ba3SJeremy Kemp     log->xmlAppendAttribs("unit", "giops");
19*1cd03ba3SJeremy Kemp 
20*1cd03ba3SJeremy Kemp     cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
21*1cd03ba3SJeremy Kemp 
22*1cd03ba3SJeremy Kemp     uint64_t globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize);
23*1cd03ba3SJeremy Kemp     uint64_t t = std::min((globalWIs * sizeof(cl_int)), devInfo.maxAllocSize) / sizeof(cl_int);
24*1cd03ba3SJeremy Kemp     globalWIs = roundToMultipleOf(t, devInfo.maxWGSize);
25*1cd03ba3SJeremy Kemp 
26*1cd03ba3SJeremy Kemp     cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_int)));
27*1cd03ba3SJeremy Kemp 
28*1cd03ba3SJeremy Kemp     globalSize = globalWIs;
29*1cd03ba3SJeremy Kemp     localSize = devInfo.maxWGSize;
30*1cd03ba3SJeremy Kemp 
31*1cd03ba3SJeremy Kemp     cl::Kernel kernel_v1(prog, "compute_intfast_v1");
32*1cd03ba3SJeremy Kemp     kernel_v1.setArg(0, outputBuf), kernel_v1.setArg(1, A);
33*1cd03ba3SJeremy Kemp 
34*1cd03ba3SJeremy Kemp     cl::Kernel kernel_v2(prog, "compute_intfast_v2");
35*1cd03ba3SJeremy Kemp     kernel_v2.setArg(0, outputBuf), kernel_v2.setArg(1, A);
36*1cd03ba3SJeremy Kemp 
37*1cd03ba3SJeremy Kemp     cl::Kernel kernel_v4(prog, "compute_intfast_v4");
38*1cd03ba3SJeremy Kemp     kernel_v4.setArg(0, outputBuf), kernel_v4.setArg(1, A);
39*1cd03ba3SJeremy Kemp 
40*1cd03ba3SJeremy Kemp     cl::Kernel kernel_v8(prog, "compute_intfast_v8");
41*1cd03ba3SJeremy Kemp     kernel_v8.setArg(0, outputBuf), kernel_v8.setArg(1, A);
42*1cd03ba3SJeremy Kemp 
43*1cd03ba3SJeremy Kemp     cl::Kernel kernel_v16(prog, "compute_intfast_v16");
44*1cd03ba3SJeremy Kemp     kernel_v16.setArg(0, outputBuf), kernel_v16.setArg(1, A);
45*1cd03ba3SJeremy Kemp 
46*1cd03ba3SJeremy Kemp     ///////////////////////////////////////////////////////////////////////////
47*1cd03ba3SJeremy Kemp     // Vector width 1
48*1cd03ba3SJeremy Kemp     if (!forceTest || strcmp(specifiedTestName, "int") == 0)
49*1cd03ba3SJeremy Kemp     {
50*1cd03ba3SJeremy Kemp       log->print(TAB TAB TAB "int   : ");
51*1cd03ba3SJeremy Kemp 
52*1cd03ba3SJeremy Kemp       workPerWI = 2048; // Indicates integer operations executed per work-item
53*1cd03ba3SJeremy Kemp 
54*1cd03ba3SJeremy Kemp       timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters);
55*1cd03ba3SJeremy Kemp 
56*1cd03ba3SJeremy Kemp       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
57*1cd03ba3SJeremy Kemp 
58*1cd03ba3SJeremy Kemp       log->print(gflops);
59*1cd03ba3SJeremy Kemp       log->print(NEWLINE);
60*1cd03ba3SJeremy Kemp       log->xmlRecord("int", gflops);
61*1cd03ba3SJeremy Kemp     }
62*1cd03ba3SJeremy Kemp     ///////////////////////////////////////////////////////////////////////////
63*1cd03ba3SJeremy Kemp 
64*1cd03ba3SJeremy Kemp     // Vector width 2
65*1cd03ba3SJeremy Kemp     if (!forceTest || strcmp(specifiedTestName, "int2") == 0)
66*1cd03ba3SJeremy Kemp     {
67*1cd03ba3SJeremy Kemp       log->print(TAB TAB TAB "int2  : ");
68*1cd03ba3SJeremy Kemp 
69*1cd03ba3SJeremy Kemp       workPerWI = 2048;
70*1cd03ba3SJeremy Kemp 
71*1cd03ba3SJeremy Kemp       timed = run_kernel(queue, kernel_v2, globalSize, localSize, iters);
72*1cd03ba3SJeremy Kemp 
73*1cd03ba3SJeremy Kemp       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
74*1cd03ba3SJeremy Kemp 
75*1cd03ba3SJeremy Kemp       log->print(gflops);
76*1cd03ba3SJeremy Kemp       log->print(NEWLINE);
77*1cd03ba3SJeremy Kemp       log->xmlRecord("int2", gflops);
78*1cd03ba3SJeremy Kemp     }
79*1cd03ba3SJeremy Kemp     ///////////////////////////////////////////////////////////////////////////
80*1cd03ba3SJeremy Kemp 
81*1cd03ba3SJeremy Kemp     // Vector width 4
82*1cd03ba3SJeremy Kemp     if (!forceTest || strcmp(specifiedTestName, "int4") == 0)
83*1cd03ba3SJeremy Kemp     {
84*1cd03ba3SJeremy Kemp       log->print(TAB TAB TAB "int4  : ");
85*1cd03ba3SJeremy Kemp 
86*1cd03ba3SJeremy Kemp       workPerWI = 2048;
87*1cd03ba3SJeremy Kemp 
88*1cd03ba3SJeremy Kemp       timed = run_kernel(queue, kernel_v4, globalSize, localSize, iters);
89*1cd03ba3SJeremy Kemp 
90*1cd03ba3SJeremy Kemp       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
91*1cd03ba3SJeremy Kemp 
92*1cd03ba3SJeremy Kemp       log->print(gflops);
93*1cd03ba3SJeremy Kemp       log->print(NEWLINE);
94*1cd03ba3SJeremy Kemp       log->xmlRecord("int4", gflops);
95*1cd03ba3SJeremy Kemp     }
96*1cd03ba3SJeremy Kemp     ///////////////////////////////////////////////////////////////////////////
97*1cd03ba3SJeremy Kemp 
98*1cd03ba3SJeremy Kemp     // Vector width 8
99*1cd03ba3SJeremy Kemp     if (!forceTest || strcmp(specifiedTestName, "int8") == 0)
100*1cd03ba3SJeremy Kemp     {
101*1cd03ba3SJeremy Kemp       log->print(TAB TAB TAB "int8  : ");
102*1cd03ba3SJeremy Kemp 
103*1cd03ba3SJeremy Kemp       workPerWI = 2048;
104*1cd03ba3SJeremy Kemp 
105*1cd03ba3SJeremy Kemp       timed = run_kernel(queue, kernel_v8, globalSize, localSize, iters);
106*1cd03ba3SJeremy Kemp 
107*1cd03ba3SJeremy Kemp       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
108*1cd03ba3SJeremy Kemp 
109*1cd03ba3SJeremy Kemp       log->print(gflops);
110*1cd03ba3SJeremy Kemp       log->print(NEWLINE);
111*1cd03ba3SJeremy Kemp       log->xmlRecord("int8", gflops);
112*1cd03ba3SJeremy Kemp     }
113*1cd03ba3SJeremy Kemp     ///////////////////////////////////////////////////////////////////////////
114*1cd03ba3SJeremy Kemp 
115*1cd03ba3SJeremy Kemp     // Vector width 16
116*1cd03ba3SJeremy Kemp     if (!forceTest || strcmp(specifiedTestName, "int16") == 0)
117*1cd03ba3SJeremy Kemp     {
118*1cd03ba3SJeremy Kemp       log->print(TAB TAB TAB "int16 : ");
119*1cd03ba3SJeremy Kemp 
120*1cd03ba3SJeremy Kemp       workPerWI = 2048;
121*1cd03ba3SJeremy Kemp 
122*1cd03ba3SJeremy Kemp       timed = run_kernel(queue, kernel_v16, globalSize, localSize, iters);
123*1cd03ba3SJeremy Kemp 
124*1cd03ba3SJeremy Kemp       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
125*1cd03ba3SJeremy Kemp 
126*1cd03ba3SJeremy Kemp       log->print(gflops);
127*1cd03ba3SJeremy Kemp       log->print(NEWLINE);
128*1cd03ba3SJeremy Kemp       log->xmlRecord("int16", gflops);
129*1cd03ba3SJeremy Kemp     }
130*1cd03ba3SJeremy Kemp     ///////////////////////////////////////////////////////////////////////////
131*1cd03ba3SJeremy Kemp     log->xmlCloseTag(); // integer_compute
132*1cd03ba3SJeremy Kemp   }
133*1cd03ba3SJeremy Kemp   catch (cl::Error &error)
134*1cd03ba3SJeremy Kemp   {
135*1cd03ba3SJeremy Kemp     stringstream ss;
136*1cd03ba3SJeremy Kemp     ss << error.what() << " (" << error.err() << ")" NEWLINE
137*1cd03ba3SJeremy Kemp        << TAB TAB TAB "Tests skipped" NEWLINE;
138*1cd03ba3SJeremy Kemp     log->print(ss.str());
139*1cd03ba3SJeremy Kemp     return -1;
140*1cd03ba3SJeremy Kemp   }
141*1cd03ba3SJeremy Kemp 
142*1cd03ba3SJeremy Kemp   return 0;
143*1cd03ba3SJeremy Kemp }
144