xref: /aosp_15_r20/external/clpeak/src/compute_char.cpp (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1 #include <clpeak.h>
2 
runComputeChar(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)3 int clPeak::runComputeChar(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
4 {
5   float timed, gflops;
6   cl_uint workPerWI;
7   cl::NDRange globalSize, localSize;
8   cl_char A = 4;
9   uint iters = devInfo.computeIters;
10 
11   if (!isComputeChar)
12     return 0;
13 
14   try
15   {
16     log->print(NEWLINE TAB TAB "Integer char (8bit) compute (GIOPS)" NEWLINE);
17     log->xmlOpenTag("integer_compute_char");
18     log->xmlAppendAttribs("unit", "giops");
19 
20     cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
21 
22     uint64_t globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize);
23     uint64_t t = std::min((globalWIs * sizeof(cl_char)), devInfo.maxAllocSize) / sizeof(cl_char);
24     globalWIs = roundToMultipleOf(t, devInfo.maxWGSize);
25 
26     cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_char)));
27 
28     globalSize = globalWIs;
29     localSize = devInfo.maxWGSize;
30 
31     cl::Kernel kernel_v1(prog, "compute_char_v1");
32     kernel_v1.setArg(0, outputBuf), kernel_v1.setArg(1, A);
33 
34     cl::Kernel kernel_v2(prog, "compute_char_v2");
35     kernel_v2.setArg(0, outputBuf), kernel_v2.setArg(1, A);
36 
37     cl::Kernel kernel_v4(prog, "compute_char_v4");
38     kernel_v4.setArg(0, outputBuf), kernel_v4.setArg(1, A);
39 
40     cl::Kernel kernel_v8(prog, "compute_char_v8");
41     kernel_v8.setArg(0, outputBuf), kernel_v8.setArg(1, A);
42 
43     cl::Kernel kernel_v16(prog, "compute_char_v16");
44     kernel_v16.setArg(0, outputBuf), kernel_v16.setArg(1, A);
45 
46     ///////////////////////////////////////////////////////////////////////////
47     // Vector width 1
48     if (!forceTest || strcmp(specifiedTestName, "char") == 0)
49     {
50       log->print(TAB TAB TAB "char   : ");
51 
52       workPerWI = 2048; // Indicates integer operations executed per work-item
53 
54       timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters);
55 
56       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
57 
58       log->print(gflops);
59       log->print(NEWLINE);
60       log->xmlRecord("char", gflops);
61     }
62     ///////////////////////////////////////////////////////////////////////////
63 
64     // Vector width 2
65     if (!forceTest || strcmp(specifiedTestName, "char2") == 0)
66     {
67       log->print(TAB TAB TAB "char2  : ");
68 
69       workPerWI = 2048;
70 
71       timed = run_kernel(queue, kernel_v2, globalSize, localSize, iters);
72 
73       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
74 
75       log->print(gflops);
76       log->print(NEWLINE);
77       log->xmlRecord("char2", gflops);
78     }
79     ///////////////////////////////////////////////////////////////////////////
80 
81     // Vector width 4
82     if (!forceTest || strcmp(specifiedTestName, "char4") == 0)
83     {
84       log->print(TAB TAB TAB "char4  : ");
85 
86       workPerWI = 2048;
87 
88       timed = run_kernel(queue, kernel_v4, globalSize, localSize, iters);
89 
90       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
91 
92       log->print(gflops);
93       log->print(NEWLINE);
94       log->xmlRecord("char4", gflops);
95     }
96     ///////////////////////////////////////////////////////////////////////////
97 
98     // Vector width 8
99     if (!forceTest || strcmp(specifiedTestName, "char8") == 0)
100     {
101       log->print(TAB TAB TAB "char8  : ");
102 
103       workPerWI = 2048;
104 
105       timed = run_kernel(queue, kernel_v8, globalSize, localSize, iters);
106 
107       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
108 
109       log->print(gflops);
110       log->print(NEWLINE);
111       log->xmlRecord("char8", gflops);
112     }
113     ///////////////////////////////////////////////////////////////////////////
114 
115     // Vector width 16
116     if (!forceTest || strcmp(specifiedTestName, "char16") == 0)
117     {
118       log->print(TAB TAB TAB "char16 : ");
119 
120       workPerWI = 2048;
121 
122       timed = run_kernel(queue, kernel_v16, globalSize, localSize, iters);
123 
124       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
125 
126       log->print(gflops);
127       log->print(NEWLINE);
128       log->xmlRecord("char16", gflops);
129     }
130     ///////////////////////////////////////////////////////////////////////////
131     log->xmlCloseTag(); // integer_compute
132   }
133   catch (cl::Error &error)
134   {
135     stringstream ss;
136     ss << error.what() << " (" << error.err() << ")" NEWLINE
137        << TAB TAB TAB "Tests skipped" NEWLINE;
138     log->print(ss.str());
139     return -1;
140   }
141 
142   return 0;
143 }
144