1 #include <clpeak.h>
2
runComputeInteger(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)3 int clPeak::runComputeInteger(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
4 {
5 float timed, gflops;
6 cl_uint workPerWI;
7 cl::NDRange globalSize, localSize;
8 cl_int A = 4;
9 uint iters = devInfo.computeIters;
10
11 if (!isComputeInt)
12 return 0;
13
14 try
15 {
16 log->print(NEWLINE TAB TAB "Integer compute (GIOPS)" NEWLINE);
17 log->xmlOpenTag("integer_compute");
18 log->xmlAppendAttribs("unit", "giops");
19
20 cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
21
22 uint64_t globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize);
23 uint64_t t = std::min((globalWIs * sizeof(cl_int)), devInfo.maxAllocSize) / sizeof(cl_int);
24 globalWIs = roundToMultipleOf(t, devInfo.maxWGSize);
25
26 cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_int)));
27
28 globalSize = globalWIs;
29 localSize = devInfo.maxWGSize;
30
31 cl::Kernel kernel_v1(prog, "compute_integer_v1");
32 kernel_v1.setArg(0, outputBuf), kernel_v1.setArg(1, A);
33
34 cl::Kernel kernel_v2(prog, "compute_integer_v2");
35 kernel_v2.setArg(0, outputBuf), kernel_v2.setArg(1, A);
36
37 cl::Kernel kernel_v4(prog, "compute_integer_v4");
38 kernel_v4.setArg(0, outputBuf), kernel_v4.setArg(1, A);
39
40 cl::Kernel kernel_v8(prog, "compute_integer_v8");
41 kernel_v8.setArg(0, outputBuf), kernel_v8.setArg(1, A);
42
43 cl::Kernel kernel_v16(prog, "compute_integer_v16");
44 kernel_v16.setArg(0, outputBuf), kernel_v16.setArg(1, A);
45
46 ///////////////////////////////////////////////////////////////////////////
47 // Vector width 1
48 if (!forceTest || strcmp(specifiedTestName, "int") == 0)
49 {
50 log->print(TAB TAB TAB "int : ");
51
52 workPerWI = 2048; // Indicates integer operations executed per work-item
53
54 timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters);
55
56 gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
57
58 log->print(gflops);
59 log->print(NEWLINE);
60 log->xmlRecord("int", gflops);
61 }
62 ///////////////////////////////////////////////////////////////////////////
63
64 // Vector width 2
65 if (!forceTest || strcmp(specifiedTestName, "int2") == 0)
66 {
67 log->print(TAB TAB TAB "int2 : ");
68
69 workPerWI = 2048;
70
71 timed = run_kernel(queue, kernel_v2, globalSize, localSize, iters);
72
73 gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
74
75 log->print(gflops);
76 log->print(NEWLINE);
77 log->xmlRecord("int2", gflops);
78 }
79 ///////////////////////////////////////////////////////////////////////////
80
81 // Vector width 4
82 if (!forceTest || strcmp(specifiedTestName, "int4") == 0)
83 {
84 log->print(TAB TAB TAB "int4 : ");
85
86 workPerWI = 2048;
87
88 timed = run_kernel(queue, kernel_v4, globalSize, localSize, iters);
89
90 gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
91
92 log->print(gflops);
93 log->print(NEWLINE);
94 log->xmlRecord("int4", gflops);
95 }
96 ///////////////////////////////////////////////////////////////////////////
97
98 // Vector width 8
99 if (!forceTest || strcmp(specifiedTestName, "int8") == 0)
100 {
101 log->print(TAB TAB TAB "int8 : ");
102
103 workPerWI = 2048;
104
105 timed = run_kernel(queue, kernel_v8, globalSize, localSize, iters);
106
107 gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
108
109 log->print(gflops);
110 log->print(NEWLINE);
111 log->xmlRecord("int8", gflops);
112 }
113 ///////////////////////////////////////////////////////////////////////////
114
115 // Vector width 16
116 if (!forceTest || strcmp(specifiedTestName, "int16") == 0)
117 {
118 log->print(TAB TAB TAB "int16 : ");
119
120 workPerWI = 2048;
121
122 timed = run_kernel(queue, kernel_v16, globalSize, localSize, iters);
123
124 gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
125
126 log->print(gflops);
127 log->print(NEWLINE);
128 log->xmlRecord("int16", gflops);
129 }
130 ///////////////////////////////////////////////////////////////////////////
131 log->xmlCloseTag(); // integer_compute
132 }
133 catch (cl::Error &error)
134 {
135 stringstream ss;
136 ss << error.what() << " (" << error.err() << ")" NEWLINE
137 << TAB TAB TAB "Tests skipped" NEWLINE;
138 log->print(ss.str());
139 return -1;
140 }
141
142 return 0;
143 }
144