1 #include <clpeak.h>
2
runTransferBandwidthTest(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)3 int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
4 {
5 if (!isTransferBW)
6 return 0;
7
8 float timed, gbps;
9 cl::NDRange globalSize, localSize;
10 cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
11 uint iters = devInfo.transferBWIters;
12 Timer timer;
13 float *arr = NULL;
14
15 uint64_t maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
16 uint64_t numItems = roundToMultipleOf(maxItems, devInfo.maxWGSize, devInfo.transferBWMaxSize);
17
18 try
19 {
20 arr = new float[numItems];
21 memset(arr, 0, numItems * sizeof(float));
22 cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));
23
24 log->print(NEWLINE TAB TAB "Transfer bandwidth (GBPS)" NEWLINE);
25 log->xmlOpenTag("transfer_bandwidth");
26 log->xmlAppendAttribs("unit", "gbps");
27
28 ///////////////////////////////////////////////////////////////////////////
29 // enqueueWriteBuffer
30 log->print(TAB TAB TAB "enqueueWriteBuffer : ");
31
32 // Dummy warm-up
33 queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
34 queue.finish();
35
36 timed = 0;
37
38 if (useEventTimer)
39 {
40 for (uint i = 0; i < iters; i++)
41 {
42 cl::Event timeEvent;
43 queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
44 queue.finish();
45 timed += timeInUS(timeEvent);
46 }
47 }
48 else
49 {
50 Timer timer;
51
52 timer.start();
53 for (uint i = 0; i < iters; i++)
54 {
55 queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
56 }
57 queue.finish();
58 timed = timer.stopAndTime();
59 }
60 timed /= static_cast<float>(iters);
61
62 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
63 log->print(gbps);
64 log->print(NEWLINE);
65 log->xmlRecord("enqueuewritebuffer", gbps);
66 ///////////////////////////////////////////////////////////////////////////
67 // enqueueReadBuffer
68 log->print(TAB TAB TAB "enqueueReadBuffer : ");
69
70 // Dummy warm-up
71 queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
72 queue.finish();
73
74 timed = 0;
75 if (useEventTimer)
76 {
77 for (uint i = 0; i < iters; i++)
78 {
79 cl::Event timeEvent;
80 queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
81 queue.finish();
82 timed += timeInUS(timeEvent);
83 }
84 }
85 else
86 {
87 Timer timer;
88
89 timer.start();
90 for (uint i = 0; i < iters; i++)
91 {
92 queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
93 }
94 queue.finish();
95 timed = timer.stopAndTime();
96 }
97 timed /= static_cast<float>(iters);
98
99 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
100 log->print(gbps);
101 log->print(NEWLINE);
102 log->xmlRecord("enqueuereadbuffer", gbps);
103 ///////////////////////////////////////////////////////////////////////////
104 // enqueueWriteBuffer non-blocking
105 log->print(TAB TAB TAB "enqueueWriteBuffer non-blocking : ");
106
107 // Dummy warm-up
108 queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
109 queue.finish();
110
111 timed = 0;
112
113 if (useEventTimer)
114 {
115 for (uint i = 0; i < iters; i++)
116 {
117 cl::Event timeEvent;
118 queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
119 queue.finish();
120 timed += timeInUS(timeEvent);
121 }
122 }
123 else
124 {
125 Timer timer;
126
127 timer.start();
128 for (uint i = 0; i < iters; i++)
129 {
130 queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
131 }
132 queue.finish();
133 timed = timer.stopAndTime();
134 }
135 timed /= static_cast<float>(iters);
136
137 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
138 log->print(gbps);
139 log->print(NEWLINE);
140 log->xmlRecord("enqueuewritebuffer_nonblocking", gbps);
141 ///////////////////////////////////////////////////////////////////////////
142 // enqueueReadBuffer non-blocking
143 log->print(TAB TAB TAB "enqueueReadBuffer non-blocking : ");
144
145 // Dummy warm-up
146 queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
147 queue.finish();
148
149 timed = 0;
150 if (useEventTimer)
151 {
152 for (uint i = 0; i < iters; i++)
153 {
154 cl::Event timeEvent;
155 queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
156 queue.finish();
157 timed += timeInUS(timeEvent);
158 }
159 }
160 else
161 {
162 Timer timer;
163
164 timer.start();
165 for (uint i = 0; i < iters; i++)
166 {
167 queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
168 }
169 queue.finish();
170 timed = timer.stopAndTime();
171 }
172 timed /= static_cast<float>(iters);
173
174 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
175 log->print(gbps);
176 log->print(NEWLINE);
177 log->xmlRecord("enqueuereadbuffer_nonblocking", gbps);
178 ///////////////////////////////////////////////////////////////////////////
179 // enqueueMapBuffer
180 log->print(TAB TAB TAB "enqueueMapBuffer(for read) : ");
181
182 queue.finish();
183
184 timed = 0;
185 if (useEventTimer)
186 {
187 for (uint i = 0; i < iters; i++)
188 {
189 cl::Event timeEvent;
190 void *mapPtr;
191
192 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)), NULL, &timeEvent);
193 queue.finish();
194 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
195 queue.finish();
196 timed += timeInUS(timeEvent);
197 }
198 }
199 else
200 {
201 for (uint i = 0; i < iters; i++)
202 {
203 Timer timer;
204 void *mapPtr;
205
206 timer.start();
207 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
208 queue.finish();
209 timed += timer.stopAndTime();
210
211 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
212 queue.finish();
213 }
214 }
215 timed /= static_cast<float>(iters);
216
217 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
218 log->print(gbps);
219 log->print(NEWLINE);
220 log->xmlRecord("enqueuemapbuffer", gbps);
221 ///////////////////////////////////////////////////////////////////////////
222
223 // memcpy from mapped ptr
224 log->print(TAB TAB TAB TAB "memcpy from mapped ptr : ");
225 queue.finish();
226
227 timed = 0;
228 for (uint i = 0; i < iters; i++)
229 {
230 cl::Event timeEvent;
231 void *mapPtr;
232
233 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
234 queue.finish();
235
236 timer.start();
237 memcpy(arr, mapPtr, (numItems * sizeof(float)));
238 timed += timer.stopAndTime();
239
240 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
241 queue.finish();
242 }
243 timed /= static_cast<float>(iters);
244
245 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
246 log->print(gbps);
247 log->print(NEWLINE);
248 log->xmlRecord("memcpy_from_mapped_ptr", gbps);
249
250 ///////////////////////////////////////////////////////////////////////////
251
252 // enqueueUnmap
253 log->print(TAB TAB TAB "enqueueUnmap(after write) : ");
254
255 queue.finish();
256
257 timed = 0;
258 if (useEventTimer)
259 {
260 for (uint i = 0; i < iters; i++)
261 {
262 cl::Event timeEvent;
263 void *mapPtr;
264
265 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
266 queue.finish();
267 queue.enqueueUnmapMemObject(clBuffer, mapPtr, NULL, &timeEvent);
268 queue.finish();
269 timed += timeInUS(timeEvent);
270 }
271 }
272 else
273 {
274 for (uint i = 0; i < iters; i++)
275 {
276 Timer timer;
277 void *mapPtr;
278
279 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
280 queue.finish();
281
282 timer.start();
283 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
284 queue.finish();
285 timed += timer.stopAndTime();
286 }
287 }
288 timed /= static_cast<float>(iters);
289 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
290
291 log->print(gbps);
292 log->print(NEWLINE);
293 log->xmlRecord("enqueueunmap", gbps);
294 ///////////////////////////////////////////////////////////////////////////
295
296 // memcpy to mapped ptr
297 log->print(TAB TAB TAB TAB "memcpy to mapped ptr : ");
298 queue.finish();
299
300 timed = 0;
301 for (uint i = 0; i < iters; i++)
302 {
303 cl::Event timeEvent;
304 void *mapPtr;
305
306 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
307 queue.finish();
308
309 timer.start();
310 memcpy(mapPtr, arr, (numItems * sizeof(float)));
311 timed += timer.stopAndTime();
312
313 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
314 queue.finish();
315 }
316 timed /= static_cast<float>(iters);
317
318 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
319 log->print(gbps);
320 log->print(NEWLINE);
321 log->xmlRecord("memcpy_to_mapped_ptr", gbps);
322
323 ///////////////////////////////////////////////////////////////////////////
324 log->xmlCloseTag(); // transfer_bandwidth
325
326 if (arr)
327 delete[] arr;
328 }
329 catch (cl::Error &error)
330 {
331 stringstream ss;
332 ss << error.what() << " (" << error.err() << ")" NEWLINE
333 << TAB TAB TAB "Tests skipped" NEWLINE;
334 log->print(ss.str());
335
336 if (arr)
337 {
338 delete[] arr;
339 }
340 return -1;
341 }
342
343 return 0;
344 }
345