xref: /aosp_15_r20/external/clpeak/src/transfer_bandwidth.cpp (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
#include <clpeak.h>

#include <vector>
2 
runTransferBandwidthTest(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)3 int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
4 {
5   if (!isTransferBW)
6     return 0;
7 
8   float timed, gbps;
9   cl::NDRange globalSize, localSize;
10   cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
11   uint iters = devInfo.transferBWIters;
12   Timer timer;
13   float *arr = NULL;
14 
15   uint64_t maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
16   uint64_t numItems = roundToMultipleOf(maxItems, devInfo.maxWGSize, devInfo.transferBWMaxSize);
17 
18   try
19   {
20     arr = new float[numItems];
21     memset(arr, 0, numItems * sizeof(float));
22     cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));
23 
24     log->print(NEWLINE TAB TAB "Transfer bandwidth (GBPS)" NEWLINE);
25     log->xmlOpenTag("transfer_bandwidth");
26     log->xmlAppendAttribs("unit", "gbps");
27 
28     ///////////////////////////////////////////////////////////////////////////
29     // enqueueWriteBuffer
30     log->print(TAB TAB TAB "enqueueWriteBuffer              : ");
31 
32     // Dummy warm-up
33     queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
34     queue.finish();
35 
36     timed = 0;
37 
38     if (useEventTimer)
39     {
40       for (uint i = 0; i < iters; i++)
41       {
42         cl::Event timeEvent;
43         queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
44         queue.finish();
45         timed += timeInUS(timeEvent);
46       }
47     }
48     else
49     {
50       Timer timer;
51 
52       timer.start();
53       for (uint i = 0; i < iters; i++)
54       {
55         queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
56       }
57       queue.finish();
58       timed = timer.stopAndTime();
59     }
60     timed /= static_cast<float>(iters);
61 
62     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
63     log->print(gbps);
64     log->print(NEWLINE);
65     log->xmlRecord("enqueuewritebuffer", gbps);
66     ///////////////////////////////////////////////////////////////////////////
67     // enqueueReadBuffer
68     log->print(TAB TAB TAB "enqueueReadBuffer               : ");
69 
70     // Dummy warm-up
71     queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
72     queue.finish();
73 
74     timed = 0;
75     if (useEventTimer)
76     {
77       for (uint i = 0; i < iters; i++)
78       {
79         cl::Event timeEvent;
80         queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
81         queue.finish();
82         timed += timeInUS(timeEvent);
83       }
84     }
85     else
86     {
87       Timer timer;
88 
89       timer.start();
90       for (uint i = 0; i < iters; i++)
91       {
92         queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
93       }
94       queue.finish();
95       timed = timer.stopAndTime();
96     }
97     timed /= static_cast<float>(iters);
98 
99     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
100     log->print(gbps);
101     log->print(NEWLINE);
102     log->xmlRecord("enqueuereadbuffer", gbps);
103     ///////////////////////////////////////////////////////////////////////////
104     // enqueueWriteBuffer non-blocking
105     log->print(TAB TAB TAB "enqueueWriteBuffer non-blocking : ");
106 
107     // Dummy warm-up
108     queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
109     queue.finish();
110 
111     timed = 0;
112 
113     if (useEventTimer)
114     {
115       for (uint i = 0; i < iters; i++)
116       {
117         cl::Event timeEvent;
118         queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
119         queue.finish();
120         timed += timeInUS(timeEvent);
121       }
122     }
123     else
124     {
125       Timer timer;
126 
127       timer.start();
128       for (uint i = 0; i < iters; i++)
129       {
130         queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
131       }
132       queue.finish();
133       timed = timer.stopAndTime();
134     }
135     timed /= static_cast<float>(iters);
136 
137     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
138     log->print(gbps);
139     log->print(NEWLINE);
140     log->xmlRecord("enqueuewritebuffer_nonblocking", gbps);
141     ///////////////////////////////////////////////////////////////////////////
142     // enqueueReadBuffer non-blocking
143     log->print(TAB TAB TAB "enqueueReadBuffer non-blocking  : ");
144 
145     // Dummy warm-up
146     queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
147     queue.finish();
148 
149     timed = 0;
150     if (useEventTimer)
151     {
152       for (uint i = 0; i < iters; i++)
153       {
154         cl::Event timeEvent;
155         queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
156         queue.finish();
157         timed += timeInUS(timeEvent);
158       }
159     }
160     else
161     {
162       Timer timer;
163 
164       timer.start();
165       for (uint i = 0; i < iters; i++)
166       {
167         queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
168       }
169       queue.finish();
170       timed = timer.stopAndTime();
171     }
172     timed /= static_cast<float>(iters);
173 
174     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
175     log->print(gbps);
176     log->print(NEWLINE);
177     log->xmlRecord("enqueuereadbuffer_nonblocking", gbps);
178     ///////////////////////////////////////////////////////////////////////////
179     // enqueueMapBuffer
180     log->print(TAB TAB TAB "enqueueMapBuffer(for read)      : ");
181 
182     queue.finish();
183 
184     timed = 0;
185     if (useEventTimer)
186     {
187       for (uint i = 0; i < iters; i++)
188       {
189         cl::Event timeEvent;
190         void *mapPtr;
191 
192         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)), NULL, &timeEvent);
193         queue.finish();
194         queue.enqueueUnmapMemObject(clBuffer, mapPtr);
195         queue.finish();
196         timed += timeInUS(timeEvent);
197       }
198     }
199     else
200     {
201       for (uint i = 0; i < iters; i++)
202       {
203         Timer timer;
204         void *mapPtr;
205 
206         timer.start();
207         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
208         queue.finish();
209         timed += timer.stopAndTime();
210 
211         queue.enqueueUnmapMemObject(clBuffer, mapPtr);
212         queue.finish();
213       }
214     }
215     timed /= static_cast<float>(iters);
216 
217     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
218     log->print(gbps);
219     log->print(NEWLINE);
220     log->xmlRecord("enqueuemapbuffer", gbps);
221     ///////////////////////////////////////////////////////////////////////////
222 
223     // memcpy from mapped ptr
224     log->print(TAB TAB TAB TAB "memcpy from mapped ptr        : ");
225     queue.finish();
226 
227     timed = 0;
228     for (uint i = 0; i < iters; i++)
229     {
230       cl::Event timeEvent;
231       void *mapPtr;
232 
233       mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
234       queue.finish();
235 
236       timer.start();
237       memcpy(arr, mapPtr, (numItems * sizeof(float)));
238       timed += timer.stopAndTime();
239 
240       queue.enqueueUnmapMemObject(clBuffer, mapPtr);
241       queue.finish();
242     }
243     timed /= static_cast<float>(iters);
244 
245     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
246     log->print(gbps);
247     log->print(NEWLINE);
248     log->xmlRecord("memcpy_from_mapped_ptr", gbps);
249 
250     ///////////////////////////////////////////////////////////////////////////
251 
252     // enqueueUnmap
253     log->print(TAB TAB TAB "enqueueUnmap(after write)       : ");
254 
255     queue.finish();
256 
257     timed = 0;
258     if (useEventTimer)
259     {
260       for (uint i = 0; i < iters; i++)
261       {
262         cl::Event timeEvent;
263         void *mapPtr;
264 
265         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
266         queue.finish();
267         queue.enqueueUnmapMemObject(clBuffer, mapPtr, NULL, &timeEvent);
268         queue.finish();
269         timed += timeInUS(timeEvent);
270       }
271     }
272     else
273     {
274       for (uint i = 0; i < iters; i++)
275       {
276         Timer timer;
277         void *mapPtr;
278 
279         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
280         queue.finish();
281 
282         timer.start();
283         queue.enqueueUnmapMemObject(clBuffer, mapPtr);
284         queue.finish();
285         timed += timer.stopAndTime();
286       }
287     }
288     timed /= static_cast<float>(iters);
289     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
290 
291     log->print(gbps);
292     log->print(NEWLINE);
293     log->xmlRecord("enqueueunmap", gbps);
294     ///////////////////////////////////////////////////////////////////////////
295 
296     // memcpy to mapped ptr
297     log->print(TAB TAB TAB TAB "memcpy to mapped ptr          : ");
298     queue.finish();
299 
300     timed = 0;
301     for (uint i = 0; i < iters; i++)
302     {
303       cl::Event timeEvent;
304       void *mapPtr;
305 
306       mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
307       queue.finish();
308 
309       timer.start();
310       memcpy(mapPtr, arr, (numItems * sizeof(float)));
311       timed += timer.stopAndTime();
312 
313       queue.enqueueUnmapMemObject(clBuffer, mapPtr);
314       queue.finish();
315     }
316     timed /= static_cast<float>(iters);
317 
318     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
319     log->print(gbps);
320     log->print(NEWLINE);
321     log->xmlRecord("memcpy_to_mapped_ptr", gbps);
322 
323     ///////////////////////////////////////////////////////////////////////////
324     log->xmlCloseTag(); // transfer_bandwidth
325 
326     if (arr)
327       delete[] arr;
328   }
329   catch (cl::Error &error)
330   {
331     stringstream ss;
332     ss << error.what() << " (" << error.err() << ")" NEWLINE
333        << TAB TAB TAB "Tests skipped" NEWLINE;
334     log->print(ss.str());
335 
336     if (arr)
337     {
338       delete[] arr;
339     }
340     return -1;
341   }
342 
343   return 0;
344 }
345