1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
17
18 #include <algorithm>
19 #include <string>
20 #include <utility>
21 #include <vector>
22
23 #include "absl/strings/ascii.h"
24 #include "absl/strings/numbers.h"
25 #include "absl/strings/str_cat.h"
26 #include "absl/strings/str_format.h"
27 #include "absl/strings/str_split.h"
28 #include "tensorflow/lite/delegates/gpu/cl/util.h"
29 #include "tensorflow/lite/delegates/gpu/common/status.h"
30 #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h"
31
32 namespace tflite {
33 namespace gpu {
34 namespace cl {
35
ParseQualcommOpenClCompilerVersion(const std::string & cl_driver_version,AdrenoInfo::OpenClCompilerVersion * result)36 void ParseQualcommOpenClCompilerVersion(
37 const std::string& cl_driver_version,
38 AdrenoInfo::OpenClCompilerVersion* result) {
39 // Searching this part: "Compiler E031.**.**.**" where * is digit
40 const std::string start = "Compiler E031.";
41 size_t position = cl_driver_version.find(start);
42 if (position == std::string::npos) {
43 return;
44 }
45 const size_t main_part_length = 8; // main part is **.**.**
46 if (position + start.length() + main_part_length >
47 cl_driver_version.length()) {
48 return;
49 }
50
51 const std::string main_part =
52 cl_driver_version.substr(position + start.length(), main_part_length);
53 if (!absl::ascii_isdigit(main_part[0]) ||
54 !absl::ascii_isdigit(main_part[1]) || main_part[2] != '.' ||
55 !absl::ascii_isdigit(main_part[3]) ||
56 !absl::ascii_isdigit(main_part[4]) || main_part[5] != '.' ||
57 !absl::ascii_isdigit(main_part[6]) ||
58 !absl::ascii_isdigit(main_part[7])) {
59 return;
60 }
61 result->major = (main_part[0] - '0') * 10 + (main_part[1] - '0');
62 result->minor = (main_part[3] - '0') * 10 + (main_part[4] - '0');
63 result->patch = (main_part[6] - '0') * 10 + (main_part[7] - '0');
64 }
65
66 template <>
GetDeviceInfo(cl_device_id id,cl_device_info info)67 std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info) {
68 size_t size;
69 cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size);
70 if (error != CL_SUCCESS) {
71 return "";
72 }
73
74 std::string result(size - 1, 0);
75 error = clGetDeviceInfo(id, info, size, &result[0], nullptr);
76 if (error != CL_SUCCESS) {
77 return "";
78 }
79 return result;
80 }
81
82 namespace {
83 template <typename T>
GetPlatformInfo(cl_platform_id id,cl_platform_info info)84 T GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
85 T result;
86 cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr);
87 if (error != CL_SUCCESS) {
88 return -1;
89 }
90 return result;
91 }
92
GetPlatformInfo(cl_platform_id id,cl_platform_info info)93 std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
94 size_t size;
95 cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size);
96 if (error != CL_SUCCESS) {
97 return "";
98 }
99
100 std::string result(size - 1, 0);
101 error = clGetPlatformInfo(id, info, size, &result[0], nullptr);
102 if (error != CL_SUCCESS) {
103 return "";
104 }
105 return result;
106 }
107
GetDeviceWorkDimsSizes(cl_device_id id,int3 * result)108 void GetDeviceWorkDimsSizes(cl_device_id id, int3* result) {
109 int dims_count =
110 GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
111 if (dims_count < 3) {
112 return;
113 }
114 std::vector<size_t> limits(dims_count);
115 cl_int error =
116 clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_SIZES,
117 sizeof(size_t) * dims_count, limits.data(), nullptr);
118 if (error != CL_SUCCESS) {
119 return;
120 }
121 // dims_count must be at least 3 according to spec
122 result->x = limits[0];
123 result->y = limits[1];
124 result->z = limits[2];
125 }
126
ParseCLVersion(const std::string & version)127 OpenClVersion ParseCLVersion(const std::string& version) {
128 const auto first_dot_pos = version.find_first_of('.');
129 if (first_dot_pos == std::string::npos) {
130 return OpenClVersion::kCl1_0;
131 }
132 const int major = version[first_dot_pos - 1] - '0';
133 const int minor = version[first_dot_pos + 1] - '0';
134
135 if (major == 1) {
136 if (minor == 2) {
137 return OpenClVersion::kCl1_2;
138 } else if (minor == 1) {
139 return OpenClVersion::kCl1_1;
140 } else {
141 return OpenClVersion::kCl1_0;
142 }
143 } else if (major == 2) {
144 if (minor == 2) {
145 return OpenClVersion::kCl2_2;
146 } else if (minor == 1) {
147 return OpenClVersion::kCl2_1;
148 } else {
149 return OpenClVersion::kCl2_0;
150 }
151 } else if (major == 3) {
152 return OpenClVersion::kCl3_0;
153 } else {
154 return OpenClVersion::kCl1_0;
155 }
156 }
157
// Returns true when gpu_version lies in the half-open interval
// [min_version, max_version): min_version is included, max_version is
// excluded.
bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) {
  if (gpu_version < min_version) {
    return false;
  }
  return gpu_version < max_version;
}
163
GpuInfoFromDeviceID(cl_device_id id,cl_platform_id platform_id)164 GpuInfo GpuInfoFromDeviceID(cl_device_id id, cl_platform_id platform_id) {
165 GpuInfo info;
166 info.opencl_info.platform_version =
167 GetPlatformInfo(platform_id, CL_PLATFORM_VERSION);
168 info.opencl_info.device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
169 info.opencl_info.vendor_name =
170 GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
171 info.opencl_info.opencl_c_version =
172 GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
173 info.opencl_info.driver_version =
174 GetDeviceInfo<std::string>(id, CL_DRIVER_VERSION);
175 const std::string gpu_description = absl::StrCat(
176 info.opencl_info.device_name, " ", info.opencl_info.vendor_name, " ",
177 info.opencl_info.opencl_c_version);
178 GetGpuInfoFromDeviceDescription(gpu_description, GpuApi::kOpenCl, &info);
179 info.opencl_info.cl_version =
180 ParseCLVersion(info.opencl_info.opencl_c_version);
181 info.opencl_info.extensions =
182 absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
183 info.opencl_info.supports_fp16 = false;
184 info.opencl_info.supports_image3d_writes = false;
185 for (const auto& ext : info.opencl_info.extensions) {
186 if (ext == "cl_khr_fp16") {
187 info.opencl_info.supports_fp16 = true;
188 }
189 if (ext == "cl_khr_3d_image_writes") {
190 info.opencl_info.supports_image3d_writes = true;
191 }
192 }
193
194 info.opencl_info.supports_images =
195 GetDeviceInfo<cl_bool>(id, CL_DEVICE_IMAGE_SUPPORT);
196
197 cl_device_fp_config f32_config =
198 GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
199 info.opencl_info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
200
201 if (info.opencl_info.supports_fp16) {
202 cl_device_fp_config f16_config;
203 auto status = GetDeviceInfo<cl_device_fp_config>(
204 id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
205 // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
206 if (status.ok() && !info.IsAMD()) {
207 info.opencl_info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
208 } else { // happens on PowerVR
209 f16_config = f32_config;
210 info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
211 }
212 } else {
213 info.opencl_info.supports_fp16_rtn = false;
214 }
215
216 if (info.IsPowerVR() && !info.opencl_info.supports_fp16) {
217 // PowerVR doesn't have full support of fp16 and so doesn't list this
218 // extension. But it can support fp16 in MADs and as buffers/textures types,
219 // so we will use it.
220 info.opencl_info.supports_fp16 = true;
221 info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
222 }
223
224 if (!info.opencl_info.supports_image3d_writes &&
225 ((info.IsAdreno() && info.adreno_info.IsAdreno4xx()) ||
226 info.IsNvidia())) {
227 // in local tests Adreno 430 can write in image 3d, at least on small sizes,
228 // but it doesn't have cl_khr_3d_image_writes in list of available
229 // extensions
230 // The same for NVidia
231 info.opencl_info.supports_image3d_writes = true;
232 }
233 info.opencl_info.compute_units_count =
234 GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
235 info.opencl_info.image2d_max_width =
236 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
237 info.opencl_info.image2d_max_height =
238 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
239 info.opencl_info.buffer_max_size =
240 GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
241 info.opencl_info.max_allocation_size =
242 GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
243 if (info.opencl_info.cl_version >= OpenClVersion::kCl1_2) {
244 info.opencl_info.image_buffer_max_size =
245 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
246 info.opencl_info.image_array_max_layers =
247 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
248 }
249 info.opencl_info.image3d_max_width =
250 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
251 info.opencl_info.image3d_max_height =
252 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
253 info.opencl_info.image3d_max_depth =
254 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
255 int3 max_work_group_sizes;
256 GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
257 info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
258 info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
259 info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
260 info.opencl_info.max_work_group_total_size =
261 GetDeviceInfo<size_t>(id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
262
263 info.opencl_info.base_addr_align_in_bits =
264 GetDeviceInfo<cl_uint>(id, CL_DEVICE_MEM_BASE_ADDR_ALIGN);
265 info.opencl_info.image_pitch_alignment = 0;
266 if (info.opencl_info.cl_version == OpenClVersion::kCl2_0 ||
267 info.opencl_info.cl_version == OpenClVersion::kCl2_1 ||
268 info.opencl_info.cl_version == OpenClVersion::kCl2_2) {
269 info.opencl_info.image_pitch_alignment =
270 GetDeviceInfo<cl_uint>(id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT);
271 info.opencl_info.image_base_address_alignment =
272 GetDeviceInfo<cl_uint>(id, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT);
273 } else if (info.SupportsExtension("cl_khr_image2d_from_buffer")) {
274 cl_uint result = 0;
275 auto status =
276 GetDeviceInfo(id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR, &result);
277 if (status.ok()) {
278 info.opencl_info.image_pitch_alignment = result;
279 }
280 result = 0;
281 status =
282 GetDeviceInfo(id, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR, &result);
283 if (status.ok()) {
284 info.opencl_info.image_base_address_alignment = result;
285 }
286 }
287
288 if (info.IsIntel()) {
289 if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
290 size_t sub_groups_count;
291 cl_int status =
292 clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0,
293 nullptr, &sub_groups_count);
294 if (status == CL_SUCCESS) {
295 std::vector<size_t> sub_group_sizes(sub_groups_count);
296 status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/,
297 sizeof(size_t) * sub_groups_count,
298 sub_group_sizes.data(), nullptr);
299 if (status == CL_SUCCESS) {
300 for (int i = 0; i < sub_groups_count; ++i) {
301 info.supported_subgroup_sizes.push_back(sub_group_sizes[i]);
302 }
303 }
304 }
305 }
306 }
307 if (info.IsAdreno()) {
308 ParseQualcommOpenClCompilerVersion(info.opencl_info.driver_version,
309 &info.adreno_info.cl_compiler_version);
310 }
311 return info;
312 }
313
314 } // namespace
315
CLDevice(cl_device_id id,cl_platform_id platform_id)316 CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
317 : info_(GpuInfoFromDeviceID(id, platform_id)),
318 id_(id),
319 platform_id_(platform_id) {
320 if (info_.IsAdreno() &&
321 info_.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) {
322 acceleration::AndroidInfo android_info;
323 if (acceleration::RequestAndroidInfo(&android_info).ok()) {
324 info_.adreno_info.compiler_bugs_in_a6xx =
325 android_info.android_sdk_version == "26";
326 }
327 }
328 }
329
// Copy constructor: duplicates the GPU info and both OpenCL handles of
// `device`. The handles are copied as plain values.
CLDevice::CLDevice(const CLDevice& device)
    : info_(device.info_),
      id_(device.id_),
      platform_id_(device.platform_id_) {}
332
// Copy assignment: takes over the GPU info and both OpenCL handles of
// `device`. Self-assignment is a no-op.
CLDevice& CLDevice::operator=(const CLDevice& device) {
  if (this == &device) {
    return *this;
  }
  info_ = device.info_;
  id_ = device.id_;
  platform_id_ = device.platform_id_;
  return *this;
}
341
CLDevice(CLDevice && device)342 CLDevice::CLDevice(CLDevice&& device)
343 : info_(std::move(device.info_)),
344 id_(device.id_),
345 platform_id_(device.platform_id_) {
346 device.id_ = nullptr;
347 device.platform_id_ = nullptr;
348 }
349
operator =(CLDevice && device)350 CLDevice& CLDevice::operator=(CLDevice&& device) {
351 if (this != &device) {
352 id_ = nullptr;
353 platform_id_ = nullptr;
354 info_ = std::move(device.info_);
355 std::swap(id_, device.id_);
356 std::swap(platform_id_, device.platform_id_);
357 }
358 return *this;
359 }
360
GetPlatformVersion() const361 std::string CLDevice::GetPlatformVersion() const {
362 return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
363 }
364
DisableOneLayerTextureArray()365 void CLDevice::DisableOneLayerTextureArray() {
366 info_.adreno_info.support_one_layer_texture_array = false;
367 }
368
CreateDefaultGPUDevice(CLDevice * result)369 absl::Status CreateDefaultGPUDevice(CLDevice* result) {
370 cl_uint num_platforms;
371 cl_int status = clGetPlatformIDs(0, nullptr, &num_platforms);
372 if (status != CL_SUCCESS) {
373 return absl::UnknownError(
374 absl::StrFormat("clGetPlatformIDs returned %d", status));
375 }
376 if (num_platforms == 0) {
377 return absl::UnknownError("No supported OpenCL platform.");
378 }
379 std::vector<cl_platform_id> platforms(num_platforms);
380 status = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
381 if (status != CL_SUCCESS) {
382 return absl::UnknownError(
383 absl::StrFormat("clGetPlatformIDs returned %d", status));
384 }
385
386 cl_platform_id platform_id = platforms[0];
387 cl_uint num_devices;
388 status =
389 clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
390 if (status != CL_SUCCESS) {
391 return absl::UnknownError(
392 absl::StrFormat("clGetDeviceIDs returned %d", status));
393 }
394 if (num_devices == 0) {
395 return absl::UnknownError("No GPU on current platform.");
396 }
397
398 std::vector<cl_device_id> devices(num_devices);
399 status = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices,
400 devices.data(), nullptr);
401 if (status != CL_SUCCESS) {
402 return absl::UnknownError(
403 absl::StrFormat("clGetDeviceIDs returned %d", status));
404 }
405
406 *result = CLDevice(devices[0], platform_id);
407 return absl::OkStatus();
408 }
409
410 } // namespace cl
411 } // namespace gpu
412 } // namespace tflite
413