xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/cl/cl_device.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
17 
18 #include <algorithm>
19 #include <string>
20 #include <utility>
21 #include <vector>
22 
23 #include "absl/strings/ascii.h"
24 #include "absl/strings/numbers.h"
25 #include "absl/strings/str_cat.h"
26 #include "absl/strings/str_format.h"
27 #include "absl/strings/str_split.h"
28 #include "tensorflow/lite/delegates/gpu/cl/util.h"
29 #include "tensorflow/lite/delegates/gpu/common/status.h"
30 #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h"
31 
32 namespace tflite {
33 namespace gpu {
34 namespace cl {
35 
ParseQualcommOpenClCompilerVersion(const std::string & cl_driver_version,AdrenoInfo::OpenClCompilerVersion * result)36 void ParseQualcommOpenClCompilerVersion(
37     const std::string& cl_driver_version,
38     AdrenoInfo::OpenClCompilerVersion* result) {
39   // Searching this part: "Compiler E031.**.**.**" where * is digit
40   const std::string start = "Compiler E031.";
41   size_t position = cl_driver_version.find(start);
42   if (position == std::string::npos) {
43     return;
44   }
45   const size_t main_part_length = 8;  // main part is **.**.**
46   if (position + start.length() + main_part_length >
47       cl_driver_version.length()) {
48     return;
49   }
50 
51   const std::string main_part =
52       cl_driver_version.substr(position + start.length(), main_part_length);
53   if (!absl::ascii_isdigit(main_part[0]) ||
54       !absl::ascii_isdigit(main_part[1]) || main_part[2] != '.' ||
55       !absl::ascii_isdigit(main_part[3]) ||
56       !absl::ascii_isdigit(main_part[4]) || main_part[5] != '.' ||
57       !absl::ascii_isdigit(main_part[6]) ||
58       !absl::ascii_isdigit(main_part[7])) {
59     return;
60   }
61   result->major = (main_part[0] - '0') * 10 + (main_part[1] - '0');
62   result->minor = (main_part[3] - '0') * 10 + (main_part[4] - '0');
63   result->patch = (main_part[6] - '0') * 10 + (main_part[7] - '0');
64 }
65 
66 template <>
GetDeviceInfo(cl_device_id id,cl_device_info info)67 std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info) {
68   size_t size;
69   cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size);
70   if (error != CL_SUCCESS) {
71     return "";
72   }
73 
74   std::string result(size - 1, 0);
75   error = clGetDeviceInfo(id, info, size, &result[0], nullptr);
76   if (error != CL_SUCCESS) {
77     return "";
78   }
79   return result;
80 }
81 
82 namespace {
83 template <typename T>
GetPlatformInfo(cl_platform_id id,cl_platform_info info)84 T GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
85   T result;
86   cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr);
87   if (error != CL_SUCCESS) {
88     return -1;
89   }
90   return result;
91 }
92 
GetPlatformInfo(cl_platform_id id,cl_platform_info info)93 std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
94   size_t size;
95   cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size);
96   if (error != CL_SUCCESS) {
97     return "";
98   }
99 
100   std::string result(size - 1, 0);
101   error = clGetPlatformInfo(id, info, size, &result[0], nullptr);
102   if (error != CL_SUCCESS) {
103     return "";
104   }
105   return result;
106 }
107 
GetDeviceWorkDimsSizes(cl_device_id id,int3 * result)108 void GetDeviceWorkDimsSizes(cl_device_id id, int3* result) {
109   int dims_count =
110       GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
111   if (dims_count < 3) {
112     return;
113   }
114   std::vector<size_t> limits(dims_count);
115   cl_int error =
116       clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_SIZES,
117                       sizeof(size_t) * dims_count, limits.data(), nullptr);
118   if (error != CL_SUCCESS) {
119     return;
120   }
121   // dims_count must be at least 3 according to spec
122   result->x = limits[0];
123   result->y = limits[1];
124   result->z = limits[2];
125 }
126 
ParseCLVersion(const std::string & version)127 OpenClVersion ParseCLVersion(const std::string& version) {
128   const auto first_dot_pos = version.find_first_of('.');
129   if (first_dot_pos == std::string::npos) {
130     return OpenClVersion::kCl1_0;
131   }
132   const int major = version[first_dot_pos - 1] - '0';
133   const int minor = version[first_dot_pos + 1] - '0';
134 
135   if (major == 1) {
136     if (minor == 2) {
137       return OpenClVersion::kCl1_2;
138     } else if (minor == 1) {
139       return OpenClVersion::kCl1_1;
140     } else {
141       return OpenClVersion::kCl1_0;
142     }
143   } else if (major == 2) {
144     if (minor == 2) {
145       return OpenClVersion::kCl2_2;
146     } else if (minor == 1) {
147       return OpenClVersion::kCl2_1;
148     } else {
149       return OpenClVersion::kCl2_0;
150     }
151   } else if (major == 3) {
152     return OpenClVersion::kCl3_0;
153   } else {
154     return OpenClVersion::kCl1_0;
155   }
156 }
157 
// Returns true when gpu_version lies in the half-open interval
// [min_version, max_version): min_version is included, max_version is
// excluded.
bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) {
  if (gpu_version < min_version) {
    return false;
  }
  return gpu_version < max_version;
}
163 
GpuInfoFromDeviceID(cl_device_id id,cl_platform_id platform_id)164 GpuInfo GpuInfoFromDeviceID(cl_device_id id, cl_platform_id platform_id) {
165   GpuInfo info;
166   info.opencl_info.platform_version =
167       GetPlatformInfo(platform_id, CL_PLATFORM_VERSION);
168   info.opencl_info.device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
169   info.opencl_info.vendor_name =
170       GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
171   info.opencl_info.opencl_c_version =
172       GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
173   info.opencl_info.driver_version =
174       GetDeviceInfo<std::string>(id, CL_DRIVER_VERSION);
175   const std::string gpu_description = absl::StrCat(
176       info.opencl_info.device_name, " ", info.opencl_info.vendor_name, " ",
177       info.opencl_info.opencl_c_version);
178   GetGpuInfoFromDeviceDescription(gpu_description, GpuApi::kOpenCl, &info);
179   info.opencl_info.cl_version =
180       ParseCLVersion(info.opencl_info.opencl_c_version);
181   info.opencl_info.extensions =
182       absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
183   info.opencl_info.supports_fp16 = false;
184   info.opencl_info.supports_image3d_writes = false;
185   for (const auto& ext : info.opencl_info.extensions) {
186     if (ext == "cl_khr_fp16") {
187       info.opencl_info.supports_fp16 = true;
188     }
189     if (ext == "cl_khr_3d_image_writes") {
190       info.opencl_info.supports_image3d_writes = true;
191     }
192   }
193 
194   info.opencl_info.supports_images =
195       GetDeviceInfo<cl_bool>(id, CL_DEVICE_IMAGE_SUPPORT);
196 
197   cl_device_fp_config f32_config =
198       GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
199   info.opencl_info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
200 
201   if (info.opencl_info.supports_fp16) {
202     cl_device_fp_config f16_config;
203     auto status = GetDeviceInfo<cl_device_fp_config>(
204         id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
205     // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
206     if (status.ok() && !info.IsAMD()) {
207       info.opencl_info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
208     } else {  // happens on PowerVR
209       f16_config = f32_config;
210       info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
211     }
212   } else {
213     info.opencl_info.supports_fp16_rtn = false;
214   }
215 
216   if (info.IsPowerVR() && !info.opencl_info.supports_fp16) {
217     // PowerVR doesn't have full support of fp16 and so doesn't list this
218     // extension. But it can support fp16 in MADs and as buffers/textures types,
219     // so we will use it.
220     info.opencl_info.supports_fp16 = true;
221     info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
222   }
223 
224   if (!info.opencl_info.supports_image3d_writes &&
225       ((info.IsAdreno() && info.adreno_info.IsAdreno4xx()) ||
226        info.IsNvidia())) {
227     // in local tests Adreno 430 can write in image 3d, at least on small sizes,
228     // but it doesn't have cl_khr_3d_image_writes in list of available
229     // extensions
230     // The same for NVidia
231     info.opencl_info.supports_image3d_writes = true;
232   }
233   info.opencl_info.compute_units_count =
234       GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
235   info.opencl_info.image2d_max_width =
236       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
237   info.opencl_info.image2d_max_height =
238       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
239   info.opencl_info.buffer_max_size =
240       GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
241   info.opencl_info.max_allocation_size =
242       GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
243   if (info.opencl_info.cl_version >= OpenClVersion::kCl1_2) {
244     info.opencl_info.image_buffer_max_size =
245         GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
246     info.opencl_info.image_array_max_layers =
247         GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
248   }
249   info.opencl_info.image3d_max_width =
250       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
251   info.opencl_info.image3d_max_height =
252       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
253   info.opencl_info.image3d_max_depth =
254       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
255   int3 max_work_group_sizes;
256   GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
257   info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
258   info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
259   info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
260   info.opencl_info.max_work_group_total_size =
261       GetDeviceInfo<size_t>(id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
262 
263   info.opencl_info.base_addr_align_in_bits =
264       GetDeviceInfo<cl_uint>(id, CL_DEVICE_MEM_BASE_ADDR_ALIGN);
265   info.opencl_info.image_pitch_alignment = 0;
266   if (info.opencl_info.cl_version == OpenClVersion::kCl2_0 ||
267       info.opencl_info.cl_version == OpenClVersion::kCl2_1 ||
268       info.opencl_info.cl_version == OpenClVersion::kCl2_2) {
269     info.opencl_info.image_pitch_alignment =
270         GetDeviceInfo<cl_uint>(id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT);
271     info.opencl_info.image_base_address_alignment =
272         GetDeviceInfo<cl_uint>(id, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT);
273   } else if (info.SupportsExtension("cl_khr_image2d_from_buffer")) {
274     cl_uint result = 0;
275     auto status =
276         GetDeviceInfo(id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR, &result);
277     if (status.ok()) {
278       info.opencl_info.image_pitch_alignment = result;
279     }
280     result = 0;
281     status =
282         GetDeviceInfo(id, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR, &result);
283     if (status.ok()) {
284       info.opencl_info.image_base_address_alignment = result;
285     }
286   }
287 
288   if (info.IsIntel()) {
289     if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
290       size_t sub_groups_count;
291       cl_int status =
292           clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0,
293                           nullptr, &sub_groups_count);
294       if (status == CL_SUCCESS) {
295         std::vector<size_t> sub_group_sizes(sub_groups_count);
296         status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/,
297                                  sizeof(size_t) * sub_groups_count,
298                                  sub_group_sizes.data(), nullptr);
299         if (status == CL_SUCCESS) {
300           for (int i = 0; i < sub_groups_count; ++i) {
301             info.supported_subgroup_sizes.push_back(sub_group_sizes[i]);
302           }
303         }
304       }
305     }
306   }
307   if (info.IsAdreno()) {
308     ParseQualcommOpenClCompilerVersion(info.opencl_info.driver_version,
309                                        &info.adreno_info.cl_compiler_version);
310   }
311   return info;
312 }
313 
314 }  // namespace
315 
CLDevice(cl_device_id id,cl_platform_id platform_id)316 CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
317     : info_(GpuInfoFromDeviceID(id, platform_id)),
318       id_(id),
319       platform_id_(platform_id) {
320   if (info_.IsAdreno() &&
321       info_.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) {
322     acceleration::AndroidInfo android_info;
323     if (acceleration::RequestAndroidInfo(&android_info).ok()) {
324       info_.adreno_info.compiler_bugs_in_a6xx =
325           android_info.android_sdk_version == "26";
326     }
327   }
328 }
329 
// Copy constructor: duplicates the GpuInfo and copies the device and platform
// handles of `device` as-is.
CLDevice::CLDevice(const CLDevice& device)
    : info_(device.info_),
      id_(device.id_),
      platform_id_(device.platform_id_) {
}
332 
// Copy assignment: copies the GpuInfo and both handles from `device`.
// Self-assignment is a no-op.
CLDevice& CLDevice::operator=(const CLDevice& device) {
  if (this == &device) {
    return *this;
  }
  info_ = device.info_;
  id_ = device.id_;
  platform_id_ = device.platform_id_;
  return *this;
}
341 
CLDevice(CLDevice && device)342 CLDevice::CLDevice(CLDevice&& device)
343     : info_(std::move(device.info_)),
344       id_(device.id_),
345       platform_id_(device.platform_id_) {
346   device.id_ = nullptr;
347   device.platform_id_ = nullptr;
348 }
349 
operator =(CLDevice && device)350 CLDevice& CLDevice::operator=(CLDevice&& device) {
351   if (this != &device) {
352     id_ = nullptr;
353     platform_id_ = nullptr;
354     info_ = std::move(device.info_);
355     std::swap(id_, device.id_);
356     std::swap(platform_id_, device.platform_id_);
357   }
358   return *this;
359 }
360 
GetPlatformVersion() const361 std::string CLDevice::GetPlatformVersion() const {
362   return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
363 }
364 
DisableOneLayerTextureArray()365 void CLDevice::DisableOneLayerTextureArray() {
366   info_.adreno_info.support_one_layer_texture_array = false;
367 }
368 
CreateDefaultGPUDevice(CLDevice * result)369 absl::Status CreateDefaultGPUDevice(CLDevice* result) {
370   cl_uint num_platforms;
371   cl_int status = clGetPlatformIDs(0, nullptr, &num_platforms);
372   if (status != CL_SUCCESS) {
373     return absl::UnknownError(
374         absl::StrFormat("clGetPlatformIDs returned %d", status));
375   }
376   if (num_platforms == 0) {
377     return absl::UnknownError("No supported OpenCL platform.");
378   }
379   std::vector<cl_platform_id> platforms(num_platforms);
380   status = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
381   if (status != CL_SUCCESS) {
382     return absl::UnknownError(
383         absl::StrFormat("clGetPlatformIDs returned %d", status));
384   }
385 
386   cl_platform_id platform_id = platforms[0];
387   cl_uint num_devices;
388   status =
389       clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
390   if (status != CL_SUCCESS) {
391     return absl::UnknownError(
392         absl::StrFormat("clGetDeviceIDs returned %d", status));
393   }
394   if (num_devices == 0) {
395     return absl::UnknownError("No GPU on current platform.");
396   }
397 
398   std::vector<cl_device_id> devices(num_devices);
399   status = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices,
400                           devices.data(), nullptr);
401   if (status != CL_SUCCESS) {
402     return absl::UnknownError(
403         absl::StrFormat("clGetDeviceIDs returned %d", status));
404   }
405 
406   *result = CLDevice(devices[0], platform_id);
407   return absl::OkStatus();
408 }
409 
410 }  // namespace cl
411 }  // namespace gpu
412 }  // namespace tflite
413