/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/environment.h"

#include <string>
#include <utility>
#include <vector>

#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {
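// Assembles an Environment around the default GPU device. When `shared` is
// true, the OpenCL context is created with the given EGL context/display so
// that it can share objects with GL; otherwise a standalone context is used.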
absl::Status CreateEnvironment(Environment* result, bool shared,
                               cl_context_properties egl_context,
                               cl_context_properties egl_display) {
  CLDevice gpu;
  RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

  CLContext context;
  if (shared) {
    RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
  } else {
    RETURN_IF_ERROR(CreateCLContext(gpu, &context));
  }
  CLCommandQueue queue;
  RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
  ProfilingCommandQueue profiling_queue;
  RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

  *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                        std::move(profiling_queue));

  return result->Init();
}

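// Returns whether `storage_type` is usable on the given GPU. The per-vendor
// checks presumably encode known driver limitations (e.g. texture-backed
// storages are avoided on AMD).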
bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
                              TensorStorageType storage_type) {
  switch (storage_type) {
    case TensorStorageType::TEXTURE_2D:
      return !gpu_info.IsAMD();
    case TensorStorageType::BUFFER:
      return true;
    case TensorStorageType::TEXTURE_ARRAY:
      return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
    case TensorStorageType::IMAGE_BUFFER:
      return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
             gpu_info.SupportsImageBuffer();
    case TensorStorageType::TEXTURE_3D:
      return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return false;
    case TensorStorageType::UNKNOWN:
      return false;
  }
  return false;
}

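// F32 is always supported; F16 and the mixed F32_F16 mode require FP16
// support on the device.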
bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
                            CalculationsPrecision precision) {
  switch (precision) {
    case CalculationsPrecision::F32_F16:
    case CalculationsPrecision::F16:
      return gpu_info.SupportsFP16();
    case CalculationsPrecision::F32:
      return true;
  }
  return false;
}

}  // namespace

Environment::Environment(CLDevice&& device, CLContext&& context,
                         CLCommandQueue&& queue,
                         ProfilingCommandQueue&& profiling_queue)
    : device_(std::move(device)),
      context_(std::move(context)),
      queue_(std::move(queue)),
      profiling_queue_(std::move(profiling_queue)) {}

Environment::Environment(Environment&& environment)
    : device_(std::move(environment.device_)),
      context_(std::move(environment.context_)),
      queue_(std::move(environment.queue_)),
      profiling_queue_(std::move(environment.profiling_queue_)),
      program_cache_(std::move(environment.program_cache_)) {}

Environment& Environment::operator=(Environment&& environment) {
  if (this != &environment) {
    device_ = std::move(environment.device_);
    context_ = std::move(environment.context_);
    queue_ = std::move(environment.queue_);
    profiling_queue_ = std::move(environment.profiling_queue_);
    program_cache_ = std::move(environment.program_cache_);
  }
  return *this;
}

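// Applies device-specific workarounds; called right after the Environment is
// constructed.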
absl::Status Environment::Init() {
  if (device().GetInfo().IsAdreno() &&
      device().GetInfo().SupportsTextureArray()) {
    const auto& adreno_info = device().info_.adreno_info;
    // Some Adreno GPUs before the 6xx series have a bug with single-layer
    // texture arrays (b/131099086): a kernel that writes to a one-layer
    // texture array produces zeroes instead of the actual values, while the
    // same kernel works correctly on arrays with more than one layer.
    if (adreno_info.IsAdreno3xx() || adreno_info.IsAdreno4xx() ||
        adreno_info.IsAdreno5xx()) {
      GetDevicePtr()->DisableOneLayerTextureArray();
    }
  }
  return absl::OkStatus();
}

void Environment::SetHighPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

void Environment::SetDefaultPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

void Environment::SetLowPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

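// Lists the precisions this device supports, from highest (F32) to lowest
// (F16).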
std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
  std::vector<CalculationsPrecision> precisions;
  for (CalculationsPrecision precision :
       {CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
        CalculationsPrecision::F16}) {
    if (IsSupported(precision)) {
      precisions.push_back(precision);
    }
  }
  return precisions;
}

bool Environment::IsSupported(CalculationsPrecision precision) const {
  return IsGpuSupportsPrecision(device_.GetInfo(), precision);
}

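// Lists the tensor storage types usable on this device.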
std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
  std::vector<TensorStorageType> storage_types;
  for (auto storage_type :
       {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
        TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
        TensorStorageType::TEXTURE_3D}) {
    if (IsSupported(storage_type)) {
      storage_types.push_back(storage_type);
    }
  }
  return storage_types;
}

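// Lists the supported storages that are texture-backed; these are the ones
// that can rely on the hardware clamping out-of-bounds reads to zero.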
std::vector<TensorStorageType>
Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
  std::vector<TensorStorageType> storage_types;
  for (auto storage_type :
       {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
        TensorStorageType::TEXTURE_3D}) {
    if (IsSupported(storage_type)) {
      storage_types.push_back(storage_type);
    }
  }
  return storage_types;
}

bool Environment::IsSupported(TensorStorageType storage_type) const {
  return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
}

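// Picks the storage type that is typically fastest for the vendor: textures
// on mobile GPUs, image buffers or plain buffers elsewhere.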
TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno6xxOrHigher() &&
        !gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
      return TensorStorageType::TEXTURE_ARRAY;
    } else {
      return TensorStorageType::TEXTURE_2D;
    }
  } else if (gpu_info.IsPowerVR()) {
    return TensorStorageType::TEXTURE_2D;
  } else if (gpu_info.IsMali()) {
    return TensorStorageType::TEXTURE_2D;
  } else if (gpu_info.IsNvidia()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsAMD()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsIntel()) {
    return TensorStorageType::BUFFER;
  }
  return TensorStorageType::BUFFER;
}

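// Storage types that can alias the tensor's underlying buffer (TEXTURE_2D
// created from a buffer, or IMAGE_BUFFER) are preferred here, presumably
// because they avoid keeping a separate texture copy of the data.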
TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
    const GpuInfo& gpu_info) {
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno3xx() ||
        gpu_info.adreno_info.IsAdreno4xx()) {
      return TensorStorageType::BUFFER;
    } else {
      if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
        return TensorStorageType::TEXTURE_2D;
      } else {
        return TensorStorageType::IMAGE_BUFFER;
      }
    }
  } else if (gpu_info.IsPowerVR()) {
    if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
        CanUseSubBufferForImage2d(gpu_info)) {
      return TensorStorageType::TEXTURE_2D;
    } else {
      return TensorStorageType::BUFFER;
    }
  } else if (gpu_info.IsMali()) {
    if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
        CanUseSubBufferForImage2d(gpu_info)) {
      return TensorStorageType::TEXTURE_2D;
    } else {
      return TensorStorageType::BUFFER;
    }
  } else if (gpu_info.IsNvidia()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsAMD()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsIntel()) {
    return TensorStorageType::BUFFER;
  }
  return TensorStorageType::BUFFER;
}

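// Whether a 2D image can be safely backed by a cl_mem sub-buffer on this
// device: requires OpenCL 1.1+ (where sub-buffers were introduced) and
// excludes vendors/architectures with known driver issues.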
bool CanUseSubBufferForImage2d(const GpuInfo& gpu_info) {
  if (!gpu_info.IsCL11OrHigher()) {
    return false;
  }
  if (gpu_info.IsPowerVR()) {
    // Disabled due to a known PowerVR driver issue.
    return false;
  }
  if (gpu_info.IsNvidia()) {
    return false;
  }
  if (gpu_info.IsMali() &&
      (gpu_info.mali_info.IsBifrost() || gpu_info.mali_info.IsMidgard())) {
    // Known driver issue on some G72 (Bifrost), G76 (Bifrost), T830 (Midgard),
    // and T880 (Midgard) devices.
    return false;
  }
  return true;
}

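// Creates an Environment with a standalone CL context (no GL sharing).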
absl::Status CreateEnvironment(Environment* result) {
  CLDevice gpu;
  RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

  CLContext context;
  RETURN_IF_ERROR(CreateCLContext(gpu, &context));
  CLCommandQueue queue;
  RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
  ProfilingCommandQueue profiling_queue;
  RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

  *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                        std::move(profiling_queue));
  return result->Init();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite