1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/cl/environment.h"
17
18 #include <string>
19 #include <utility>
20 #include <vector>
21
22 #include "tensorflow/lite/delegates/gpu/cl/util.h"
23 #include "tensorflow/lite/delegates/gpu/common/shape.h"
24
25 namespace tflite {
26 namespace gpu {
27 namespace cl {
28 namespace {
CreateEnvironment(Environment * result,bool shared,cl_context_properties egl_context,cl_context_properties egl_display)29 absl::Status CreateEnvironment(Environment* result, bool shared,
30 cl_context_properties egl_context,
31 cl_context_properties egl_display) {
32 CLDevice gpu;
33 RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
34
35 CLContext context;
36 if (shared) {
37 RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
38 } else {
39 RETURN_IF_ERROR(CreateCLContext(gpu, &context));
40 }
41 CLCommandQueue queue;
42 RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
43 ProfilingCommandQueue profiling_queue;
44 RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
45
46 *result = Environment(std::move(gpu), std::move(context), std::move(queue),
47 std::move(profiling_queue));
48
49 return result->Init();
50 }
51
IsGpuSupportsStorageType(const GpuInfo & gpu_info,TensorStorageType storage_type)52 bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
53 TensorStorageType storage_type) {
54 switch (storage_type) {
55 case TensorStorageType::TEXTURE_2D:
56 return !gpu_info.IsAMD();
57 case TensorStorageType::BUFFER:
58 return true;
59 case TensorStorageType::TEXTURE_ARRAY:
60 return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
61 case TensorStorageType::IMAGE_BUFFER:
62 return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
63 gpu_info.SupportsImageBuffer();
64 case TensorStorageType::TEXTURE_3D:
65 return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
66 case TensorStorageType::SINGLE_TEXTURE_2D:
67 return false;
68 case TensorStorageType::UNKNOWN:
69 return false;
70 }
71 return false;
72 }
73
IsGpuSupportsPrecision(const GpuInfo & gpu_info,CalculationsPrecision precision)74 bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
75 CalculationsPrecision precision) {
76 switch (precision) {
77 case CalculationsPrecision::F32_F16:
78 case CalculationsPrecision::F16:
79 return gpu_info.SupportsFP16();
80 case CalculationsPrecision::F32:
81 return true;
82 }
83 }
84
85 } // namespace
86
// Takes ownership of all CL handles needed to run GPU work: the device, its
// context, a regular command queue, and a profiling queue. Members are
// move-initialized in declaration order.
Environment::Environment(CLDevice&& device, CLContext&& context,
                         CLCommandQueue&& queue,
                         ProfilingCommandQueue&& profiling_queue)
    : device_(std::move(device)),
      context_(std::move(context)),
      queue_(std::move(queue)),
      profiling_queue_(std::move(profiling_queue)) {}
94
// Move constructor: transfers every member, including the program cache,
// leaving |environment| in a valid moved-from state.
Environment::Environment(Environment&& environment)
    : device_(std::move(environment.device_)),
      context_(std::move(environment.context_)),
      queue_(std::move(environment.queue_)),
      profiling_queue_(std::move(environment.profiling_queue_)),
      program_cache_(std::move(environment.program_cache_)) {}
101
// Move assignment: steals all CL handles and the program cache from
// |environment|. The self-assignment guard makes `env = std::move(env)` a
// no-op instead of destroying the handles.
Environment& Environment::operator=(Environment&& environment) {
  if (this != &environment) {
    device_ = std::move(environment.device_);
    context_ = std::move(environment.context_);
    queue_ = std::move(environment.queue_);
    profiling_queue_ = std::move(environment.profiling_queue_);
    program_cache_ = std::move(environment.program_cache_);
  }
  return *this;
}
112
Init()113 absl::Status Environment::Init() {
114 if (device().GetInfo().IsAdreno() &&
115 device().GetInfo().SupportsTextureArray()) {
116 const auto& adreno_info = device().info_.adreno_info;
117 // Some Adreno < 600 have bug with one layer texture array. b/131099086
118 // If we have one layer texture array and will write smt from kernel to this
119 // texture, we will get zeroes instead of actual values.
120 // The same kernel will work, if we use texture array with more than one
121 // layer.
122 if (adreno_info.IsAdreno3xx() || adreno_info.IsAdreno4xx() ||
123 adreno_info.IsAdreno5xx()) {
124 GetDevicePtr()->DisableOneLayerTextureArray();
125 }
126 }
127 return absl::OkStatus();
128 }
129
// Requests the high-performance hint from the device. Currently a no-op;
// no portable OpenCL perf-hint API is wired up here.
void Environment::SetHighPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}
133
// Restores the default performance hint on the device. Currently a no-op;
// no portable OpenCL perf-hint API is wired up here.
void Environment::SetDefaultPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}
137
// Requests the low-power hint from the device. Currently a no-op;
// no portable OpenCL perf-hint API is wired up here.
void Environment::SetLowPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}
141
GetSupportedPrecisions() const142 std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
143 std::vector<CalculationsPrecision> precisions;
144 for (CalculationsPrecision precision :
145 {CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
146 CalculationsPrecision::F16}) {
147 if (IsSupported(precision)) {
148 precisions.push_back(precision);
149 }
150 }
151 return precisions;
152 }
153
IsSupported(CalculationsPrecision precision) const154 bool Environment::IsSupported(CalculationsPrecision precision) const {
155 return IsGpuSupportsPrecision(device_.GetInfo(), precision);
156 }
157
GetSupportedStorages() const158 std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
159 std::vector<TensorStorageType> storage_types;
160 for (auto storage_type :
161 {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
162 TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
163 TensorStorageType::TEXTURE_3D}) {
164 if (IsSupported(storage_type)) {
165 storage_types.push_back(storage_type);
166 }
167 }
168 return storage_types;
169 }
170
171 std::vector<TensorStorageType>
GetSupportedStoragesWithHWZeroClampSupport() const172 Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
173 std::vector<TensorStorageType> storage_types;
174 for (auto storage_type :
175 {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
176 TensorStorageType::TEXTURE_3D}) {
177 if (IsSupported(storage_type)) {
178 storage_types.push_back(storage_type);
179 }
180 }
181 return storage_types;
182 }
183
IsSupported(TensorStorageType storage_type) const184 bool Environment::IsSupported(TensorStorageType storage_type) const {
185 return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
186 }
187
GetFastestStorageType(const GpuInfo & gpu_info)188 TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
189 if (gpu_info.IsAdreno()) {
190 if (gpu_info.adreno_info.IsAdreno6xxOrHigher() &&
191 !gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
192 return TensorStorageType::TEXTURE_ARRAY;
193 } else {
194 return TensorStorageType::TEXTURE_2D;
195 }
196 } else if (gpu_info.IsPowerVR()) {
197 return TensorStorageType::TEXTURE_2D;
198 } else if (gpu_info.IsMali()) {
199 return TensorStorageType::TEXTURE_2D;
200 } else if (gpu_info.IsNvidia()) {
201 return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
202 : TensorStorageType::BUFFER;
203 } else if (gpu_info.IsAMD()) {
204 return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
205 : TensorStorageType::BUFFER;
206 } else if (gpu_info.IsIntel()) {
207 return TensorStorageType::BUFFER;
208 }
209 return TensorStorageType::BUFFER;
210 }
211
GetStorageTypeWithMinimalMemoryConsumption(const GpuInfo & gpu_info)212 TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
213 const GpuInfo& gpu_info) {
214 if (gpu_info.IsAdreno()) {
215 if (gpu_info.adreno_info.IsAdreno3xx() ||
216 gpu_info.adreno_info.IsAdreno4xx()) {
217 return TensorStorageType::BUFFER;
218 } else {
219 if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
220 return TensorStorageType::TEXTURE_2D;
221 } else {
222 return TensorStorageType::IMAGE_BUFFER;
223 }
224 }
225 } else if (gpu_info.IsPowerVR()) {
226 if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
227 CanUseSubBufferForImage2d(gpu_info)) {
228 return TensorStorageType::TEXTURE_2D;
229 } else {
230 return TensorStorageType::BUFFER;
231 }
232 } else if (gpu_info.IsMali()) {
233 if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
234 CanUseSubBufferForImage2d(gpu_info)) {
235 return TensorStorageType::TEXTURE_2D;
236 } else {
237 return TensorStorageType::BUFFER;
238 }
239 } else if (gpu_info.IsNvidia()) {
240 return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
241 : TensorStorageType::BUFFER;
242 } else if (gpu_info.IsAMD()) {
243 return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
244 : TensorStorageType::BUFFER;
245 } else if (gpu_info.IsIntel()) {
246 return TensorStorageType::BUFFER;
247 }
248 return TensorStorageType::BUFFER;
249 }
250
CanUseSubBufferForImage2d(const GpuInfo & gpu_info)251 bool CanUseSubBufferForImage2d(const GpuInfo& gpu_info) {
252 if (!gpu_info.IsCL11OrHigher()) {
253 return false;
254 }
255 if (gpu_info.IsPowerVR()) {
256 // driver issue
257 return false;
258 }
259 if (gpu_info.IsNvidia()) {
260 return false;
261 }
262 if (gpu_info.IsMali() &&
263 (gpu_info.mali_info.IsBifrost() || gpu_info.mali_info.IsMidgard())) {
264 // Known driver issue on some G72 (Bifrost), G76 (Bifrost), T830 (Midgard),
265 // and T880 (Midgard) devices.
266 return false;
267 }
268 return true;
269 }
270
CreateEnvironment(Environment * result)271 absl::Status CreateEnvironment(Environment* result) {
272 CLDevice gpu;
273 RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
274
275 CLContext context;
276 RETURN_IF_ERROR(CreateCLContext(gpu, &context));
277 CLCommandQueue queue;
278 RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
279 ProfilingCommandQueue profiling_queue;
280 RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
281
282 *result = Environment(std::move(gpu), std::move(context), std::move(queue),
283 std::move(profiling_queue));
284 return result->Init();
285 }
286
287 } // namespace cl
288 } // namespace gpu
289 } // namespace tflite
290