use crate::api::icd::*;
use crate::api::util::*;
use crate::core::format::*;
use crate::core::platform::*;
use crate::core::util::*;
use crate::core::version::*;
use crate::impl_cl_type_trait_base;

use mesa_rust::compiler::clc::*;
use mesa_rust::compiler::nir::*;
use mesa_rust::pipe::context::*;
use mesa_rust::pipe::device::load_screens;
use mesa_rust::pipe::fence::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::*;
use mesa_rust_gen::*;
use mesa_rust_util::math::SetBitIndices;
use mesa_rust_util::static_assert;
use rusticl_opencl_gen::*;

use std::cmp::max;
use std::cmp::min;
use std::collections::HashMap;
use std::convert::TryInto;
use std::env;
use std::ffi::CString;
use std::mem::transmute;
use std::os::raw::*;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;

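/// State of a `cl_device_id` backed by a gallium `PipeScreen`. The public
/// fields are filled in during [`Device::new`] and not modified afterwards.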
pub struct Device {
    pub base: CLObjectBase<CL_INVALID_DEVICE>,
    pub screen: Arc<PipeScreen>,
    pub cl_version: CLVersion,
    pub clc_version: CLVersion,
    pub clc_versions: Vec<cl_name_version>,
    pub custom: bool,
    pub embedded: bool,
    pub extension_string: String,
    pub extensions: Vec<cl_name_version>,
    pub spirv_extensions: Vec<CString>,
    pub clc_features: Vec<cl_name_version>,
    pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_mem_flags>>,
    pub lib_clc: NirShader,
    pub caps: DeviceCaps,
    helper_ctx: Mutex<PipeContext>,
    reusable_ctx: Mutex<Vec<PipeContext>>,
}

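/// Capabilities queried from the gallium screen once at device creation. The
/// image-related fields are left at zero when image support isn't advertised.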
#[derive(Default)]
pub struct DeviceCaps {
    pub has_3d_image_writes: bool,
    pub has_images: bool,
    pub has_rw_images: bool,
    pub has_timestamp: bool,
    pub image_2d_size: u32,
    pub max_read_images: u32,
    pub max_write_images: u32,
    pub timer_resolution: u32,
}

impl DeviceCaps {
    fn new(screen: &PipeScreen) -> Self {
        let cap_timestamp = screen.param(pipe_cap::PIPE_CAP_QUERY_TIMESTAMP) != 0;
        let timer_resolution = screen.param(pipe_cap::PIPE_CAP_TIMER_RESOLUTION) as u32;

        let max_write_images =
            Self::shader_param(screen, pipe_shader_cap::PIPE_SHADER_CAP_MAX_SHADER_IMAGES) as u32;
        let max_read_images =
            Self::shader_param(screen, pipe_shader_cap::PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS) as u32;
        let image_2d_size = screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_2D_SIZE) as u32;

        let has_images = screen.param(pipe_cap::PIPE_CAP_TEXTURE_SAMPLER_INDEPENDENT) != 0 &&
            screen.param(pipe_cap::PIPE_CAP_IMAGE_STORE_FORMATTED) != 0 &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            max_read_images >= 8 &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            max_write_images >= 8 &&
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            image_2d_size >= 2048;

        Self {
            has_images,
            has_timestamp: cap_timestamp && timer_resolution > 0,
            image_2d_size: has_images.then_some(image_2d_size).unwrap_or_default(),
            max_read_images: has_images.then_some(max_read_images).unwrap_or_default(),
            max_write_images: has_images.then_some(max_write_images).unwrap_or_default(),
            timer_resolution,
            ..Default::default()
        }
    }

    fn shader_param(screen: &PipeScreen, cap: pipe_shader_cap) -> i32 {
        screen.shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }
}

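/// Abstraction over the device's internal helper context, used for work that
/// happens outside of any application-created queue (e.g. resource uploads and
/// compute state queries).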
pub trait HelperContextWrapper {
    #[must_use]
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext);

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
    fn delete_compute_state(&self, cso: *mut c_void);
    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32;

    fn is_create_fence_fd_supported(&self) -> bool;
    fn import_fence(&self, fence_fd: &FenceFd) -> PipeFence;
}

pub struct HelperContext<'a> {
    lock: MutexGuard<'a, PipeContext>,
}

impl<'a> HelperContext<'a> {
    pub fn resource_copy_region(
        &self,
        src: &PipeResource,
        dst: &PipeResource,
        dst_offset: &[u32; 3],
        bx: &pipe_box,
    ) {
        self.lock.resource_copy_region(src, dst, dst_offset, bx);
    }

    pub fn buffer_subdata(
        &self,
        res: &PipeResource,
        offset: c_uint,
        data: *const c_void,
        size: c_uint,
    ) {
        self.lock.buffer_subdata(res, offset, data, size)
    }

    pub fn texture_subdata(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        data: *const c_void,
        stride: u32,
        layer_stride: usize,
    ) {
        self.lock
            .texture_subdata(res, bx, data, stride, layer_stride)
    }
}

impl<'a> HelperContextWrapper for HelperContext<'a> {
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext),
    {
        func(self);
        self.lock.flush()
    }

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
        self.lock.create_compute_state(nir, static_local_mem)
    }

    fn delete_compute_state(&self, cso: *mut c_void) {
        self.lock.delete_compute_state(cso)
    }

    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info {
        self.lock.compute_state_info(state)
    }

    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32 {
        self.lock.compute_state_subgroup_size(state, block)
    }

    fn is_create_fence_fd_supported(&self) -> bool {
        self.lock.is_create_fence_fd_supported()
    }

    fn import_fence(&self, fd: &FenceFd) -> PipeFence {
        self.lock.import_fence(fd)
    }
}

impl_cl_type_trait_base!(cl_device_id, Device, [Device], CL_INVALID_DEVICE);

impl Device {
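    /// Creates a `Device` for the given screen, returning `None` if the screen
    /// doesn't meet the minimum requirements (see [`Device::check_valid`]) or
    /// if libclc can't be loaded.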
    fn new(screen: PipeScreen) -> Option<Device> {
        if !Self::check_valid(&screen) {
            return None;
        }

        let screen = Arc::new(screen);
        // Create the helper context before loading libclc, as llvmpipe only creates its shader
        // cache when the first context is created.
        let helper_ctx = screen.create_context()?;
        let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
        if lib_clc.is_none() {
            eprintln!("Libclc failed to load. Please make sure it is installed and provides spirv-mesa3d-.spv and/or spirv64-mesa3d-.spv");
        }

        let mut d = Self {
            caps: DeviceCaps::new(&screen),
            base: CLObjectBase::new(RusticlTypes::Device),
            helper_ctx: Mutex::new(helper_ctx),
            screen,
            cl_version: CLVersion::Cl3_0,
            clc_version: CLVersion::Cl3_0,
            clc_versions: Vec::new(),
            custom: false,
            embedded: false,
            extension_string: String::from(""),
            extensions: Vec::new(),
            spirv_extensions: Vec::new(),
            clc_features: Vec::new(),
            formats: HashMap::new(),
            lib_clc: lib_clc?,
            reusable_ctx: Mutex::new(Vec::new()),
        };

        // check if we are embedded or full profile first
        d.embedded = d.check_embedded_profile();

        // check if we have to report it as a custom device
        d.custom = d.check_custom();

        d.fill_format_tables();

        // query supported extensions
        d.fill_extensions();

        // now figure out what version we are
        d.check_version();

        Some(d)
    }

    /// Converts a temporary reference to a `'static` one if and only if this
    /// device lives inside static memory.
    pub fn to_static(&self) -> Option<&'static Self> {
        devs().iter().find(|&dev| self == dev)
    }

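    /// Builds the per-format table of supported `cl_mem_flags` for every image
    /// type and derives the 3D-image-write and read-write-image caps from it.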
    fn fill_format_tables(&mut self) {
        // no need to do this if we don't support images
        if !self.caps.has_images {
            return;
        }

        for f in FORMATS {
            let mut fs = HashMap::new();
            for t in CL_IMAGE_TYPES {
                // the CTS doesn't test them, so let's not advertise them by accident if they are
                // broken
                if t == CL_MEM_OBJECT_IMAGE1D_BUFFER
                    && [CL_RGB, CL_RGBx].contains(&f.cl_image_format.image_channel_order)
                    && ![CL_UNORM_SHORT_565, CL_UNORM_SHORT_555]
                        .contains(&f.cl_image_format.image_channel_data_type)
                {
                    continue;
                }

                let mut flags: cl_uint = 0;
                if self.screen.is_format_supported(
                    f.pipe,
                    cl_mem_type_to_texture_target(t),
                    PIPE_BIND_SAMPLER_VIEW,
                ) {
                    flags |= CL_MEM_READ_ONLY;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_WRITE_ONLY | CL_MEM_KERNEL_READ_AND_WRITE;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_READ_WRITE;
                }

                fs.insert(t, flags as cl_mem_flags);
            }

            // Restrict the formats supported by 1D buffer images. This is an OpenCL CTS
            // workaround. See https://github.com/KhronosGroup/OpenCL-CTS/issues/1889
            let image1d_mask = fs[&CL_MEM_OBJECT_IMAGE1D];
            if let Some(entry) = fs.get_mut(&CL_MEM_OBJECT_IMAGE1D_BUFFER) {
                *entry &= image1d_mask;
            }

            self.formats.insert(f.cl_image_format, fs);
        }

        // now enable some caps based on advertised formats
        self.caps.has_3d_image_writes = !FORMATS
            .iter()
            .filter(|f| {
                if self.embedded {
                    f.req_for_embeded_read_or_write
                } else {
                    f.req_for_full_read_or_write
                }
            })
            .map(|f| self.formats[&f.cl_image_format][&CL_MEM_OBJECT_IMAGE3D])
            .any(|f| f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);

        // if we can't advertise the 3D image write extension, we have to disable 3D image
        // writes entirely
        if !self.caps.has_3d_image_writes {
            for f in &mut self.formats.values_mut() {
                *f.get_mut(&CL_MEM_OBJECT_IMAGE3D).unwrap() &= !cl_mem_flags::from(
                    CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_KERNEL_READ_AND_WRITE,
                );
            }
        }

        // we require formatted loads
        if self.screen.param(pipe_cap::PIPE_CAP_IMAGE_LOAD_FORMATTED) != 0 {
            // "For embedded profiles devices that support reading from and writing to the same
            // image object from the same kernel instance (see CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS)
            // there is no required minimum list of supported image formats."
            self.caps.has_rw_images = if self.embedded {
                FORMATS
                    .iter()
                    .flat_map(|f| self.formats[&f.cl_image_format].values())
                    .any(|f| f & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) != 0)
            } else {
                !FORMATS
                    .iter()
                    .filter(|f| f.req_for_full_read_and_write)
                    .flat_map(|f| &self.formats[&f.cl_image_format])
                    // maybe? things being all optional is kinda a mess
                    .filter(|(target, _)| **target != CL_MEM_OBJECT_IMAGE3D)
                    .any(|(_, mask)| mask & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) == 0)
            }
        }

        // if we can't advertise read_write images, disable them all
        if !self.caps.has_rw_images {
            self.formats
                .values_mut()
                .flat_map(|f| f.values_mut())
                .for_each(|f| *f &= !cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE));
        }
    }

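    /// Checks whether the screen can back an OpenCL device at all: it has to
    /// support compute shaders, consume NIR, and provide at least 128 bytes of
    /// const buffer 0 for kernel arguments.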
    fn check_valid(screen: &PipeScreen) -> bool {
        if screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0
            || screen.shader_param(
                pipe_shader_type::PIPE_SHADER_COMPUTE,
                pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS,
            ) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR as i32))
                == 0
        {
            return false;
        }

        // CL_DEVICE_MAX_PARAMETER_SIZE
        // For this minimum value, only a maximum of 128 arguments can be passed to a kernel
        if (screen.shader_param(
            pipe_shader_type::PIPE_SHADER_COMPUTE,
            pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE,
        ) as u32)
            < 128
        {
            return false;
        }
        true
    }

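    /// Returns true if the device misses any of the minimum requirements of
    /// its profile and therefore has to be reported as CL_DEVICE_TYPE_CUSTOM.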
    fn check_custom(&self) -> bool {
        // Max size of memory object allocation in bytes. The minimum value is
        // max(min(1024 × 1024 × 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE), 32 × 1024 × 1024)
        // for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        let mut limit = min(1024 * 1024 * 1024, self.global_mem_size() / 4);
        limit = max(limit, 32 * 1024 * 1024);
        if self.max_mem_alloc() < limit {
            return true;
        }

        // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
        // The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        if self.max_grid_dimensions() < 3 {
            return true;
        }

        if self.embedded {
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 bytes for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 4 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 1024 {
                return true;
            }
        } else {
            // CL 1.0 spec:
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 64 * 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL 1.0 spec:
            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 16 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 16 * 1024 {
                return true;
            }
        }

        false
    }

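    /// Returns true if the device can only be exposed with the EMBEDDED
    /// profile, i.e. it fails one of the FULL profile minimums such as int64
    /// support or the image limits.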
    fn check_embedded_profile(&self) -> bool {
        if self.caps.has_images {
            // The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.max_samplers() < 16 ||
                // The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.caps.max_read_images < 128 ||
                // The minimum value is 64 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.caps.max_write_images < 64 ||
                // The minimum value is 16384 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.caps.image_2d_size < 16384 ||
                // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_array_size() < 2048 ||
                // The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_buffer_max_size_pixels() < 65536
            {
                return true;
            }

            // TODO check req formats
        }
        !self.int64_supported()
    }

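    /// Parses the RUSTICL_DEVICE_TYPE environment variable, allowing the
    /// reported device type to be overridden.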
    fn parse_env_device_type() -> Option<cl_device_type> {
        let mut val = env::var("RUSTICL_DEVICE_TYPE").ok()?;
        val.make_ascii_lowercase();
        Some(
            match &*val {
                "accelerator" => CL_DEVICE_TYPE_ACCELERATOR,
                "cpu" => CL_DEVICE_TYPE_CPU,
                "custom" => CL_DEVICE_TYPE_CUSTOM,
                "gpu" => CL_DEVICE_TYPE_GPU,
                _ => return None,
            }
            .into(),
        )
    }

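    /// Parses the RUSTICL_CL_VERSION environment variable (a "major.minor"
    /// string), allowing the reported OpenCL version to be overridden.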
    fn parse_env_version() -> Option<CLVersion> {
        let val = env::var("RUSTICL_CL_VERSION").ok()?;
        let (major, minor) = val.split_once('.')?;
        let major = major.parse().ok()?;
        let minor = minor.parse().ok()?;
        mk_cl_version(major, minor, 0).try_into().ok()
    }

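    /// Determines the highest OpenCL version whose requirements this device
    /// meets, starting at 3.0 and downgrading on every missed limit, and fills
    /// in the matching "OpenCL C" versions.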
    // TODO add CLC checks
    fn check_version(&mut self) {
        let exts: Vec<&str> = self.extension_string.split(' ').collect();
        let mut res = CLVersion::Cl3_0;

        if self.embedded {
            if self.caps.has_images {
                let supports_array_writes = !FORMATS
                    .iter()
                    .filter(|f| f.req_for_embeded_read_or_write)
                    .map(|f| self.formats.get(&f.cl_image_format).unwrap())
                    .map(|f| f.get(&CL_MEM_OBJECT_IMAGE2D_ARRAY).unwrap())
                    .any(|f| *f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);
                if self.image_3d_size() < 2048 || !supports_array_writes {
                    res = CLVersion::Cl1_2;
                }
            }
        }

        // TODO: check image 1D, 1Dbuffer, 1Darray and 2Darray support explicitly
        if self.caps.has_images {
            // The minimum value is 256 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.image_array_size() < 256 ||
                // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_buffer_max_size_pixels() < 2048
            {
                res = CLVersion::Cl1_1;
            }
        }

        if self.embedded {
            // The minimum value for the EMBEDDED profile is 1 KB.
            if self.printf_buffer_size() < 1024 {
                res = CLVersion::Cl1_1;
            }
        } else {
            // The minimum value for the FULL profile is 1 MB.
            if self.printf_buffer_size() < 1024 * 1024 {
                res = CLVersion::Cl1_1;
            }
        }

        if !exts.contains(&"cl_khr_byte_addressable_store")
            || !exts.contains(&"cl_khr_global_int32_base_atomics")
            || !exts.contains(&"cl_khr_global_int32_extended_atomics")
            || !exts.contains(&"cl_khr_local_int32_base_atomics")
            || !exts.contains(&"cl_khr_local_int32_extended_atomics")
            // The following modifications are made to the OpenCL 1.1 platform layer and runtime (sections 4 and 5):
            // The minimum FULL_PROFILE value for CL_DEVICE_MAX_PARAMETER_SIZE increased from 256 to 1024 bytes
            || self.param_max_size() < 1024
            // The minimum FULL_PROFILE value for CL_DEVICE_LOCAL_MEM_SIZE increased from 16 KB to 32 KB.
            || self.local_mem_size() < 32 * 1024
        {
            res = CLVersion::Cl1_0;
        }

        if let Some(val) = Self::parse_env_version() {
            res = val;
        }

        if res >= CLVersion::Cl3_0 {
            self.clc_versions
                .push(mk_cl_version_ext(3, 0, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_2 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 2, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_1 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 1, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_0 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 0, 0, "OpenCL C"));
        }

        self.cl_version = res;
        self.clc_version = min(CLVersion::Cl1_2, res);
    }

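    /// Collects the device extensions, OpenCL C features and SPIR-V extensions
    /// to advertise, based on the caps queried above.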
    fn fill_extensions(&mut self) {
        let mut exts_str: Vec<String> = Vec::new();
        let mut exts = PLATFORM_EXTENSIONS.to_vec();
        let mut feats = Vec::new();
        let mut spirv_exts = Vec::new();
        let mut add_ext = |major, minor, patch, ext: &str| {
            exts.push(mk_cl_version_ext(major, minor, patch, ext));
            exts_str.push(ext.to_owned());
        };
        let mut add_feat = |major, minor, patch, feat: &str| {
            feats.push(mk_cl_version_ext(major, minor, patch, feat));
        };
        let mut add_spirv = |ext: &str| {
            spirv_exts.push(CString::new(ext).unwrap());
        };

        // add extensions all drivers support for now
        add_ext(1, 0, 0, "cl_khr_global_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_global_int32_extended_atomics");
        add_ext(2, 0, 0, "cl_khr_integer_dot_product");
        add_feat(
            2,
            0,
            0,
            "__opencl_c_integer_dot_product_input_4x8bit_packed",
        );
        add_feat(2, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit");
        add_ext(1, 0, 0, "cl_khr_local_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_local_int32_extended_atomics");

        add_spirv("SPV_KHR_expect_assume");
        add_spirv("SPV_KHR_float_controls");
        add_spirv("SPV_KHR_integer_dot_product");
        add_spirv("SPV_KHR_no_integer_wrap_decoration");

        if self.fp16_supported() {
            add_ext(1, 0, 0, "cl_khr_fp16");
        }

        if self.fp64_supported() {
            add_ext(1, 0, 0, "cl_khr_fp64");
            add_feat(1, 0, 0, "__opencl_c_fp64");
        }

        if self.is_gl_sharing_supported() {
            add_ext(1, 0, 0, "cl_khr_gl_sharing");
        }

        if self.int64_supported() {
            if self.embedded {
                add_ext(1, 0, 0, "cles_khr_int64");
            };

            add_feat(1, 0, 0, "__opencl_c_int64");
        }

        if self.caps.has_images {
            add_feat(1, 0, 0, "__opencl_c_images");

            if self.image2d_from_buffer_supported() {
                add_ext(1, 0, 0, "cl_khr_image2d_from_buffer");
            }

            if self.caps.has_rw_images {
                add_feat(1, 0, 0, "__opencl_c_read_write_images");
            }

            if self.caps.has_3d_image_writes {
                add_ext(1, 0, 0, "cl_khr_3d_image_writes");
                add_feat(1, 0, 0, "__opencl_c_3d_image_writes");
            }
        }

        if self.pci_info().is_some() {
            add_ext(1, 0, 0, "cl_khr_pci_bus_info");
        }

        if self.screen().device_uuid().is_some() && self.screen().driver_uuid().is_some() {
            static_assert!(PIPE_UUID_SIZE == CL_UUID_SIZE_KHR);
            static_assert!(PIPE_LUID_SIZE == CL_LUID_SIZE_KHR);

            add_ext(1, 0, 0, "cl_khr_device_uuid");
        }

        if self.subgroups_supported() {
            // requires CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
            //add_ext(1, 0, 0, "cl_khr_subgroups");
            add_feat(1, 0, 0, "__opencl_c_subgroups");

            // we have lowering in `nir_lower_subgroups`, drivers can just use that
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle");
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle_relative");
        }

        if self.svm_supported() {
            add_ext(1, 0, 0, "cl_arm_shared_virtual_memory");
        }

        self.extensions = exts;
        self.clc_features = feats;
        self.extension_string = format!("{} {}", PLATFORM_EXTENSION_STR, exts_str.join(" "));
        self.spirv_extensions = spirv_exts;
    }

    fn shader_param(&self, cap: pipe_shader_cap) -> i32 {
        self.screen
            .shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }

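    /// Probes all available gallium screens and turns each one that passes
    /// validation into a `Device`.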
    pub fn all() -> impl Iterator<Item = Device> {
        load_screens().filter_map(Device::new)
    }

    pub fn address_bits(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_ADDRESS_BITS)
    }

    pub fn const_max_size(&self) -> cl_ulong {
        min(
            // Needed to fix the `api min_max_constant_buffer_size` CL CTS test as it can't really
            // handle arbitrary values here. We might want to reconsider later and figure out how
            // to advertise higher values without tripping up the test.
            // should be at least 1 << 16 (native UBO size on NVidia)
            // advertising more just in case it benefits other hardware
            1 << 26,
            min(
                self.max_mem_alloc(),
                self.screen
                    .param(pipe_cap::PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT) as u64,
            ),
        )
    }

    pub fn const_max_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFERS) as cl_uint
    }

    pub fn device_type(&self, internal: bool) -> cl_device_type {
        if let Some(env) = Self::parse_env_device_type() {
            return env;
        }

        if self.custom {
            return CL_DEVICE_TYPE_CUSTOM as cl_device_type;
        }
        let mut res = match self.screen.device_type() {
            pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE => CL_DEVICE_TYPE_CPU,
            pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI => CL_DEVICE_TYPE_GPU,
            pipe_loader_device_type::PIPE_LOADER_DEVICE_PLATFORM => CL_DEVICE_TYPE_GPU,
            pipe_loader_device_type::NUM_PIPE_LOADER_DEVICE_TYPES => CL_DEVICE_TYPE_CUSTOM,
        };

        if internal && res == CL_DEVICE_TYPE_GPU && self.screen.driver_name() != "zink" {
            res |= CL_DEVICE_TYPE_DEFAULT;
        }

        res as cl_device_type
    }

    pub fn fp16_supported(&self) -> bool {
        if !Platform::features().fp16 {
            return false;
        }

        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_FP16) != 0
    }

    pub fn fp64_supported(&self) -> bool {
        if !Platform::features().fp64 {
            return false;
        }

        self.screen.param(pipe_cap::PIPE_CAP_DOUBLES) == 1
    }

    pub fn is_gl_sharing_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_CL_GL_SHARING) != 0
            && self.screen.param(pipe_cap::PIPE_CAP_DMABUF) != 0
            && !self.is_device_software()
            && self.screen.is_res_handle_supported()
            && self.screen.device_uuid().is_some()
            && self.helper_ctx().is_create_fence_fd_supported()
    }

    pub fn is_device_software(&self) -> bool {
        self.screen.device_type() == pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE
    }

    pub fn get_nir_options(&self) -> nir_shader_compiler_options {
        unsafe {
            *self
                .screen
                .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE)
        }
    }

    pub fn sdot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8
    }

    pub fn udot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8
    }

    pub fn sudot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8
    }

    pub fn pack_32_4x8_supported(&self) -> bool {
        self.get_nir_options().has_pack_32_4x8
    }

    pub fn sdot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8_sat
    }

    pub fn udot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8_sat
    }

    pub fn sudot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8_sat
    }

    pub fn fp64_is_softfp(&self) -> bool {
        bit_check(
            self.get_nir_options().lower_doubles_options as u32,
            nir_lower_doubles_options::nir_lower_fp64_full_software as u32,
        )
    }

    pub fn int64_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_INT64) == 1
    }

    pub fn global_mem_size(&self) -> cl_ulong {
        if let Some(memory_info) = self.screen().query_memory_info() {
            let memory: cl_ulong = if memory_info.total_device_memory != 0 {
                memory_info.total_device_memory.into()
            } else {
                memory_info.total_staging_memory.into()
            };
            memory * 1024
        } else {
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)
        }
    }

    pub fn image_3d_size(&self) -> usize {
        if self.caps.has_images {
            1 << (self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_3D_LEVELS) - 1)
        } else {
            0
        }
    }

    pub fn image_3d_supported(&self) -> bool {
        self.caps.has_images && self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_3D_LEVELS) != 0
    }

    pub fn image_array_size(&self) -> usize {
        if self.caps.has_images {
            self.screen
                .param(pipe_cap::PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS) as usize
        } else {
            0
        }
    }

    pub fn image_pitch_alignment(&self) -> cl_uint {
        if self.caps.has_images {
            self.screen
                .param(pipe_cap::PIPE_CAP_LINEAR_IMAGE_PITCH_ALIGNMENT) as u32
        } else {
            0
        }
    }

    pub fn image_base_address_alignment(&self) -> cl_uint {
        if self.caps.has_images {
            self.screen
                .param(pipe_cap::PIPE_CAP_LINEAR_IMAGE_BASE_ADDRESS_ALIGNMENT) as u32
        } else {
            0
        }
    }

    pub fn image_buffer_max_size_pixels(&self) -> usize {
        if self.caps.has_images {
            min(
                // The CTS requires this to not exceed `CL_MAX_MEM_ALLOC_SIZE`; we also need to
                // divide by the max pixel size, because this cap is in pixels, not bytes.
                //
                // The CTS also casts this to int in a couple of places,
                // see: https://github.com/KhronosGroup/OpenCL-CTS/issues/2056
                min(
                    self.max_mem_alloc() / MAX_PIXEL_SIZE_BYTES,
                    c_int::MAX as cl_ulong,
                ),
                self.screen
                    .param(pipe_cap::PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT)
                    as cl_ulong,
            ) as usize
        } else {
            0
        }
    }

    pub fn image2d_from_buffer_supported(&self) -> bool {
        self.image_pitch_alignment() != 0 && self.image_base_address_alignment() != 0
    }

    pub fn little_endian(&self) -> bool {
        let endianness = self.screen.param(pipe_cap::PIPE_CAP_ENDIANNESS);
        endianness == (pipe_endian::PIPE_ENDIAN_LITTLE as i32)
    }

    pub fn local_mem_size(&self) -> cl_ulong {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)
    }

    pub fn max_block_sizes(&self) -> Vec<usize> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
        v.into_iter().map(|v| v as usize).collect()
    }

    pub fn max_grid_size(&self) -> Vec<u64> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GRID_SIZE);

        v.into_iter()
            .map(|a| min(a, Platform::dbg().max_grid_size))
            .collect()
    }

    pub fn max_clock_freq(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)
    }

    pub fn max_compute_units(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)
    }

    pub fn max_grid_dimensions(&self) -> cl_uint {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_GRID_DIMENSION,
        ) as cl_uint
    }

    pub fn max_mem_alloc(&self) -> cl_ulong {
        // TODO: at the moment gallium doesn't support bigger buffers
        min(
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE),
            0x80000000,
        )
    }

    pub fn max_samplers(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS) as cl_uint
    }

    pub fn max_threads_per_block(&self) -> usize {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
        ) as usize
    }

    pub fn param_max_size(&self) -> usize {
        min(
            self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE) as u32,
            4 * 1024,
        ) as usize
    }

    pub fn printf_buffer_size(&self) -> usize {
        1024 * 1024
    }

    pub fn pci_info(&self) -> Option<cl_device_pci_bus_info_khr> {
        if self.screen.device_type() != pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI {
            return None;
        }

        let pci_domain = self.screen.param(pipe_cap::PIPE_CAP_PCI_GROUP) as cl_uint;
        let pci_bus = self.screen.param(pipe_cap::PIPE_CAP_PCI_BUS) as cl_uint;
        let pci_device = self.screen.param(pipe_cap::PIPE_CAP_PCI_DEVICE) as cl_uint;
        let pci_function = self.screen.param(pipe_cap::PIPE_CAP_PCI_FUNCTION) as cl_uint;

        Some(cl_device_pci_bus_info_khr {
            pci_domain,
            pci_bus,
            pci_device,
            pci_function,
        })
    }

    fn reusable_ctx(&self) -> MutexGuard<Vec<PipeContext>> {
        self.reusable_ctx.lock().unwrap()
    }

    pub fn screen(&self) -> &Arc<PipeScreen> {
        &self.screen
    }

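    /// Returns a context for this device, reusing a recycled one if available.
    /// Contexts only get recycled when the `reuse_context` debug option is set
    /// (see [`Device::recycle_context`]).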
    pub fn create_context(&self) -> Option<PipeContext> {
        self.reusable_ctx()
            .pop()
            .or_else(|| self.screen.create_context())
    }

    pub fn recycle_context(&self, ctx: PipeContext) {
        if Platform::dbg().reuse_context {
            self.reusable_ctx().push(ctx);
        }
    }

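    /// Decodes the PIPE_COMPUTE_CAP_SUBGROUP_SIZES bitmask into the list of
    /// supported subgroup sizes, each a power of two.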
    pub fn subgroup_sizes(&self) -> Vec<usize> {
        let subgroup_size = ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
        );

        SetBitIndices::from_msb(subgroup_size)
            .map(|bit| 1 << bit)
            .collect()
    }

    pub fn max_subgroups(&self) -> u32 {
        ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_SUBGROUPS,
        )
    }

    pub fn subgroups_supported(&self) -> bool {
        let subgroup_sizes = self.subgroup_sizes().len();

        // If multiple subgroup sizes are supported we need to be able to query a CSO for its
        // subgroup size; doing that without shareable shaders isn't practical.
        self.max_subgroups() > 0
            && (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
    }

    pub fn svm_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_SYSTEM_SVM) == 1
    }

    pub fn unified_memory(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_UMA) == 1
    }

    pub fn vendor_id(&self) -> cl_uint {
        let id = self.screen.param(pipe_cap::PIPE_CAP_VENDOR_ID);
        if id == -1 {
            return 0;
        }
        id as u32
    }

    pub fn prefers_real_buffer_in_cb0(&self) -> bool {
        self.screen
            .param(pipe_cap::PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0)
            == 1
    }

    pub fn shareable_shaders(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_SHAREABLE_SHADERS) == 1
    }

    pub fn images_as_deref(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_NIR_IMAGES_AS_DEREF) == 1
    }

    pub fn samplers_as_deref(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_NIR_SAMPLERS_AS_DEREF) == 1
    }

    pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
        HelperContext {
            lock: self.helper_ctx.lock().unwrap(),
        }
    }

    pub fn cl_features(&self) -> clc_optional_features {
        let subgroups_supported = self.subgroups_supported();
        clc_optional_features {
            fp16: self.fp16_supported(),
            fp64: self.fp64_supported(),
            int64: self.int64_supported(),
            images: self.caps.has_images,
            images_read_write: self.caps.has_rw_images,
            images_write_3d: self.caps.has_3d_image_writes,
            integer_dot_product: true,
            subgroups: subgroups_supported,
            subgroups_shuffle: subgroups_supported,
            subgroups_shuffle_relative: subgroups_supported,
            ..Default::default()
        }
    }
}

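/// Returns the global list of devices created during platform initialization.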
pub fn devs() -> &'static Vec<Device> {
    &Platform::get().devs
}

pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
    devs()
        .iter()
        .filter(|d| device_type & d.device_type(true) != 0)
        .collect()
}

pub fn get_dev_for_uuid(uuid: [c_char; UUID_SIZE]) -> Option<&'static Device> {
    devs().iter().find(|d| {
        let uuid: [c_uchar; UUID_SIZE] = unsafe { transmute(uuid) };
        uuid == d.screen().device_uuid().unwrap()
    })
}