1 use crate::api::icd::*;
2 use crate::api::types::*;
3 use crate::api::util::*;
4 use crate::core::context::*;
5 use crate::core::device::*;
6 use crate::core::format::*;
7 use crate::core::gl::*;
8 use crate::core::queue::*;
9 use crate::core::util::*;
10 use crate::impl_cl_type_trait;
11 use crate::impl_cl_type_trait_base;
12 use crate::perf_warning;
13
14 use mesa_rust::pipe::context::*;
15 use mesa_rust::pipe::resource::*;
16 use mesa_rust::pipe::screen::ResourceType;
17 use mesa_rust::pipe::transfer::*;
18 use mesa_rust_gen::*;
19 use mesa_rust_util::properties::Properties;
20 use mesa_rust_util::ptr::AllocSize;
21 use mesa_rust_util::ptr::TrackedPointers;
22 use rusticl_opencl_gen::*;
23
24 use std::alloc;
25 use std::alloc::Layout;
26 use std::cmp;
27 use std::collections::btree_map::Entry;
28 use std::collections::HashMap;
29 use std::convert::TryInto;
30 use std::mem;
31 use std::mem::size_of;
32 use std::ops::Deref;
33 use std::os::raw::c_void;
34 use std::ptr;
35 use std::sync::Arc;
36 use std::sync::Mutex;
37
38 struct Mapping<T> {
39 layout: Layout,
40 writes: bool,
41 ptr: Option<MutMemoryPtr>,
    /// reference count from the API perspective. Once it reaches 0, we need to write the
    /// mapping's content back to the GPU resource.
44 count: u32,
45 inner: T,
46 }
47
48 impl<T> Drop for Mapping<T> {
    fn drop(&mut self) {
50 if let Some(ptr) = &self.ptr {
51 unsafe {
52 alloc::dealloc(ptr.as_ptr().cast(), self.layout);
53 }
54 }
55 }
56 }
57
58 impl<T> AllocSize<usize> for Mapping<T> {
    fn size(&self) -> usize {
60 self.layout.size()
61 }
62 }
63
64 impl<T> Deref for Mapping<T> {
65 type Target = T;
66
    fn deref(&self) -> &Self::Target {
68 &self.inner
69 }
70 }
71
72 struct BufferMapping {
73 offset: usize,
74 }
75
76 struct ImageMapping {
77 origin: CLVec<usize>,
78 region: CLVec<usize>,
79 }
80
81 #[repr(transparent)]
82 #[derive(Clone, Copy)]
83 pub struct ConstMemoryPtr {
84 ptr: *const c_void,
85 }
86 unsafe impl Send for ConstMemoryPtr {}
87 unsafe impl Sync for ConstMemoryPtr {}
88
89 impl ConstMemoryPtr {
    pub fn as_ptr(&self) -> *const c_void {
91 self.ptr
92 }
93
94 /// # Safety
95 ///
96 /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
97 /// [Send] and [Sync]
    pub unsafe fn from_ptr(ptr: *const c_void) -> Self {
        Self { ptr }
100 }
101 }
102
103 impl From<MutMemoryPtr> for ConstMemoryPtr {
    fn from(value: MutMemoryPtr) -> Self {
105 Self {
106 ptr: value.ptr.cast(),
107 }
108 }
109 }
110
111 #[repr(transparent)]
112 #[derive(Clone, Copy)]
113 pub struct MutMemoryPtr {
114 ptr: *mut c_void,
115 }
116 unsafe impl Send for MutMemoryPtr {}
117 unsafe impl Sync for MutMemoryPtr {}
118
119 impl MutMemoryPtr {
    pub fn as_ptr(&self) -> *mut c_void {
121 self.ptr
122 }
123
124 /// # Safety
125 ///
126 /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
127 /// [Send] and [Sync]
    pub unsafe fn from_ptr(ptr: *mut c_void) -> Self {
        Self { ptr }
130 }
131 }
132
133 pub enum Mem {
134 Buffer(Arc<Buffer>),
135 Image(Arc<Image>),
136 }
137
138 impl Deref for Mem {
139 type Target = MemBase;
140
    fn deref(&self) -> &Self::Target {
142 match self {
143 Self::Buffer(b) => &b.base,
144 Self::Image(i) => &i.base,
145 }
146 }
147 }
148
149 impl Mem {
    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
151 match self {
152 Self::Buffer(b) => b.is_mapped_ptr(ptr),
153 Self::Image(i) => i.is_mapped_ptr(ptr),
154 }
155 }
156
    pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
158 match self {
159 Self::Buffer(b) => b.sync_unmap(q, ctx, ptr),
160 Self::Image(i) => i.sync_unmap(q, ctx, ptr),
161 }
162 }
163
    pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
165 match self {
166 Self::Buffer(b) => b.unmap(ptr),
167 Self::Image(i) => i.unmap(ptr),
168 }
169 }
170 }
171
/// # Mapping memory
///
/// Maps the resource of the device associated with the queue.
///
/// Mapping resources would be quite straightforward if OpenCL didn't allow so-called non-blocking
/// maps. Non-blocking maps shall return a valid pointer to the mapped region immediately, but
/// should not synchronize data (in case of shadow buffers) until after the map event is reached
/// in the queue. This makes it impossible to simply use pipe_transfers, as those can't be
/// explicitly synced by the frontend.
///
/// In order to have a compliant implementation of the mapping API we have to consider the
/// following cases:
/// 1. Mapping a cl_mem object with CL_MEM_USE_HOST_PTR: We simply return the host_ptr.
///    Synchronization of shadowed host ptrs is done in `sync_shadow` on demand.
/// 2. Mapping linear resources on UMA systems: We simply create the pipe_transfer with
///    `PIPE_MAP_DIRECTLY` and `PIPE_MAP_UNSYNCHRONIZED` and return the attached pointer.
/// 3. On non-UMA systems, or when 2. fails (e.g. due to the resource being tiled), we
///    - create a shadow pipe_resource with `PIPE_USAGE_STAGING`,
///      `PIPE_RESOURCE_FLAG_MAP_PERSISTENT` and `PIPE_RESOURCE_FLAG_MAP_COHERENT`,
///    - create a pipe_transfer with `PIPE_MAP_COHERENT`, `PIPE_MAP_PERSISTENT` and
///      `PIPE_MAP_UNSYNCHRONIZED`,
///    - sync the shadow buffer like a host_ptr shadow buffer in 1.
///
/// Taking this approach we guarantee that we only copy when actually needed, while making sure
/// the content behind the returned pointer is valid until unmapped.
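///
/// From the API's point of view, a blocking map of a [Buffer] then roughly follows the sequence
/// below (an illustrative sketch using the helpers in this module, not a public API; `q`, `ctx`,
/// `buffer`, `size` and `offset` are assumed to be in scope):
///
/// ```ignore
/// // clEnqueueMapBuffer: create (or reuse) the host-side mapping ...
/// let ptr = buffer.map(size, offset, /* writes */ true)?;
/// // ... and synchronize its content from the GPU resource when the map event executes.
/// buffer.sync_map(q, ctx, ptr)?;
/// // clEnqueueUnmapMemObject: drop the API reference; if it was the last one, write the content
/// // back and remove the mapping.
/// if buffer.unmap(ptr)? {
///     buffer.sync_unmap(q, ctx, ptr)?;
/// }
/// ```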
197 pub struct MemBase {
198 pub base: CLObjectBase<CL_INVALID_MEM_OBJECT>,
199 pub context: Arc<Context>,
200 pub parent: Option<Mem>,
201 pub mem_type: cl_mem_object_type,
202 pub flags: cl_mem_flags,
203 pub size: usize,
204 // it's a bit hacky, but storing the pointer as `usize` gives us `Send` and `Sync`. The
205 // application is required to ensure no data races exist on the memory anyway.
206 pub host_ptr: usize,
207 pub props: Vec<cl_mem_properties>,
208 pub cbs: Mutex<Vec<MemCB>>,
209 pub gl_obj: Option<GLObject>,
210 res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
211 }
212
213 pub struct Buffer {
214 base: MemBase,
215 pub offset: usize,
216 maps: Mutex<TrackedPointers<usize, Mapping<BufferMapping>>>,
217 }
218
219 pub struct Image {
220 base: MemBase,
221 pub image_format: cl_image_format,
222 pub pipe_format: pipe_format,
223 pub image_desc: cl_image_desc,
224 pub image_elem_size: u8,
225 maps: Mutex<TrackedPointers<usize, Mapping<ImageMapping>>>,
226 }
227
228 impl Deref for Buffer {
229 type Target = MemBase;
230
    fn deref(&self) -> &Self::Target {
232 &self.base
233 }
234 }
235
236 impl Deref for Image {
237 type Target = MemBase;
238
    fn deref(&self) -> &Self::Target {
240 &self.base
241 }
242 }
243
244 impl_cl_type_trait_base!(cl_mem, MemBase, [Buffer, Image], CL_INVALID_MEM_OBJECT);
245 impl_cl_type_trait!(cl_mem, Buffer, CL_INVALID_MEM_OBJECT, base.base);
246 impl_cl_type_trait!(cl_mem, Image, CL_INVALID_MEM_OBJECT, base.base);
247
248 pub trait CLImageDescInfo {
    fn type_info(&self) -> (u8, bool);
    fn pixels(&self) -> usize;
    fn bx(&self) -> CLResult<pipe_box>;
    fn row_pitch(&self) -> CLResult<u32>;
    fn slice_pitch(&self) -> usize;
    fn width(&self) -> CLResult<u32>;
    fn height(&self) -> CLResult<u32>;
    fn size(&self) -> CLVec<usize>;
257
    fn dims(&self) -> u8 {
259 self.type_info().0
260 }
261
    fn dims_with_array(&self) -> u8 {
263 let array: u8 = self.is_array().into();
264 self.dims() + array
265 }
266
    fn has_slice(&self) -> bool {
268 self.dims() == 3 || self.is_array()
269 }
270
    fn is_array(&self) -> bool {
272 self.type_info().1
273 }
274 }
275
276 impl CLImageDescInfo for cl_image_desc {
    fn type_info(&self) -> (u8, bool) {
278 match self.image_type {
279 CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false),
280 CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true),
281 CL_MEM_OBJECT_IMAGE2D => (2, false),
282 CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true),
283 CL_MEM_OBJECT_IMAGE3D => (3, false),
284 _ => panic!("unknown image_type {:x}", self.image_type),
285 }
286 }
287
    fn pixels(&self) -> usize {
289 let mut res = self.image_width;
290 let dims = self.dims();
291
292 if dims > 1 {
293 res *= self.image_height;
294 }
295
296 if dims > 2 {
297 res *= self.image_depth;
298 }
299
300 if self.is_array() {
301 res *= self.image_array_size;
302 }
303
304 res
305 }
306
    fn size(&self) -> CLVec<usize> {
308 let mut height = cmp::max(self.image_height, 1);
309 let mut depth = cmp::max(self.image_depth, 1);
310
311 match self.image_type {
312 CL_MEM_OBJECT_IMAGE1D_ARRAY => height = self.image_array_size,
313 CL_MEM_OBJECT_IMAGE2D_ARRAY => depth = self.image_array_size,
314 _ => {}
315 }
316
317 CLVec::new([self.image_width, height, depth])
318 }
319
    fn bx(&self) -> CLResult<pipe_box> {
321 create_pipe_box(CLVec::default(), self.size(), self.image_type)
322 }
323
    fn row_pitch(&self) -> CLResult<u32> {
325 self.image_row_pitch
326 .try_into()
327 .map_err(|_| CL_OUT_OF_HOST_MEMORY)
328 }
329
    fn slice_pitch(&self) -> usize {
331 self.image_slice_pitch
332 }
333
    fn width(&self) -> CLResult<u32> {
335 self.image_width
336 .try_into()
337 .map_err(|_| CL_OUT_OF_HOST_MEMORY)
338 }
339
    fn height(&self) -> CLResult<u32> {
341 self.image_height
342 .try_into()
343 .map_err(|_| CL_OUT_OF_HOST_MEMORY)
344 }
345 }
346
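/// Software copy of a 3D `region` of pixels between two host pointers, taking the given origins
/// as well as row and slice pitches (in bytes) into account. `pixel_size` is the size of a single
/// pixel in bytes.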
fn sw_copy(
348 src: *const c_void,
349 dst: *mut c_void,
350 region: &CLVec<usize>,
351 src_origin: &CLVec<usize>,
352 src_row_pitch: usize,
353 src_slice_pitch: usize,
354 dst_origin: &CLVec<usize>,
355 dst_row_pitch: usize,
356 dst_slice_pitch: usize,
357 pixel_size: u8,
358 ) {
359 let pixel_size = pixel_size as usize;
360 for z in 0..region[2] {
361 if src_row_pitch == dst_row_pitch && region[1] * pixel_size == src_row_pitch {
362 unsafe {
363 ptr::copy(
364 src.byte_add(
365 (*src_origin + [0, 0, z]) * [pixel_size, src_row_pitch, src_slice_pitch],
366 ),
367 dst.byte_add(
368 (*dst_origin + [0, 0, z]) * [pixel_size, dst_row_pitch, dst_slice_pitch],
369 ),
370 region[0] * region[1] * pixel_size,
371 )
372 }
373 } else {
374 for y in 0..region[1] {
375 unsafe {
376 ptr::copy(
377 src.byte_add(
378 (*src_origin + [0, y, z])
379 * [pixel_size, src_row_pitch, src_slice_pitch],
380 ),
381 dst.byte_add(
382 (*dst_origin + [0, y, z])
383 * [pixel_size, dst_row_pitch, dst_slice_pitch],
384 ),
385 region[0] * pixel_size,
386 )
387 };
388 }
389 }
390 }
391 }
392
393 impl MemBase {
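    /// Creates a new buffer object and allocates its backing pipe resources through the context.
    /// With `CL_MEM_USE_HOST_PTR` the passed `host_ptr` is remembered for later shadow
    /// synchronization.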
    pub fn new_buffer(
395 context: Arc<Context>,
396 flags: cl_mem_flags,
397 size: usize,
398 host_ptr: *mut c_void,
399 props: Vec<cl_mem_properties>,
400 ) -> CLResult<Arc<Buffer>> {
401 let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
402 ResourceType::Staging
403 } else {
404 ResourceType::Normal
405 };
406
407 let buffer = context.create_buffer(
408 size,
409 host_ptr,
410 bit_check(flags, CL_MEM_COPY_HOST_PTR),
411 res_type,
412 )?;
413
414 let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
415 host_ptr as usize
416 } else {
417 0
418 };
419
420 Ok(Arc::new(Buffer {
421 base: Self {
422 base: CLObjectBase::new(RusticlTypes::Buffer),
423 context: context,
424 parent: None,
425 mem_type: CL_MEM_OBJECT_BUFFER,
426 flags: flags,
427 size: size,
428 host_ptr: host_ptr,
429 props: props,
430 gl_obj: None,
431 cbs: Mutex::new(Vec::new()),
432 res: Some(buffer),
433 },
434 offset: 0,
435 maps: Mutex::new(TrackedPointers::new()),
436 }))
437 }
438
    pub fn new_sub_buffer(
440 parent: Arc<Buffer>,
441 flags: cl_mem_flags,
442 offset: usize,
443 size: usize,
444 ) -> Arc<Buffer> {
445 let host_ptr = if parent.host_ptr().is_null() {
446 0
447 } else {
448 unsafe { parent.host_ptr().byte_add(offset) as usize }
449 };
450
451 Arc::new(Buffer {
452 base: Self {
453 base: CLObjectBase::new(RusticlTypes::Buffer),
454 context: parent.context.clone(),
455 parent: Some(Mem::Buffer(parent)),
456 mem_type: CL_MEM_OBJECT_BUFFER,
457 flags: flags,
458 size: size,
459 host_ptr: host_ptr,
460 props: Vec::new(),
461 gl_obj: None,
462 cbs: Mutex::new(Vec::new()),
463 res: None,
464 },
465 offset: offset,
466 maps: Mutex::new(TrackedPointers::new()),
467 })
468 }
469
    pub fn new_image(
471 context: Arc<Context>,
472 parent: Option<Mem>,
473 mem_type: cl_mem_object_type,
474 flags: cl_mem_flags,
475 image_format: &cl_image_format,
476 mut image_desc: cl_image_desc,
477 image_elem_size: u8,
478 host_ptr: *mut c_void,
479 props: Vec<cl_mem_properties>,
480 ) -> CLResult<Arc<Image>> {
481 // we have to sanitize the image_desc a little for internal use
482 let api_image_desc = image_desc;
483 let dims = image_desc.dims();
484 let is_array = image_desc.is_array();
485 if dims < 3 {
486 image_desc.image_depth = 1;
487 }
488 if dims < 2 {
489 image_desc.image_height = 1;
490 }
491 if !is_array {
492 image_desc.image_array_size = 1;
493 }
494
495 let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
496 ResourceType::Staging
497 } else {
498 ResourceType::Normal
499 };
500
501 let texture = if parent.is_none() {
502 let mut texture = context.create_texture(
503 &image_desc,
504 image_format,
505 host_ptr,
506 bit_check(flags, CL_MEM_COPY_HOST_PTR),
507 res_type,
508 );
509
        // If allocating a staging resource fails, just retry with a normal one, as
        // `CL_MEM_ALLOC_HOST_PTR` is only a performance hint.
512 if res_type == ResourceType::Staging && texture.is_err() {
513 texture = context.create_texture(
514 &image_desc,
515 image_format,
516 host_ptr,
517 bit_check(flags, CL_MEM_COPY_HOST_PTR),
518 ResourceType::Normal,
519 )
520 }
521
522 Some(texture?)
523 } else {
524 None
525 };
526
527 let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
528 host_ptr as usize
529 } else {
530 0
531 };
532
533 let pipe_format = image_format.to_pipe_format().unwrap();
534 Ok(Arc::new(Image {
535 base: Self {
536 base: CLObjectBase::new(RusticlTypes::Image),
537 context: context,
538 parent: parent,
539 mem_type: mem_type,
540 flags: flags,
541 size: image_desc.pixels() * image_format.pixel_size().unwrap() as usize,
542 host_ptr: host_ptr,
543 props: props,
544 gl_obj: None,
545 cbs: Mutex::new(Vec::new()),
546 res: texture,
547 },
548 image_format: *image_format,
549 pipe_format: pipe_format,
550 image_desc: api_image_desc,
551 image_elem_size: image_elem_size,
552 maps: Mutex::new(TrackedPointers::new()),
553 }))
554 }
555
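    /// Converts a raw `cl_mem` handle into a strong reference, determining whether it refers to a
    /// buffer or an image.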
    pub fn arc_from_raw(ptr: cl_mem) -> CLResult<Mem> {
557 let mem = Self::ref_from_raw(ptr)?;
558 match mem.base.get_type()? {
559 RusticlTypes::Buffer => Ok(Mem::Buffer(Buffer::arc_from_raw(ptr)?)),
560 RusticlTypes::Image => Ok(Mem::Image(Image::arc_from_raw(ptr)?)),
561 _ => Err(CL_INVALID_MEM_OBJECT),
562 }
563 }
564
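    /// Converts an array of `count` raw `cl_mem` handles into strong references.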
    pub fn arcs_from_arr(objs: *const cl_mem, count: u32) -> CLResult<Vec<Mem>> {
566 let count = count as usize;
567 let mut res = Vec::with_capacity(count);
568 for i in 0..count {
569 res.push(Self::arc_from_raw(unsafe { *objs.add(i) })?);
570 }
571 Ok(res)
572 }
573
    pub fn from_gl(
575 context: Arc<Context>,
576 flags: cl_mem_flags,
577 gl_export_manager: &GLExportManager,
578 ) -> CLResult<cl_mem> {
579 let export_in = &gl_export_manager.export_in;
580 let export_out = &gl_export_manager.export_out;
581
582 let (mem_type, gl_object_type) = target_from_gl(export_in.target)?;
583 let gl_mem_props = gl_export_manager.get_gl_mem_props()?;
584
585 // Handle Buffers
586 let (image_format, pipe_format, rusticl_type) = if gl_export_manager.is_gl_buffer() {
587 (
588 cl_image_format::default(),
589 pipe_format::PIPE_FORMAT_NONE,
590 RusticlTypes::Buffer,
591 )
592 } else {
593 let image_format =
594 format_from_gl(export_out.internal_format).ok_or(CL_OUT_OF_HOST_MEMORY)?;
595 (
596 image_format,
597 image_format.to_pipe_format().unwrap(),
598 RusticlTypes::Image,
599 )
600 };
601
602 let imported_gl_tex = context.import_gl_buffer(
603 export_out.dmabuf_fd as u32,
604 export_out.modifier,
605 mem_type,
606 export_in.target,
607 pipe_format,
608 gl_mem_props.clone(),
609 )?;
610
        // Cube map faces are not linear in memory, so copy the contents of the desired face into
        // a 2D image and copy it back after GL release.
613 let (shadow_map, texture) = if is_cube_map_face(export_in.target) {
614 let shadow = create_shadow_slice(&imported_gl_tex, image_format)?;
615
616 let mut res_map = HashMap::new();
617 shadow
618 .iter()
619 .map(|(k, v)| {
620 let gl_res = imported_gl_tex.get(k).unwrap().clone();
621 res_map.insert(v.clone(), gl_res);
622 })
623 .for_each(drop);
624
625 (Some(res_map), shadow)
626 } else {
627 (None, imported_gl_tex)
628 };
629
        // It's kinda not supported, but we want to know if anything actually hits this, as it's
        // certainly not tested by the CL CTS.
632 if mem_type != CL_MEM_OBJECT_BUFFER {
633 assert_eq!(gl_mem_props.offset, 0);
634 }
635
636 let base = Self {
637 base: CLObjectBase::new(rusticl_type),
638 context: context,
639 parent: None,
640 mem_type: mem_type,
641 flags: flags,
642 size: gl_mem_props.size(),
643 host_ptr: 0,
644 props: Vec::new(),
645 gl_obj: Some(GLObject {
646 gl_object_target: gl_export_manager.export_in.target,
647 gl_object_type: gl_object_type,
648 gl_object_name: export_in.obj,
649 shadow_map: shadow_map,
650 }),
651 cbs: Mutex::new(Vec::new()),
652 res: Some(texture),
653 };
654
655 Ok(if rusticl_type == RusticlTypes::Buffer {
656 Arc::new(Buffer {
657 base: base,
658 offset: gl_mem_props.offset as usize,
659 maps: Mutex::new(TrackedPointers::new()),
660 })
661 .into_cl()
662 } else {
663 Arc::new(Image {
664 base: base,
665 image_format: image_format,
666 pipe_format: pipe_format,
667 image_desc: cl_image_desc {
668 image_type: mem_type,
669 image_width: gl_mem_props.width as usize,
670 image_height: gl_mem_props.height as usize,
671 image_depth: gl_mem_props.depth as usize,
672 image_array_size: gl_mem_props.array_size as usize,
673 image_row_pitch: 0,
674 image_slice_pitch: 0,
675 num_mip_levels: 1,
676 num_samples: 1,
677 ..Default::default()
678 },
679 image_elem_size: gl_mem_props.pixel_size,
680 maps: Mutex::new(TrackedPointers::new()),
681 })
682 .into_cl()
683 })
684 }
685
    pub fn is_buffer(&self) -> bool {
687 self.mem_type == CL_MEM_OBJECT_BUFFER
688 }
689
    pub fn has_same_parent(&self, other: &Self) -> bool {
691 ptr::eq(self.get_parent(), other.get_parent())
692 }
693
    // This is somewhat bogus, because it won't work with system SVM, but the spec wants us to
    // implement it.
    pub fn is_svm(&self) -> bool {
697 let mem = self.get_parent();
698 self.context.find_svm_alloc(mem.host_ptr).is_some()
699 && bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
700 }
701
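    /// Returns the pipe resource backing this memory object (or its parent, for sub-buffers and
    /// buffer-backed images) on the given device.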
    pub fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
703 self.get_parent()
704 .res
705 .as_ref()
706 .and_then(|resources| resources.get(dev))
707 .ok_or(CL_OUT_OF_HOST_MEMORY)
708 }
709
    fn get_parent(&self) -> &Self {
711 if let Some(parent) = &self.parent {
712 parent
713 } else {
714 self
715 }
716 }
717
    pub fn host_ptr(&self) -> *mut c_void {
719 self.host_ptr as *mut c_void
720 }
721
    fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
        let r = self.get_res_of_dev(d)?;
        // 1D buffer objects are weird. The parent memory object can be backed by a host_ptr, but
        // we are not allowed to actually return a pointer based on the host_ptr when mapping.
726 Ok(r.is_user() && !self.host_ptr().is_null())
727 }
728
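    /// Creates or reuses a mapping at `offset`. If the memory object has a host_ptr, the mapping
    /// points into it directly; otherwise a temporary allocation with the given `layout` backs
    /// the mapping. Mappings are reference counted per returned pointer.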
    fn map<T>(
730 &self,
731 offset: usize,
732 layout: Layout,
733 writes: bool,
734 maps: &Mutex<TrackedPointers<usize, Mapping<T>>>,
735 inner: T,
736 ) -> CLResult<MutMemoryPtr> {
737 let host_ptr = self.host_ptr();
738 let ptr = unsafe {
739 let ptr = if !host_ptr.is_null() {
740 host_ptr.byte_add(offset)
741 } else {
742 alloc::alloc(layout).cast()
743 };
744
745 MutMemoryPtr::from_ptr(ptr)
746 };
747
748 match maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
749 Entry::Occupied(mut e) => {
750 debug_assert!(!host_ptr.is_null());
751 e.get_mut().count += 1;
752 }
753 Entry::Vacant(e) => {
754 e.insert(Mapping {
755 layout: layout,
756 writes: writes,
757 ptr: host_ptr.is_null().then_some(ptr),
758 count: 1,
759 inner: inner,
760 });
761 }
762 }
763
764 Ok(ptr)
765 }
766 }
767
768 impl Drop for MemBase {
    fn drop(&mut self) {
770 let cbs = mem::take(self.cbs.get_mut().unwrap());
771 for cb in cbs.into_iter().rev() {
772 cb.call(self);
773 }
774 }
775 }
776
777 impl Buffer {
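    /// Adds this buffer's sub-buffer offset to `offset`, returning an error on overflow.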
    fn apply_offset(&self, offset: usize) -> CLResult<usize> {
779 self.offset.checked_add(offset).ok_or(CL_OUT_OF_HOST_MEMORY)
780 }
781
    pub fn copy_rect(
783 &self,
784 dst: &Self,
785 q: &Queue,
786 ctx: &PipeContext,
787 region: &CLVec<usize>,
788 src_origin: &CLVec<usize>,
789 src_row_pitch: usize,
790 src_slice_pitch: usize,
791 dst_origin: &CLVec<usize>,
792 dst_row_pitch: usize,
793 dst_slice_pitch: usize,
794 ) -> CLResult<()> {
795 let (offset, size) =
796 CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
797 let tx_src = self.tx(q, ctx, offset, size, RWFlags::RD)?;
798
799 let (offset, size) =
800 CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
801 let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;
802
803 perf_warning!("clEnqueueCopyBufferRect stalls the GPU");
804
805 // TODO check to use hw accelerated paths (e.g. resource_copy_region or blits)
806 sw_copy(
807 tx_src.ptr(),
808 tx_dst.ptr(),
809 region,
810 &CLVec::default(),
811 src_row_pitch,
812 src_slice_pitch,
813 &CLVec::default(),
814 dst_row_pitch,
815 dst_slice_pitch,
816 1,
817 );
818
819 Ok(())
820 }
821
    pub fn copy_to_buffer(
823 &self,
824 q: &Queue,
825 ctx: &PipeContext,
826 dst: &Buffer,
827 src_offset: usize,
828 dst_offset: usize,
829 size: usize,
830 ) -> CLResult<()> {
831 let src_offset = self.apply_offset(src_offset)?;
832 let dst_offset = dst.apply_offset(dst_offset)?;
833 let src_res = self.get_res_of_dev(q.device)?;
834 let dst_res = dst.get_res_of_dev(q.device)?;
835
836 let bx = create_pipe_box(
837 [src_offset, 0, 0].into(),
838 [size, 1, 1].into(),
839 CL_MEM_OBJECT_BUFFER,
840 )?;
841 let dst_origin: [u32; 3] = [
842 dst_offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
843 0,
844 0,
845 ];
846
847 ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
848 Ok(())
849 }
850
    pub fn copy_to_image(
852 &self,
853 q: &Queue,
854 ctx: &PipeContext,
855 dst: &Image,
856 src_offset: usize,
857 dst_origin: CLVec<usize>,
858 region: &CLVec<usize>,
859 ) -> CLResult<()> {
860 let src_offset = self.apply_offset(src_offset)?;
861 let bpp = dst.image_format.pixel_size().unwrap().into();
862 let src_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];
863 let size = CLVec::calc_size(region, src_pitch);
864 let tx_src = self.tx(q, ctx, src_offset, size, RWFlags::RD)?;
865
        // If the image is created from a buffer, use the image's row and slice pitch instead.
867 let tx_dst;
868 let dst_pitch;
869 if let Some(Mem::Buffer(buffer)) = &dst.parent {
870 dst_pitch = [
871 bpp,
872 dst.image_desc.row_pitch()? as usize,
873 dst.image_desc.slice_pitch(),
874 ];
875
876 let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
877 tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
878 } else {
879 tx_dst = dst.tx_image(
880 q,
881 ctx,
882 &create_pipe_box(dst_origin, *region, dst.mem_type)?,
883 RWFlags::WR,
884 )?;
885
886 dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
887 }
888
        // None of these pitch values may be zero.
890 debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
891 debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);
892
893 perf_warning!("clEnqueueCopyBufferToImage stalls the GPU");
894
895 sw_copy(
896 tx_src.ptr(),
897 tx_dst.ptr(),
898 region,
899 &CLVec::default(),
900 src_pitch[1],
901 src_pitch[2],
902 &CLVec::default(),
903 dst_pitch[1],
904 dst_pitch[2],
905 bpp as u8,
906 );
907 Ok(())
908 }
909
    pub fn fill(
911 &self,
912 q: &Queue,
913 ctx: &PipeContext,
914 pattern: &[u8],
915 offset: usize,
916 size: usize,
917 ) -> CLResult<()> {
918 let offset = self.apply_offset(offset)?;
919 let res = self.get_res_of_dev(q.device)?;
920 ctx.clear_buffer(
921 res,
922 pattern,
923 offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
924 size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
925 );
926 Ok(())
927 }
928
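    /// Returns whether `ptr` is a currently active mapping (API reference count > 0) of this
    /// buffer.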
    fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
930 let mut maps = self.maps.lock().unwrap();
931 let entry = maps.entry(ptr as usize);
932 matches!(entry, Entry::Occupied(entry) if entry.get().count > 0)
933 }
934
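    /// Creates (or reuses) a host-side mapping of `size` bytes at `offset`. The content is
    /// synchronized with the GPU resource separately via `sync_map` and `sync_unmap`.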
    pub fn map(&self, size: usize, offset: usize, writes: bool) -> CLResult<MutMemoryPtr> {
936 let layout =
937 unsafe { Layout::from_size_align_unchecked(size, size_of::<[cl_ulong; 16]>()) };
938 self.base.map(
939 offset,
940 layout,
941 writes,
942 &self.maps,
943 BufferMapping { offset: offset },
944 )
945 }
946
    pub fn read(
948 &self,
949 q: &Queue,
950 ctx: &PipeContext,
951 offset: usize,
952 ptr: MutMemoryPtr,
953 size: usize,
954 ) -> CLResult<()> {
955 let ptr = ptr.as_ptr();
956 let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;
957
958 perf_warning!("clEnqueueReadBuffer and clEnqueueMapBuffer stall the GPU");
959
960 unsafe {
961 ptr::copy(tx.ptr(), ptr, size);
962 }
963
964 Ok(())
965 }
966
    pub fn read_rect(
968 &self,
969 dst: MutMemoryPtr,
970 q: &Queue,
971 ctx: &PipeContext,
972 region: &CLVec<usize>,
973 src_origin: &CLVec<usize>,
974 src_row_pitch: usize,
975 src_slice_pitch: usize,
976 dst_origin: &CLVec<usize>,
977 dst_row_pitch: usize,
978 dst_slice_pitch: usize,
979 ) -> CLResult<()> {
980 let dst = dst.as_ptr();
981 let (offset, size) =
982 CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
983 let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;
984
985 perf_warning!("clEnqueueReadBufferRect stalls the GPU");
986
987 sw_copy(
988 tx.ptr(),
989 dst,
990 region,
991 &CLVec::default(),
992 src_row_pitch,
993 src_slice_pitch,
994 dst_origin,
995 dst_row_pitch,
996 dst_slice_pitch,
997 1,
998 );
999
1000 Ok(())
1001 }
1002
    pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
1004 // no need to update
1005 if self.is_pure_user_memory(q.device)? {
1006 return Ok(());
1007 }
1008
1009 let maps = self.maps.lock().unwrap();
1010 let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
1011 return Err(CL_INVALID_VALUE);
1012 };
1013
1014 self.read(q, ctx, mapping.offset, ptr, mapping.size())
1015 }
1016
    pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
1018 // no need to update
1019 if self.is_pure_user_memory(q.device)? {
1020 return Ok(());
1021 }
1022
1023 match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
1024 Entry::Vacant(_) => Err(CL_INVALID_VALUE),
1025 Entry::Occupied(entry) => {
1026 let mapping = entry.get();
1027
1028 if mapping.writes {
1029 self.write(q, ctx, mapping.offset, ptr.into(), mapping.size())?;
1030 }
1031
1032 // only remove if the mapping wasn't reused in the meantime
1033 if mapping.count == 0 {
1034 entry.remove();
1035 }
1036
1037 Ok(())
1038 }
1039 }
1040 }
1041
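    /// Maps `size` bytes of the buffer's resource on the queue's device, starting at `offset`
    /// relative to the start of the (sub-)buffer, and returns the transfer.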
    fn tx<'a>(
1043 &self,
1044 q: &Queue,
1045 ctx: &'a PipeContext,
1046 offset: usize,
1047 size: usize,
1048 rw: RWFlags,
1049 ) -> CLResult<PipeTransfer<'a>> {
1050 let offset = self.apply_offset(offset)?;
1051 let r = self.get_res_of_dev(q.device)?;
1052
1053 ctx.buffer_map(
1054 r,
1055 offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1056 size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1057 rw,
1058 )
1059 .ok_or(CL_OUT_OF_RESOURCES)
1060 }
1061
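    /// Drops one API reference of the mapping at `ptr` and returns whether it was the last one.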
    pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
1063 match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
1064 Entry::Vacant(_) => Err(CL_INVALID_VALUE),
1065 Entry::Occupied(mut entry) => {
1066 let entry = entry.get_mut();
1067 debug_assert!(entry.count > 0);
1068 entry.count -= 1;
1069 Ok(entry.count == 0)
1070 }
1071 }
1072 }
1073
    pub fn write(
1075 &self,
1076 q: &Queue,
1077 ctx: &PipeContext,
1078 offset: usize,
1079 ptr: ConstMemoryPtr,
1080 size: usize,
1081 ) -> CLResult<()> {
1082 let ptr = ptr.as_ptr();
1083 let offset = self.apply_offset(offset)?;
1084 let r = self.get_res_of_dev(q.device)?;
1085
1086 perf_warning!("clEnqueueWriteBuffer and clEnqueueUnmapMemObject might stall the GPU");
1087
1088 ctx.buffer_subdata(
1089 r,
1090 offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1091 ptr,
1092 size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1093 );
1094 Ok(())
1095 }
1096
    pub fn write_rect(
1098 &self,
1099 src: ConstMemoryPtr,
1100 q: &Queue,
1101 ctx: &PipeContext,
1102 region: &CLVec<usize>,
1103 src_origin: &CLVec<usize>,
1104 src_row_pitch: usize,
1105 src_slice_pitch: usize,
1106 dst_origin: &CLVec<usize>,
1107 dst_row_pitch: usize,
1108 dst_slice_pitch: usize,
1109 ) -> CLResult<()> {
1110 let src = src.as_ptr();
1111 let (offset, size) =
1112 CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
1113 let tx = self.tx(q, ctx, offset, size, RWFlags::WR)?;
1114
1115 perf_warning!("clEnqueueWriteBufferRect stalls the GPU");
1116
1117 sw_copy(
1118 src,
1119 tx.ptr(),
1120 region,
1121 src_origin,
1122 src_row_pitch,
1123 src_slice_pitch,
1124 &CLVec::default(),
1125 dst_row_pitch,
1126 dst_slice_pitch,
1127 1,
1128 );
1129
1130 Ok(())
1131 }
1132 }
1133
1134 impl Image {
    pub fn copy_to_buffer(
1136 &self,
1137 q: &Queue,
1138 ctx: &PipeContext,
1139 dst: &Buffer,
1140 src_origin: CLVec<usize>,
1141 dst_offset: usize,
1142 region: &CLVec<usize>,
1143 ) -> CLResult<()> {
1144 let dst_offset = dst.apply_offset(dst_offset)?;
1145 let bpp = self.image_format.pixel_size().unwrap().into();
1146
1147 let src_pitch;
1148 let tx_src;
1149 if let Some(Mem::Buffer(buffer)) = &self.parent {
1150 src_pitch = [
1151 bpp,
1152 self.image_desc.row_pitch()? as usize,
1153 self.image_desc.slice_pitch(),
1154 ];
1155 let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
1156 tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
1157 } else {
1158 tx_src = self.tx_image(
1159 q,
1160 ctx,
1161 &create_pipe_box(src_origin, *region, self.mem_type)?,
1162 RWFlags::RD,
1163 )?;
1164 src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
1165 }
1166
1167 // If image is created from a buffer, use image's slice and row pitch instead
1168 let dst_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];
1169
1170 let dst_origin: CLVec<usize> = [dst_offset, 0, 0].into();
1171 let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
1172 let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;
1173
        // None of these pitch values may be zero.
1175 debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
1176 debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);
1177
1178 perf_warning!("clEnqueueCopyImageToBuffer stalls the GPU");
1179
1180 sw_copy(
1181 tx_src.ptr(),
1182 tx_dst.ptr(),
1183 region,
1184 &CLVec::default(),
1185 src_pitch[1],
1186 src_pitch[2],
1187 &CLVec::default(),
1188 dst_pitch[1],
1189 dst_pitch[2],
1190 bpp as u8,
1191 );
1192 Ok(())
1193 }
1194
    pub fn copy_to_image(
1196 &self,
1197 q: &Queue,
1198 ctx: &PipeContext,
1199 dst: &Image,
1200 src_origin: CLVec<usize>,
1201 dst_origin: CLVec<usize>,
1202 region: &CLVec<usize>,
1203 ) -> CLResult<()> {
1204 let src_parent = self.get_parent();
1205 let dst_parent = dst.get_parent();
1206 let src_res = src_parent.get_res_of_dev(q.device)?;
1207 let dst_res = dst_parent.get_res_of_dev(q.device)?;
1208
        // We only want to use sw_copy if the mem objects have different types or if the copy can
        // have custom strides (e.g. images created from buffers).
1211 if src_parent.is_buffer() || dst_parent.is_buffer() {
1212 let bpp = self.image_format.pixel_size().unwrap().into();
1213
1214 let tx_src;
1215 let tx_dst;
1216 let dst_pitch;
1217 let src_pitch;
1218 if let Some(Mem::Buffer(buffer)) = &self.parent {
1219 src_pitch = [
1220 bpp,
1221 self.image_desc.row_pitch()? as usize,
1222 self.image_desc.slice_pitch(),
1223 ];
1224
1225 let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
1226 tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
1227 } else {
1228 tx_src = self.tx_image(
1229 q,
1230 ctx,
1231 &create_pipe_box(src_origin, *region, src_parent.mem_type)?,
1232 RWFlags::RD,
1233 )?;
1234
1235 src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
1236 }
1237
1238 if let Some(Mem::Buffer(buffer)) = &dst.parent {
1239 // If image is created from a buffer, use image's slice and row pitch instead
1240 dst_pitch = [
1241 bpp,
1242 dst.image_desc.row_pitch()? as usize,
1243 dst.image_desc.slice_pitch(),
1244 ];
1245
1246 let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
1247 tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
1248 } else {
1249 tx_dst = dst.tx_image(
1250 q,
1251 ctx,
1252 &create_pipe_box(dst_origin, *region, dst_parent.mem_type)?,
1253 RWFlags::WR,
1254 )?;
1255
1256 dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
1257 }
1258
            // None of these pitch values may be zero.
1260 debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
1261 debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);
1262
1263 perf_warning!(
1264 "clEnqueueCopyImage stalls the GPU when src or dst are created from a buffer"
1265 );
1266
1267 sw_copy(
1268 tx_src.ptr(),
1269 tx_dst.ptr(),
1270 region,
1271 &CLVec::default(),
1272 src_pitch[1],
1273 src_pitch[2],
1274 &CLVec::default(),
1275 dst_pitch[1],
1276 dst_pitch[2],
1277 bpp as u8,
1278 )
1279 } else {
1280 let bx = create_pipe_box(src_origin, *region, src_parent.mem_type)?;
1281 let mut dst_origin: [u32; 3] = dst_origin.try_into()?;
1282
1283 if src_parent.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
1284 (dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
1285 }
1286
1287 ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
1288 }
1289 Ok(())
1290 }
1291
    pub fn fill(
1293 &self,
1294 q: &Queue,
1295 ctx: &PipeContext,
1296 pattern: &[u32],
1297 origin: &CLVec<usize>,
1298 region: &CLVec<usize>,
1299 ) -> CLResult<()> {
1300 let res = self.get_res_of_dev(q.device)?;
1301
        // Make sure we allocate multiples of 4 bytes so drivers don't read out of bounds or
        // unaligned.
1305 let pixel_size: usize = self.image_format.pixel_size().unwrap().into();
1306 let mut new_pattern: Vec<u32> = vec![0; pixel_size.div_ceil(size_of::<u32>())];
1307
1308 // we don't support CL_DEPTH for now
1309 assert!(pattern.len() == 4);
1310
1311 // SAFETY: pointers have to be valid for read/writes of exactly one pixel of their
1312 // respective format.
        // `new_pattern` has the correct size due to the computation above.
1314 // `pattern` is validated through the CL API and allows undefined behavior if not followed
1315 // by CL API rules. It's expected to be a 4 component array of 32 bit values, except for
1316 // CL_DEPTH where it's just one value.
1317 unsafe {
1318 util_format_pack_rgba(
1319 self.pipe_format,
1320 new_pattern.as_mut_ptr().cast(),
1321 pattern.as_ptr().cast(),
1322 1,
1323 );
1324 }
1325
1326 // If image is created from a buffer, use clear_image_buffer instead
1327 if self.is_parent_buffer() {
1328 let strides = (
1329 self.image_desc.row_pitch()? as usize,
1330 self.image_desc.slice_pitch(),
1331 );
1332 ctx.clear_image_buffer(res, &new_pattern, origin, region, strides, pixel_size);
1333 } else {
1334 let bx = create_pipe_box(*origin, *region, self.mem_type)?;
1335 ctx.clear_texture(res, &new_pattern, &bx);
1336 }
1337
1338 Ok(())
1339 }
1340
    fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
1342 let mut maps = self.maps.lock().unwrap();
1343 let entry = maps.entry(ptr as usize);
1344 matches!(entry, Entry::Occupied(entry) if entry.get().count > 0)
1345 }
1346
    pub fn is_parent_buffer(&self) -> bool {
1348 matches!(self.parent, Some(Mem::Buffer(_)))
1349 }
1350
    pub fn map(
1352 &self,
1353 origin: CLVec<usize>,
1354 region: CLVec<usize>,
1355 row_pitch: &mut usize,
1356 slice_pitch: &mut usize,
1357 writes: bool,
1358 ) -> CLResult<MutMemoryPtr> {
1359 let pixel_size = self.image_format.pixel_size().unwrap() as usize;
1360
1361 *row_pitch = self.image_desc.row_pitch()? as usize;
1362 *slice_pitch = self.image_desc.slice_pitch();
1363
1364 let offset = CLVec::calc_offset(origin, [pixel_size, *row_pitch, *slice_pitch]);
1365
1366 // From the CL Spec:
1367 //
1368 // The pointer returned maps a 1D, 2D or 3D region starting at origin and is at least
1369 // region[0] pixels in size for a 1D image, 1D image buffer or 1D image array,
1370 // (image_row_pitch × region[1]) pixels in size for a 2D image or 2D image array, and
1371 // (image_slice_pitch × region[2]) pixels in size for a 3D image. The result of a memory
1372 // access outside this region is undefined.
1373 //
1374 // It's not guaranteed that the row_pitch is taken into account for 1D images, but the CL
1375 // CTS relies on this behavior.
1376 //
        // Also note that the spec wording is wrong in regard to arrays, which need to take the
        // image_slice_pitch into account.
1379 let size = if self.image_desc.is_array() || self.image_desc.dims() == 3 {
1380 debug_assert_ne!(*slice_pitch, 0);
1381 // the slice count is in region[1] for 1D array images
1382 if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
1383 region[1] * *slice_pitch
1384 } else {
1385 region[2] * *slice_pitch
1386 }
1387 } else {
1388 debug_assert_ne!(*row_pitch, 0);
1389 region[1] * *row_pitch
1390 };
1391
1392 let layout;
1393 unsafe {
1394 layout = Layout::from_size_align_unchecked(size, size_of::<[u32; 4]>());
1395 }
1396
1397 self.base.map(
1398 offset,
1399 layout,
1400 writes,
1401 &self.maps,
1402 ImageMapping {
1403 origin: origin,
1404 region: region,
1405 },
1406 )
1407 }
1408
    pub fn pipe_image_host_access(&self) -> u16 {
1410 // those flags are all mutually exclusive
1411 (if bit_check(self.flags, CL_MEM_HOST_READ_ONLY) {
1412 PIPE_IMAGE_ACCESS_READ
1413 } else if bit_check(self.flags, CL_MEM_HOST_WRITE_ONLY) {
1414 PIPE_IMAGE_ACCESS_WRITE
1415 } else if bit_check(self.flags, CL_MEM_HOST_NO_ACCESS) {
1416 0
1417 } else {
1418 PIPE_IMAGE_ACCESS_READ_WRITE
1419 }) as u16
1420 }
1421
    pub fn read(
1423 &self,
1424 dst: MutMemoryPtr,
1425 q: &Queue,
1426 ctx: &PipeContext,
1427 region: &CLVec<usize>,
1428 src_origin: &CLVec<usize>,
1429 dst_row_pitch: usize,
1430 dst_slice_pitch: usize,
1431 ) -> CLResult<()> {
1432 let dst = dst.as_ptr();
1433 let pixel_size = self.image_format.pixel_size().unwrap();
1434
1435 let tx;
1436 let src_row_pitch;
1437 let src_slice_pitch;
1438 if let Some(Mem::Buffer(buffer)) = &self.parent {
1439 src_row_pitch = self.image_desc.image_row_pitch;
1440 src_slice_pitch = self.image_desc.image_slice_pitch;
1441
1442 let (offset, size) = CLVec::calc_offset_size(
1443 src_origin,
1444 region,
1445 [pixel_size.into(), src_row_pitch, src_slice_pitch],
1446 );
1447
1448 tx = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
1449 } else {
1450 let bx = create_pipe_box(*src_origin, *region, self.mem_type)?;
1451 tx = self.tx_image(q, ctx, &bx, RWFlags::RD)?;
1452 src_row_pitch = tx.row_pitch() as usize;
1453 src_slice_pitch = tx.slice_pitch();
1454 };
1455
1456 perf_warning!("clEnqueueReadImage and clEnqueueMapImage stall the GPU");
1457
1458 sw_copy(
1459 tx.ptr(),
1460 dst,
1461 region,
1462 &CLVec::default(),
1463 src_row_pitch,
1464 src_slice_pitch,
1465 &CLVec::default(),
1466 dst_row_pitch,
1467 dst_slice_pitch,
1468 pixel_size,
1469 );
1470
1471 Ok(())
1472 }
1473
    pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
1475 // no need to update
1476 if self.is_pure_user_memory(q.device)? {
1477 return Ok(());
1478 }
1479
1480 let maps = self.maps.lock().unwrap();
1481 let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
1482 return Err(CL_INVALID_VALUE);
1483 };
1484
1485 let row_pitch = self.image_desc.row_pitch()? as usize;
1486 let slice_pitch = self.image_desc.slice_pitch();
1487
1488 self.read(
1489 ptr,
1490 q,
1491 ctx,
1492 &mapping.region,
1493 &mapping.origin,
1494 row_pitch,
1495 slice_pitch,
1496 )
1497 }
1498
    pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
1500 // no need to update
1501 if self.is_pure_user_memory(q.device)? {
1502 return Ok(());
1503 }
1504
1505 match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
1506 Entry::Vacant(_) => Err(CL_INVALID_VALUE),
1507 Entry::Occupied(entry) => {
1508 let mapping = entry.get();
1509 let row_pitch = self.image_desc.row_pitch()? as usize;
1510 let slice_pitch = self.image_desc.slice_pitch();
1511
1512 if mapping.writes {
1513 self.write(
1514 ptr.into(),
1515 q,
1516 ctx,
1517 &mapping.region,
1518 row_pitch,
1519 slice_pitch,
1520 &mapping.origin,
1521 )?;
1522 }
1523
1524 // only remove if the mapping wasn't reused in the meantime
1525 if mapping.count == 0 {
1526 entry.remove();
1527 }
1528
1529 Ok(())
1530 }
1531 }
1532 }
1533
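    /// Maps the region described by `bx` of the image's resource on the queue's device and
    /// returns the transfer.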
    fn tx_image<'a>(
1535 &self,
1536 q: &Queue,
1537 ctx: &'a PipeContext,
1538 bx: &pipe_box,
1539 rw: RWFlags,
1540 ) -> CLResult<PipeTransfer<'a>> {
1541 let r = self.get_res_of_dev(q.device)?;
1542 ctx.texture_map(r, bx, rw).ok_or(CL_OUT_OF_RESOURCES)
1543 }
1544
    pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
1546 match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
1547 Entry::Vacant(_) => Err(CL_INVALID_VALUE),
1548 Entry::Occupied(mut entry) => {
1549 let entry = entry.get_mut();
1550 debug_assert!(entry.count > 0);
1551 entry.count -= 1;
1552 Ok(entry.count == 0)
1553 }
1554 }
1555 }
1556
    pub fn write(
1558 &self,
1559 src: ConstMemoryPtr,
1560 q: &Queue,
1561 ctx: &PipeContext,
1562 region: &CLVec<usize>,
1563 src_row_pitch: usize,
1564 mut src_slice_pitch: usize,
1565 dst_origin: &CLVec<usize>,
1566 ) -> CLResult<()> {
1567 let src = src.as_ptr();
1568 let dst_row_pitch = self.image_desc.image_row_pitch;
1569 let dst_slice_pitch = self.image_desc.image_slice_pitch;
1570
1571 // texture_subdata most likely maps the resource anyway
1572 perf_warning!("clEnqueueWriteImage and clEnqueueUnmapMemObject stall the GPU");
1573
1574 if let Some(Mem::Buffer(buffer)) = &self.parent {
1575 let pixel_size = self.image_format.pixel_size().unwrap();
1576 let (offset, size) = CLVec::calc_offset_size(
1577 dst_origin,
1578 region,
1579 [pixel_size.into(), dst_row_pitch, dst_slice_pitch],
1580 );
1581 let tx = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
1582
1583 sw_copy(
1584 src,
1585 tx.ptr(),
1586 region,
1587 &CLVec::default(),
1588 src_row_pitch,
1589 src_slice_pitch,
1590 &CLVec::default(),
1591 dst_row_pitch,
1592 dst_slice_pitch,
1593 pixel_size,
1594 );
1595 } else {
1596 let res = self.get_res_of_dev(q.device)?;
1597 let bx = create_pipe_box(*dst_origin, *region, self.mem_type)?;
1598
1599 if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
1600 src_slice_pitch = src_row_pitch;
1601 }
1602
1603 ctx.texture_subdata(
1604 res,
1605 &bx,
1606 src,
1607 src_row_pitch
1608 .try_into()
1609 .map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1610 src_slice_pitch,
1611 );
1612 }
1613 Ok(())
1614 }
1615 }
1616
1617 pub struct Sampler {
1618 pub base: CLObjectBase<CL_INVALID_SAMPLER>,
1619 pub context: Arc<Context>,
1620 pub normalized_coords: bool,
1621 pub addressing_mode: cl_addressing_mode,
1622 pub filter_mode: cl_filter_mode,
1623 pub props: Option<Properties<cl_sampler_properties>>,
1624 }
1625
1626 impl_cl_type_trait!(cl_sampler, Sampler, CL_INVALID_SAMPLER);
1627
1628 impl Sampler {
    pub fn new(
1630 context: Arc<Context>,
1631 normalized_coords: bool,
1632 addressing_mode: cl_addressing_mode,
1633 filter_mode: cl_filter_mode,
1634 props: Option<Properties<cl_sampler_properties>>,
1635 ) -> Arc<Sampler> {
1636 Arc::new(Self {
1637 base: CLObjectBase::new(RusticlTypes::Sampler),
1638 context: context,
1639 normalized_coords: normalized_coords,
1640 addressing_mode: addressing_mode,
1641 filter_mode: filter_mode,
1642 props: props,
1643 })
1644 }
1645
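    /// Converts the sampler constants used on the NIR side into the corresponding CL addressing
    /// mode, filter mode and normalized-coords flag.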
    pub fn nir_to_cl(
1647 addressing_mode: u32,
1648 filter_mode: u32,
1649 normalized_coords: u32,
1650 ) -> (cl_addressing_mode, cl_filter_mode, bool) {
1651 let addr_mode = match addressing_mode {
1652 cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_NONE => CL_ADDRESS_NONE,
1653 cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE => {
1654 CL_ADDRESS_CLAMP_TO_EDGE
1655 }
1656 cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP => CL_ADDRESS_CLAMP,
1657 cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT => CL_ADDRESS_REPEAT,
1658 cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED => {
1659 CL_ADDRESS_MIRRORED_REPEAT
1660 }
1661 _ => panic!("unknown addressing_mode"),
1662 };
1663
1664 let filter = match filter_mode {
1665 cl_sampler_filter_mode::SAMPLER_FILTER_MODE_NEAREST => CL_FILTER_NEAREST,
1666 cl_sampler_filter_mode::SAMPLER_FILTER_MODE_LINEAR => CL_FILTER_LINEAR,
1667 _ => panic!("unknown filter_mode"),
1668 };
1669
1670 (addr_mode, filter, normalized_coords != 0)
1671 }
1672
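    /// Converts CL sampler state into the equivalent gallium `pipe_sampler_state`.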
    pub fn cl_to_pipe(
1674 (addressing_mode, filter_mode, normalized_coords): (
1675 cl_addressing_mode,
1676 cl_filter_mode,
1677 bool,
1678 ),
1679 ) -> pipe_sampler_state {
1680 let mut res = pipe_sampler_state::default();
1681
1682 let wrap = match addressing_mode {
1683 CL_ADDRESS_CLAMP_TO_EDGE => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
1684 CL_ADDRESS_CLAMP => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_BORDER,
1685 CL_ADDRESS_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_REPEAT,
1686 CL_ADDRESS_MIRRORED_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_MIRROR_REPEAT,
1687 // TODO: what's a reasonable default?
1688 _ => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
1689 };
1690
1691 let img_filter = match filter_mode {
1692 CL_FILTER_NEAREST => pipe_tex_filter::PIPE_TEX_FILTER_NEAREST,
1693 CL_FILTER_LINEAR => pipe_tex_filter::PIPE_TEX_FILTER_LINEAR,
1694 _ => panic!("unknown filter_mode"),
1695 };
1696
1697 res.set_min_img_filter(img_filter);
1698 res.set_mag_img_filter(img_filter);
1699 res.set_unnormalized_coords((!normalized_coords).into());
1700 res.set_wrap_r(wrap);
1701 res.set_wrap_s(wrap);
1702 res.set_wrap_t(wrap);
1703
1704 res
1705 }
1706
    pub fn pipe(&self) -> pipe_sampler_state {
1708 Self::cl_to_pipe((
1709 self.addressing_mode,
1710 self.filter_mode,
1711 self.normalized_coords,
1712 ))
1713 }
1714 }
1715