xref: /aosp_15_r20/external/mesa3d/src/gallium/frontends/rusticl/core/memory.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 use crate::api::icd::*;
2 use crate::api::types::*;
3 use crate::api::util::*;
4 use crate::core::context::*;
5 use crate::core::device::*;
6 use crate::core::format::*;
7 use crate::core::gl::*;
8 use crate::core::queue::*;
9 use crate::core::util::*;
10 use crate::impl_cl_type_trait;
11 use crate::impl_cl_type_trait_base;
12 use crate::perf_warning;
13 
14 use mesa_rust::pipe::context::*;
15 use mesa_rust::pipe::resource::*;
16 use mesa_rust::pipe::screen::ResourceType;
17 use mesa_rust::pipe::transfer::*;
18 use mesa_rust_gen::*;
19 use mesa_rust_util::properties::Properties;
20 use mesa_rust_util::ptr::AllocSize;
21 use mesa_rust_util::ptr::TrackedPointers;
22 use rusticl_opencl_gen::*;
23 
24 use std::alloc;
25 use std::alloc::Layout;
26 use std::cmp;
27 use std::collections::btree_map::Entry;
28 use std::collections::HashMap;
29 use std::convert::TryInto;
30 use std::mem;
31 use std::mem::size_of;
32 use std::ops::Deref;
33 use std::os::raw::c_void;
34 use std::ptr;
35 use std::sync::Arc;
36 use std::sync::Mutex;
37 
38 struct Mapping<T> {
39     layout: Layout,
40     writes: bool,
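    /// `Some` when the mapping owns a shadow allocation that is freed on drop; `None` when it
    /// points directly into the memory object's `host_ptr` (see `MemBase::map`).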
41     ptr: Option<MutMemoryPtr>,
42     /// reference count from the API perspective. Once it reaches 0, we need to write back the
43     /// mapping's content to the GPU resource.
44     count: u32,
45     inner: T,
46 }
47 
48 impl<T> Drop for Mapping<T> {
49     fn drop(&mut self) {
50         if let Some(ptr) = &self.ptr {
51             unsafe {
52                 alloc::dealloc(ptr.as_ptr().cast(), self.layout);
53             }
54         }
55     }
56 }
57 
58 impl<T> AllocSize<usize> for Mapping<T> {
59     fn size(&self) -> usize {
60         self.layout.size()
61     }
62 }
63 
64 impl<T> Deref for Mapping<T> {
65     type Target = T;
66 
67     fn deref(&self) -> &Self::Target {
68         &self.inner
69     }
70 }
71 
72 struct BufferMapping {
73     offset: usize,
74 }
75 
76 struct ImageMapping {
77     origin: CLVec<usize>,
78     region: CLVec<usize>,
79 }
80 
81 #[repr(transparent)]
82 #[derive(Clone, Copy)]
83 pub struct ConstMemoryPtr {
84     ptr: *const c_void,
85 }
86 unsafe impl Send for ConstMemoryPtr {}
87 unsafe impl Sync for ConstMemoryPtr {}
88 
89 impl ConstMemoryPtr {
90     pub fn as_ptr(&self) -> *const c_void {
91         self.ptr
92     }
93 
94     /// # Safety
95     ///
96     /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
97     /// [Send] and [Sync]
98     pub unsafe fn from_ptr(ptr: *const c_void) -> Self {
99         Self { ptr: ptr }
100     }
101 }
102 
103 impl From<MutMemoryPtr> for ConstMemoryPtr {
104     fn from(value: MutMemoryPtr) -> Self {
105         Self {
106             ptr: value.ptr.cast(),
107         }
108     }
109 }
110 
111 #[repr(transparent)]
112 #[derive(Clone, Copy)]
113 pub struct MutMemoryPtr {
114     ptr: *mut c_void,
115 }
116 unsafe impl Send for MutMemoryPtr {}
117 unsafe impl Sync for MutMemoryPtr {}
118 
119 impl MutMemoryPtr {
120     pub fn as_ptr(&self) -> *mut c_void {
121         self.ptr
122     }
123 
124     /// # Safety
125     ///
126     /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
127     /// [Send] and [Sync]
128     pub unsafe fn from_ptr(ptr: *mut c_void) -> Self {
129         Self { ptr: ptr }
130     }
131 }
132 
133 pub enum Mem {
134     Buffer(Arc<Buffer>),
135     Image(Arc<Image>),
136 }
137 
138 impl Deref for Mem {
139     type Target = MemBase;
140 
141     fn deref(&self) -> &Self::Target {
142         match self {
143             Self::Buffer(b) => &b.base,
144             Self::Image(i) => &i.base,
145         }
146     }
147 }
148 
149 impl Mem {
150     pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
151         match self {
152             Self::Buffer(b) => b.is_mapped_ptr(ptr),
153             Self::Image(i) => i.is_mapped_ptr(ptr),
154         }
155     }
156 
157     pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
158         match self {
159             Self::Buffer(b) => b.sync_unmap(q, ctx, ptr),
160             Self::Image(i) => i.sync_unmap(q, ctx, ptr),
161         }
162     }
163 
164     pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
165         match self {
166             Self::Buffer(b) => b.unmap(ptr),
167             Self::Image(i) => i.unmap(ptr),
168         }
169     }
170 }
171 
172 /// # Mapping memory
173 ///
174 /// Maps the resource of the device associated with the queue.
175 ///
176 /// Mapping resources would be quite straightforward if OpenCL didn't allow for so-called
177 /// non-blocking maps. Non-blocking maps shall return a valid pointer to the mapped region
178 /// immediately, but should not synchronize data (in case of shadow buffers) until after the map
179 /// event is reached in the queue. This makes it impossible to simply use pipe_transfers, as
180 /// those can't be explicitly synced by the frontend.
181 ///
182 /// In order to have a compliant implementation of the mapping API we have to consider the following
183 /// cases:
184 ///   1. Mapping a cl_mem object with CL_MEM_USE_HOST_PTR: We simply return the host_ptr.
185 ///      Synchronization of shadowed host ptrs is done in `sync_shadow` on demand.
186 ///   2. Mapping linear resources on UMA systems: We simply create the pipe_transfer with
187 ///      `PIPE_MAP_DIRECTLY` and `PIPE_MAP_UNSYNCHRONIZED` and return the attached pointer.
188 ///   3. On non-UMA systems, or when 2. fails (e.g. due to the resource being tiled), we
189 ///      - create a shadow pipe_resource with `PIPE_USAGE_STAGING`,
190 ///        `PIPE_RESOURCE_FLAG_MAP_PERSISTENT` and `PIPE_RESOURCE_FLAG_MAP_COHERENT`
191 ///      - create a pipe_transfer with `PIPE_MAP_COHERENT`, `PIPE_MAP_PERSISTENT` and
192 ///        `PIPE_MAP_UNSYNCHRONIZED`
193 ///      - sync the shadow buffer like a host_ptr shadow buffer in 1.
194 ///
195 /// Taking this approach we guarantee that we only copy when actually needed while making sure the
196 /// content behind the returned pointer is valid until unmapped.
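///
/// # Example
///
/// A minimal sketch of the map/sync/unmap flow from the caller's side (hypothetical
/// driver-internal code, assuming a `queue`, a `ctx` and a `buffer` are at hand):
///
/// ```ignore
/// // reserve the pointer and bump the mapping's refcount
/// let ptr = buffer.map(size, 0, /* writes */ true)?;
/// // for blocking maps (or once the map event executes): pull the data in
/// buffer.sync_map(&queue, &ctx, ptr)?;
/// // ... the host accesses ptr.as_ptr() ...
/// if buffer.unmap(ptr)? {
///     // the last reference is gone: write the content back to the GPU resource
///     buffer.sync_unmap(&queue, &ctx, ptr)?;
/// }
/// ```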
197 pub struct MemBase {
198     pub base: CLObjectBase<CL_INVALID_MEM_OBJECT>,
199     pub context: Arc<Context>,
200     pub parent: Option<Mem>,
201     pub mem_type: cl_mem_object_type,
202     pub flags: cl_mem_flags,
203     pub size: usize,
204     // it's a bit hacky, but storing the pointer as `usize` gives us `Send` and `Sync`. The
205     // application is required to ensure no data races exist on the memory anyway.
206     pub host_ptr: usize,
207     pub props: Vec<cl_mem_properties>,
208     pub cbs: Mutex<Vec<MemCB>>,
209     pub gl_obj: Option<GLObject>,
210     res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
211 }
212 
213 pub struct Buffer {
214     base: MemBase,
215     pub offset: usize,
216     maps: Mutex<TrackedPointers<usize, Mapping<BufferMapping>>>,
217 }
218 
219 pub struct Image {
220     base: MemBase,
221     pub image_format: cl_image_format,
222     pub pipe_format: pipe_format,
223     pub image_desc: cl_image_desc,
224     pub image_elem_size: u8,
225     maps: Mutex<TrackedPointers<usize, Mapping<ImageMapping>>>,
226 }
227 
228 impl Deref for Buffer {
229     type Target = MemBase;
230 
231     fn deref(&self) -> &Self::Target {
232         &self.base
233     }
234 }
235 
236 impl Deref for Image {
237     type Target = MemBase;
238 
239     fn deref(&self) -> &Self::Target {
240         &self.base
241     }
242 }
243 
244 impl_cl_type_trait_base!(cl_mem, MemBase, [Buffer, Image], CL_INVALID_MEM_OBJECT);
245 impl_cl_type_trait!(cl_mem, Buffer, CL_INVALID_MEM_OBJECT, base.base);
246 impl_cl_type_trait!(cl_mem, Image, CL_INVALID_MEM_OBJECT, base.base);
247 
248 pub trait CLImageDescInfo {
249     fn type_info(&self) -> (u8, bool);
250     fn pixels(&self) -> usize;
251     fn bx(&self) -> CLResult<pipe_box>;
252     fn row_pitch(&self) -> CLResult<u32>;
253     fn slice_pitch(&self) -> usize;
254     fn width(&self) -> CLResult<u32>;
255     fn height(&self) -> CLResult<u32>;
256     fn size(&self) -> CLVec<usize>;
257 
258     fn dims(&self) -> u8 {
259         self.type_info().0
260     }
261 
262     fn dims_with_array(&self) -> u8 {
263         let array: u8 = self.is_array().into();
264         self.dims() + array
265     }
266 
267     fn has_slice(&self) -> bool {
268         self.dims() == 3 || self.is_array()
269     }
270 
271     fn is_array(&self) -> bool {
272         self.type_info().1
273     }
274 }
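// As an example of the derived helpers: a CL_MEM_OBJECT_IMAGE2D_ARRAY has
// type_info() == (2, true), so dims() == 2, is_array() == true, dims_with_array() == 3
// and has_slice() == true.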
275 
276 impl CLImageDescInfo for cl_image_desc {
277     fn type_info(&self) -> (u8, bool) {
278         match self.image_type {
279             CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false),
280             CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true),
281             CL_MEM_OBJECT_IMAGE2D => (2, false),
282             CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true),
283             CL_MEM_OBJECT_IMAGE3D => (3, false),
284             _ => panic!("unknown image_type {:x}", self.image_type),
285         }
286     }
287 
288     fn pixels(&self) -> usize {
289         let mut res = self.image_width;
290         let dims = self.dims();
291 
292         if dims > 1 {
293             res *= self.image_height;
294         }
295 
296         if dims > 2 {
297             res *= self.image_depth;
298         }
299 
300         if self.is_array() {
301             res *= self.image_array_size;
302         }
303 
304         res
305     }
306 
307     fn size(&self) -> CLVec<usize> {
308         let mut height = cmp::max(self.image_height, 1);
309         let mut depth = cmp::max(self.image_depth, 1);
310 
311         match self.image_type {
312             CL_MEM_OBJECT_IMAGE1D_ARRAY => height = self.image_array_size,
313             CL_MEM_OBJECT_IMAGE2D_ARRAY => depth = self.image_array_size,
314             _ => {}
315         }
316 
317         CLVec::new([self.image_width, height, depth])
318     }
319 
320     fn bx(&self) -> CLResult<pipe_box> {
321         create_pipe_box(CLVec::default(), self.size(), self.image_type)
322     }
323 
324     fn row_pitch(&self) -> CLResult<u32> {
325         self.image_row_pitch
326             .try_into()
327             .map_err(|_| CL_OUT_OF_HOST_MEMORY)
328     }
329 
330     fn slice_pitch(&self) -> usize {
331         self.image_slice_pitch
332     }
333 
334     fn width(&self) -> CLResult<u32> {
335         self.image_width
336             .try_into()
337             .map_err(|_| CL_OUT_OF_HOST_MEMORY)
338     }
339 
340     fn height(&self) -> CLResult<u32> {
341         self.image_height
342             .try_into()
343             .map_err(|_| CL_OUT_OF_HOST_MEMORY)
344     }
345 }
346 
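// Software fallback for rectangular copies between two host pointers. Byte offsets are
// computed as a dot product: (origin + [x, y, z]) * [pixel_size, row_pitch, slice_pitch].
// When both sides share one pitch and rows are stored contiguously, each slice is copied
// in one go; otherwise we fall back to one copy per row.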
347 fn sw_copy(
348     src: *const c_void,
349     dst: *mut c_void,
350     region: &CLVec<usize>,
351     src_origin: &CLVec<usize>,
352     src_row_pitch: usize,
353     src_slice_pitch: usize,
354     dst_origin: &CLVec<usize>,
355     dst_row_pitch: usize,
356     dst_slice_pitch: usize,
357     pixel_size: u8,
358 ) {
359     let pixel_size = pixel_size as usize;
360     for z in 0..region[2] {
361         if src_row_pitch == dst_row_pitch && region[0] * pixel_size == src_row_pitch {
362             unsafe {
363                 ptr::copy(
364                     src.byte_add(
365                         (*src_origin + [0, 0, z]) * [pixel_size, src_row_pitch, src_slice_pitch],
366                     ),
367                     dst.byte_add(
368                         (*dst_origin + [0, 0, z]) * [pixel_size, dst_row_pitch, dst_slice_pitch],
369                     ),
370                     region[0] * region[1] * pixel_size,
371                 )
372             }
373         } else {
374             for y in 0..region[1] {
375                 unsafe {
376                     ptr::copy(
377                         src.byte_add(
378                             (*src_origin + [0, y, z])
379                                 * [pixel_size, src_row_pitch, src_slice_pitch],
380                         ),
381                         dst.byte_add(
382                             (*dst_origin + [0, y, z])
383                                 * [pixel_size, dst_row_pitch, dst_slice_pitch],
384                         ),
385                         region[0] * pixel_size,
386                     )
387                 };
388             }
389         }
390     }
391 }
392 
393 impl MemBase {
394     pub fn new_buffer(
395         context: Arc<Context>,
396         flags: cl_mem_flags,
397         size: usize,
398         host_ptr: *mut c_void,
399         props: Vec<cl_mem_properties>,
400     ) -> CLResult<Arc<Buffer>> {
401         let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
402             ResourceType::Staging
403         } else {
404             ResourceType::Normal
405         };
406 
407         let buffer = context.create_buffer(
408             size,
409             host_ptr,
410             bit_check(flags, CL_MEM_COPY_HOST_PTR),
411             res_type,
412         )?;
413 
414         let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
415             host_ptr as usize
416         } else {
417             0
418         };
419 
420         Ok(Arc::new(Buffer {
421             base: Self {
422                 base: CLObjectBase::new(RusticlTypes::Buffer),
423                 context: context,
424                 parent: None,
425                 mem_type: CL_MEM_OBJECT_BUFFER,
426                 flags: flags,
427                 size: size,
428                 host_ptr: host_ptr,
429                 props: props,
430                 gl_obj: None,
431                 cbs: Mutex::new(Vec::new()),
432                 res: Some(buffer),
433             },
434             offset: 0,
435             maps: Mutex::new(TrackedPointers::new()),
436         }))
437     }
438 
439     pub fn new_sub_buffer(
440         parent: Arc<Buffer>,
441         flags: cl_mem_flags,
442         offset: usize,
443         size: usize,
444     ) -> Arc<Buffer> {
445         let host_ptr = if parent.host_ptr().is_null() {
446             0
447         } else {
448             unsafe { parent.host_ptr().byte_add(offset) as usize }
449         };
450 
451         Arc::new(Buffer {
452             base: Self {
453                 base: CLObjectBase::new(RusticlTypes::Buffer),
454                 context: parent.context.clone(),
455                 parent: Some(Mem::Buffer(parent)),
456                 mem_type: CL_MEM_OBJECT_BUFFER,
457                 flags: flags,
458                 size: size,
459                 host_ptr: host_ptr,
460                 props: Vec::new(),
461                 gl_obj: None,
462                 cbs: Mutex::new(Vec::new()),
463                 res: None,
464             },
465             offset: offset,
466             maps: Mutex::new(TrackedPointers::new()),
467         })
468     }
469 
470     pub fn new_image(
471         context: Arc<Context>,
472         parent: Option<Mem>,
473         mem_type: cl_mem_object_type,
474         flags: cl_mem_flags,
475         image_format: &cl_image_format,
476         mut image_desc: cl_image_desc,
477         image_elem_size: u8,
478         host_ptr: *mut c_void,
479         props: Vec<cl_mem_properties>,
480     ) -> CLResult<Arc<Image>> {
481         // we have to sanitize the image_desc a little for internal use
482         let api_image_desc = image_desc;
483         let dims = image_desc.dims();
484         let is_array = image_desc.is_array();
485         if dims < 3 {
486             image_desc.image_depth = 1;
487         }
488         if dims < 2 {
489             image_desc.image_height = 1;
490         }
491         if !is_array {
492             image_desc.image_array_size = 1;
493         }
494 
495         let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
496             ResourceType::Staging
497         } else {
498             ResourceType::Normal
499         };
500 
501         let texture = if parent.is_none() {
502             let mut texture = context.create_texture(
503                 &image_desc,
504                 image_format,
505                 host_ptr,
506                 bit_check(flags, CL_MEM_COPY_HOST_PTR),
507                 res_type,
508             );
509 
510             // if allocating a Staging resource fails, fall back to Normal, as
511             // `CL_MEM_ALLOC_HOST_PTR` is only a performance hint.
512             if res_type == ResourceType::Staging && texture.is_err() {
513                 texture = context.create_texture(
514                     &image_desc,
515                     image_format,
516                     host_ptr,
517                     bit_check(flags, CL_MEM_COPY_HOST_PTR),
518                     ResourceType::Normal,
519                 )
520             }
521 
522             Some(texture?)
523         } else {
524             None
525         };
526 
527         let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
528             host_ptr as usize
529         } else {
530             0
531         };
532 
533         let pipe_format = image_format.to_pipe_format().unwrap();
534         Ok(Arc::new(Image {
535             base: Self {
536                 base: CLObjectBase::new(RusticlTypes::Image),
537                 context: context,
538                 parent: parent,
539                 mem_type: mem_type,
540                 flags: flags,
541                 size: image_desc.pixels() * image_format.pixel_size().unwrap() as usize,
542                 host_ptr: host_ptr,
543                 props: props,
544                 gl_obj: None,
545                 cbs: Mutex::new(Vec::new()),
546                 res: texture,
547             },
548             image_format: *image_format,
549             pipe_format: pipe_format,
550             image_desc: api_image_desc,
551             image_elem_size: image_elem_size,
552             maps: Mutex::new(TrackedPointers::new()),
553         }))
554     }
555 
556     pub fn arc_from_raw(ptr: cl_mem) -> CLResult<Mem> {
557         let mem = Self::ref_from_raw(ptr)?;
558         match mem.base.get_type()? {
559             RusticlTypes::Buffer => Ok(Mem::Buffer(Buffer::arc_from_raw(ptr)?)),
560             RusticlTypes::Image => Ok(Mem::Image(Image::arc_from_raw(ptr)?)),
561             _ => Err(CL_INVALID_MEM_OBJECT),
562         }
563     }
564 
565     pub fn arcs_from_arr(objs: *const cl_mem, count: u32) -> CLResult<Vec<Mem>> {
566         let count = count as usize;
567         let mut res = Vec::with_capacity(count);
568         for i in 0..count {
569             res.push(Self::arc_from_raw(unsafe { *objs.add(i) })?);
570         }
571         Ok(res)
572     }
573 
574     pub fn from_gl(
575         context: Arc<Context>,
576         flags: cl_mem_flags,
577         gl_export_manager: &GLExportManager,
578     ) -> CLResult<cl_mem> {
579         let export_in = &gl_export_manager.export_in;
580         let export_out = &gl_export_manager.export_out;
581 
582         let (mem_type, gl_object_type) = target_from_gl(export_in.target)?;
583         let gl_mem_props = gl_export_manager.get_gl_mem_props()?;
584 
585         // Handle Buffers
586         let (image_format, pipe_format, rusticl_type) = if gl_export_manager.is_gl_buffer() {
587             (
588                 cl_image_format::default(),
589                 pipe_format::PIPE_FORMAT_NONE,
590                 RusticlTypes::Buffer,
591             )
592         } else {
593             let image_format =
594                 format_from_gl(export_out.internal_format).ok_or(CL_OUT_OF_HOST_MEMORY)?;
595             (
596                 image_format,
597                 image_format.to_pipe_format().unwrap(),
598                 RusticlTypes::Image,
599             )
600         };
601 
602         let imported_gl_tex = context.import_gl_buffer(
603             export_out.dmabuf_fd as u32,
604             export_out.modifier,
605             mem_type,
606             export_in.target,
607             pipe_format,
608             gl_mem_props.clone(),
609         )?;
610 
611         // Cube map faces are not linear in memory, so copy all contents
612         // of the desired face into a 2D image and copy it back after GL release.
613         let (shadow_map, texture) = if is_cube_map_face(export_in.target) {
614             let shadow = create_shadow_slice(&imported_gl_tex, image_format)?;
615 
616             let mut res_map = HashMap::new();
617             shadow
618                 .iter()
619                 .map(|(k, v)| {
620                     let gl_res = imported_gl_tex.get(k).unwrap().clone();
621                     res_map.insert(v.clone(), gl_res);
622                 })
623                 .for_each(drop);
624 
625             (Some(res_map), shadow)
626         } else {
627             (None, imported_gl_tex)
628         };
629 
630         // it's not really supported, but we want to know if anything actually hits this, as it's
631         // certainly not tested by the CL CTS.
632         if mem_type != CL_MEM_OBJECT_BUFFER {
633             assert_eq!(gl_mem_props.offset, 0);
634         }
635 
636         let base = Self {
637             base: CLObjectBase::new(rusticl_type),
638             context: context,
639             parent: None,
640             mem_type: mem_type,
641             flags: flags,
642             size: gl_mem_props.size(),
643             host_ptr: 0,
644             props: Vec::new(),
645             gl_obj: Some(GLObject {
646                 gl_object_target: gl_export_manager.export_in.target,
647                 gl_object_type: gl_object_type,
648                 gl_object_name: export_in.obj,
649                 shadow_map: shadow_map,
650             }),
651             cbs: Mutex::new(Vec::new()),
652             res: Some(texture),
653         };
654 
655         Ok(if rusticl_type == RusticlTypes::Buffer {
656             Arc::new(Buffer {
657                 base: base,
658                 offset: gl_mem_props.offset as usize,
659                 maps: Mutex::new(TrackedPointers::new()),
660             })
661             .into_cl()
662         } else {
663             Arc::new(Image {
664                 base: base,
665                 image_format: image_format,
666                 pipe_format: pipe_format,
667                 image_desc: cl_image_desc {
668                     image_type: mem_type,
669                     image_width: gl_mem_props.width as usize,
670                     image_height: gl_mem_props.height as usize,
671                     image_depth: gl_mem_props.depth as usize,
672                     image_array_size: gl_mem_props.array_size as usize,
673                     image_row_pitch: 0,
674                     image_slice_pitch: 0,
675                     num_mip_levels: 1,
676                     num_samples: 1,
677                     ..Default::default()
678                 },
679                 image_elem_size: gl_mem_props.pixel_size,
680                 maps: Mutex::new(TrackedPointers::new()),
681             })
682             .into_cl()
683         })
684     }
685 
686     pub fn is_buffer(&self) -> bool {
687         self.mem_type == CL_MEM_OBJECT_BUFFER
688     }
689 
690     pub fn has_same_parent(&self, other: &Self) -> bool {
691         ptr::eq(self.get_parent(), other.get_parent())
692     }
693 
694     // this is somewhat bogus, because it won't work with system SVM, but the spec wants us to
695     // implement it.
696     pub fn is_svm(&self) -> bool {
697         let mem = self.get_parent();
698         self.context.find_svm_alloc(mem.host_ptr).is_some()
699             && bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
700     }
701 
702     pub fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
703         self.get_parent()
704             .res
705             .as_ref()
706             .and_then(|resources| resources.get(dev))
707             .ok_or(CL_OUT_OF_HOST_MEMORY)
708     }
709 
710     fn get_parent(&self) -> &Self {
711         if let Some(parent) = &self.parent {
712             parent
713         } else {
714             self
715         }
716     }
717 
718     pub fn host_ptr(&self) -> *mut c_void {
719         self.host_ptr as *mut c_void
720     }
721 
722     fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
723         let r = self.get_res_of_dev(d)?;
724         // 1D buffer objects are weird. The parent memory object can be backed by a host_ptr, but
725         // we are not allowed to actually return a pointer based on that host_ptr when mapping.
726         Ok(r.is_user() && !self.host_ptr().is_null())
727     }
728 
729     fn map<T>(
730         &self,
731         offset: usize,
732         layout: Layout,
733         writes: bool,
734         maps: &Mutex<TrackedPointers<usize, Mapping<T>>>,
735         inner: T,
736     ) -> CLResult<MutMemoryPtr> {
737         let host_ptr = self.host_ptr();
738         let ptr = unsafe {
739             let ptr = if !host_ptr.is_null() {
740                 host_ptr.byte_add(offset)
741             } else {
742                 alloc::alloc(layout).cast()
743             };
744 
745             MutMemoryPtr::from_ptr(ptr)
746         };
747 
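        // Mappings of a host_ptr alias each other: mapping the same region again just bumps the
        // refcount (Occupied arm). Shadow allocations are freshly allocated above, so they always
        // take the Vacant arm.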
748         match maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
749             Entry::Occupied(mut e) => {
750                 debug_assert!(!host_ptr.is_null());
751                 e.get_mut().count += 1;
752             }
753             Entry::Vacant(e) => {
754                 e.insert(Mapping {
755                     layout: layout,
756                     writes: writes,
757                     ptr: host_ptr.is_null().then_some(ptr),
758                     count: 1,
759                     inner: inner,
760                 });
761             }
762         }
763 
764         Ok(ptr)
765     }
766 }
767 
768 impl Drop for MemBase {
769     fn drop(&mut self) {
770         let cbs = mem::take(self.cbs.get_mut().unwrap());
771         for cb in cbs.into_iter().rev() {
772             cb.call(self);
773         }
774     }
775 }
776 
777 impl Buffer {
778     fn apply_offset(&self, offset: usize) -> CLResult<usize> {
779         self.offset.checked_add(offset).ok_or(CL_OUT_OF_HOST_MEMORY)
780     }
781 
782     pub fn copy_rect(
783         &self,
784         dst: &Self,
785         q: &Queue,
786         ctx: &PipeContext,
787         region: &CLVec<usize>,
788         src_origin: &CLVec<usize>,
789         src_row_pitch: usize,
790         src_slice_pitch: usize,
791         dst_origin: &CLVec<usize>,
792         dst_row_pitch: usize,
793         dst_slice_pitch: usize,
794     ) -> CLResult<()> {
795         let (offset, size) =
796             CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
797         let tx_src = self.tx(q, ctx, offset, size, RWFlags::RD)?;
798 
799         let (offset, size) =
800             CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
801         let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;
802 
803         perf_warning!("clEnqueueCopyBufferRect stalls the GPU");
804 
805         // TODO: check whether hw accelerated paths (e.g. resource_copy_region or blits) can be used
806         sw_copy(
807             tx_src.ptr(),
808             tx_dst.ptr(),
809             region,
810             &CLVec::default(),
811             src_row_pitch,
812             src_slice_pitch,
813             &CLVec::default(),
814             dst_row_pitch,
815             dst_slice_pitch,
816             1,
817         );
818 
819         Ok(())
820     }
821 
822     pub fn copy_to_buffer(
823         &self,
824         q: &Queue,
825         ctx: &PipeContext,
826         dst: &Buffer,
827         src_offset: usize,
828         dst_offset: usize,
829         size: usize,
830     ) -> CLResult<()> {
831         let src_offset = self.apply_offset(src_offset)?;
832         let dst_offset = dst.apply_offset(dst_offset)?;
833         let src_res = self.get_res_of_dev(q.device)?;
834         let dst_res = dst.get_res_of_dev(q.device)?;
835 
836         let bx = create_pipe_box(
837             [src_offset, 0, 0].into(),
838             [size, 1, 1].into(),
839             CL_MEM_OBJECT_BUFFER,
840         )?;
841         let dst_origin: [u32; 3] = [
842             dst_offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
843             0,
844             0,
845         ];
846 
847         ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
848         Ok(())
849     }
850 
851     pub fn copy_to_image(
852         &self,
853         q: &Queue,
854         ctx: &PipeContext,
855         dst: &Image,
856         src_offset: usize,
857         dst_origin: CLVec<usize>,
858         region: &CLVec<usize>,
859     ) -> CLResult<()> {
860         let src_offset = self.apply_offset(src_offset)?;
861         let bpp = dst.image_format.pixel_size().unwrap().into();
862         let src_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];
863         let size = CLVec::calc_size(region, src_pitch);
864         let tx_src = self.tx(q, ctx, src_offset, size, RWFlags::RD)?;
865 
866         // If the image is created from a buffer, use the image's row and slice pitch instead
867         let tx_dst;
868         let dst_pitch;
869         if let Some(Mem::Buffer(buffer)) = &dst.parent {
870             dst_pitch = [
871                 bpp,
872                 dst.image_desc.row_pitch()? as usize,
873                 dst.image_desc.slice_pitch(),
874             ];
875 
876             let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
877             tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
878         } else {
879             tx_dst = dst.tx_image(
880                 q,
881                 ctx,
882                 &create_pipe_box(dst_origin, *region, dst.mem_type)?,
883                 RWFlags::WR,
884             )?;
885 
886             dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
887         }
888 
889         // None of these pitch values may be zero
890         debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
891         debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);
892 
893         perf_warning!("clEnqueueCopyBufferToImage stalls the GPU");
894 
895         sw_copy(
896             tx_src.ptr(),
897             tx_dst.ptr(),
898             region,
899             &CLVec::default(),
900             src_pitch[1],
901             src_pitch[2],
902             &CLVec::default(),
903             dst_pitch[1],
904             dst_pitch[2],
905             bpp as u8,
906         );
907         Ok(())
908     }
909 
910     pub fn fill(
911         &self,
912         q: &Queue,
913         ctx: &PipeContext,
914         pattern: &[u8],
915         offset: usize,
916         size: usize,
917     ) -> CLResult<()> {
918         let offset = self.apply_offset(offset)?;
919         let res = self.get_res_of_dev(q.device)?;
920         ctx.clear_buffer(
921             res,
922             pattern,
923             offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
924             size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
925         );
926         Ok(())
927     }
928 
929     fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
930         let mut maps = self.maps.lock().unwrap();
931         let entry = maps.entry(ptr as usize);
932         matches!(entry, Entry::Occupied(entry) if entry.get().count > 0)
933     }
934 
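    // Note: the shadow allocation (if any) is aligned to size_of::<[cl_ulong; 16]>() (128
    // bytes), the size of the largest OpenCL built-in vector type (ulong16).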
935     pub fn map(&self, size: usize, offset: usize, writes: bool) -> CLResult<MutMemoryPtr> {
936         let layout =
937             unsafe { Layout::from_size_align_unchecked(size, size_of::<[cl_ulong; 16]>()) };
938         self.base.map(
939             offset,
940             layout,
941             writes,
942             &self.maps,
943             BufferMapping { offset: offset },
944         )
945     }
946 
947     pub fn read(
948         &self,
949         q: &Queue,
950         ctx: &PipeContext,
951         offset: usize,
952         ptr: MutMemoryPtr,
953         size: usize,
954     ) -> CLResult<()> {
955         let ptr = ptr.as_ptr();
956         let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;
957 
958         perf_warning!("clEnqueueReadBuffer and clEnqueueMapBuffer stall the GPU");
959 
960         unsafe {
961             ptr::copy(tx.ptr(), ptr, size);
962         }
963 
964         Ok(())
965     }
966 
967     pub fn read_rect(
968         &self,
969         dst: MutMemoryPtr,
970         q: &Queue,
971         ctx: &PipeContext,
972         region: &CLVec<usize>,
973         src_origin: &CLVec<usize>,
974         src_row_pitch: usize,
975         src_slice_pitch: usize,
976         dst_origin: &CLVec<usize>,
977         dst_row_pitch: usize,
978         dst_slice_pitch: usize,
979     ) -> CLResult<()> {
980         let dst = dst.as_ptr();
981         let (offset, size) =
982             CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
983         let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;
984 
985         perf_warning!("clEnqueueReadBufferRect stalls the GPU");
986 
987         sw_copy(
988             tx.ptr(),
989             dst,
990             region,
991             &CLVec::default(),
992             src_row_pitch,
993             src_slice_pitch,
994             dst_origin,
995             dst_row_pitch,
996             dst_slice_pitch,
997             1,
998         );
999 
1000         Ok(())
1001     }
1002 
1003     pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
1004         // no need to update
1005         if self.is_pure_user_memory(q.device)? {
1006             return Ok(());
1007         }
1008 
1009         let maps = self.maps.lock().unwrap();
1010         let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
1011             return Err(CL_INVALID_VALUE);
1012         };
1013 
1014         self.read(q, ctx, mapping.offset, ptr, mapping.size())
1015     }
1016 
1017     pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
1018         // no need to update
1019         if self.is_pure_user_memory(q.device)? {
1020             return Ok(());
1021         }
1022 
1023         match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
1024             Entry::Vacant(_) => Err(CL_INVALID_VALUE),
1025             Entry::Occupied(entry) => {
1026                 let mapping = entry.get();
1027 
1028                 if mapping.writes {
1029                     self.write(q, ctx, mapping.offset, ptr.into(), mapping.size())?;
1030                 }
1031 
1032                 // only remove if the mapping wasn't reused in the meantime
1033                 if mapping.count == 0 {
1034                     entry.remove();
1035                 }
1036 
1037                 Ok(())
1038             }
1039         }
1040     }
1041 
1042     fn tx<'a>(
1043         &self,
1044         q: &Queue,
1045         ctx: &'a PipeContext,
1046         offset: usize,
1047         size: usize,
1048         rw: RWFlags,
1049     ) -> CLResult<PipeTransfer<'a>> {
1050         let offset = self.apply_offset(offset)?;
1051         let r = self.get_res_of_dev(q.device)?;
1052 
1053         ctx.buffer_map(
1054             r,
1055             offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1056             size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1057             rw,
1058         )
1059         .ok_or(CL_OUT_OF_RESOURCES)
1060     }
1061 
1062     pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
1063         match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
1064             Entry::Vacant(_) => Err(CL_INVALID_VALUE),
1065             Entry::Occupied(mut entry) => {
1066                 let entry = entry.get_mut();
1067                 debug_assert!(entry.count > 0);
1068                 entry.count -= 1;
1069                 Ok(entry.count == 0)
1070             }
1071         }
1072     }
1073 
1074     pub fn write(
1075         &self,
1076         q: &Queue,
1077         ctx: &PipeContext,
1078         offset: usize,
1079         ptr: ConstMemoryPtr,
1080         size: usize,
1081     ) -> CLResult<()> {
1082         let ptr = ptr.as_ptr();
1083         let offset = self.apply_offset(offset)?;
1084         let r = self.get_res_of_dev(q.device)?;
1085 
1086         perf_warning!("clEnqueueWriteBuffer and clEnqueueUnmapMemObject might stall the GPU");
1087 
1088         ctx.buffer_subdata(
1089             r,
1090             offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1091             ptr,
1092             size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
1093         );
1094         Ok(())
1095     }
1096 
1097     pub fn write_rect(
1098         &self,
1099         src: ConstMemoryPtr,
1100         q: &Queue,
1101         ctx: &PipeContext,
1102         region: &CLVec<usize>,
1103         src_origin: &CLVec<usize>,
1104         src_row_pitch: usize,
1105         src_slice_pitch: usize,
1106         dst_origin: &CLVec<usize>,
1107         dst_row_pitch: usize,
1108         dst_slice_pitch: usize,
1109     ) -> CLResult<()> {
1110         let src = src.as_ptr();
1111         let (offset, size) =
1112             CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
1113         let tx = self.tx(q, ctx, offset, size, RWFlags::WR)?;
1114 
1115         perf_warning!("clEnqueueWriteBufferRect stalls the GPU");
1116 
1117         sw_copy(
1118             src,
1119             tx.ptr(),
1120             region,
1121             src_origin,
1122             src_row_pitch,
1123             src_slice_pitch,
1124             &CLVec::default(),
1125             dst_row_pitch,
1126             dst_slice_pitch,
1127             1,
1128         );
1129 
1130         Ok(())
1131     }
1132 }
1133 
1134 impl Image {
1135     pub fn copy_to_buffer(
1136         &self,
1137         q: &Queue,
1138         ctx: &PipeContext,
1139         dst: &Buffer,
1140         src_origin: CLVec<usize>,
1141         dst_offset: usize,
1142         region: &CLVec<usize>,
1143     ) -> CLResult<()> {
1144         let dst_offset = dst.apply_offset(dst_offset)?;
1145         let bpp = self.image_format.pixel_size().unwrap().into();
1146 
1147         let src_pitch;
1148         let tx_src;
1149         if let Some(Mem::Buffer(buffer)) = &self.parent {
1150             src_pitch = [
1151                 bpp,
1152                 self.image_desc.row_pitch()? as usize,
1153                 self.image_desc.slice_pitch(),
1154             ];
1155             let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
1156             tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
1157         } else {
1158             tx_src = self.tx_image(
1159                 q,
1160                 ctx,
1161                 &create_pipe_box(src_origin, *region, self.mem_type)?,
1162                 RWFlags::RD,
1163             )?;
1164             src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
1165         }
1166 
1167         // The destination is a plain buffer, so its rows and slices are tightly packed
1168         let dst_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];
1169 
1170         let dst_origin: CLVec<usize> = [dst_offset, 0, 0].into();
1171         let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
1172         let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;
1173 
1174         // None of these pitch values may be zero
1175         debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
1176         debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);
1177 
1178         perf_warning!("clEnqueueCopyImageToBuffer stalls the GPU");
1179 
1180         sw_copy(
1181             tx_src.ptr(),
1182             tx_dst.ptr(),
1183             region,
1184             &CLVec::default(),
1185             src_pitch[1],
1186             src_pitch[2],
1187             &CLVec::default(),
1188             dst_pitch[1],
1189             dst_pitch[2],
1190             bpp as u8,
1191         );
1192         Ok(())
1193     }
1194 
1195     pub fn copy_to_image(
1196         &self,
1197         q: &Queue,
1198         ctx: &PipeContext,
1199         dst: &Image,
1200         src_origin: CLVec<usize>,
1201         dst_origin: CLVec<usize>,
1202         region: &CLVec<usize>,
1203     ) -> CLResult<()> {
1204         let src_parent = self.get_parent();
1205         let dst_parent = dst.get_parent();
1206         let src_res = src_parent.get_res_of_dev(q.device)?;
1207         let dst_res = dst_parent.get_res_of_dev(q.device)?;
1208 
1209         // We only want to use sw_copy if the mem objects have different types or if the copy
1210         // can have custom strides (image2d created from buffers/images)
1211         if src_parent.is_buffer() || dst_parent.is_buffer() {
1212             let bpp = self.image_format.pixel_size().unwrap().into();
1213 
1214             let tx_src;
1215             let tx_dst;
1216             let dst_pitch;
1217             let src_pitch;
1218             if let Some(Mem::Buffer(buffer)) = &self.parent {
1219                 src_pitch = [
1220                     bpp,
1221                     self.image_desc.row_pitch()? as usize,
1222                     self.image_desc.slice_pitch(),
1223                 ];
1224 
1225                 let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
1226                 tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
1227             } else {
1228                 tx_src = self.tx_image(
1229                     q,
1230                     ctx,
1231                     &create_pipe_box(src_origin, *region, src_parent.mem_type)?,
1232                     RWFlags::RD,
1233                 )?;
1234 
1235                 src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
1236             }
1237 
1238             if let Some(Mem::Buffer(buffer)) = &dst.parent {
1239                 // If the image is created from a buffer, use the image's row and slice pitch instead
1240                 dst_pitch = [
1241                     bpp,
1242                     dst.image_desc.row_pitch()? as usize,
1243                     dst.image_desc.slice_pitch(),
1244                 ];
1245 
1246                 let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
1247                 tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
1248             } else {
1249                 tx_dst = dst.tx_image(
1250                     q,
1251                     ctx,
1252                     &create_pipe_box(dst_origin, *region, dst_parent.mem_type)?,
1253                     RWFlags::WR,
1254                 )?;
1255 
1256                 dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
1257             }
1258 
1259             // None of these pitch values may be zero
1260             debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
1261             debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);
1262 
1263             perf_warning!(
1264                 "clEnqueueCopyImage stalls the GPU when src or dst are created from a buffer"
1265             );
1266 
1267             sw_copy(
1268                 tx_src.ptr(),
1269                 tx_dst.ptr(),
1270                 region,
1271                 &CLVec::default(),
1272                 src_pitch[1],
1273                 src_pitch[2],
1274                 &CLVec::default(),
1275                 dst_pitch[1],
1276                 dst_pitch[2],
1277                 bpp as u8,
1278             )
1279         } else {
1280             let bx = create_pipe_box(src_origin, *region, src_parent.mem_type)?;
1281             let mut dst_origin: [u32; 3] = dst_origin.try_into()?;
1282 
1283             if src_parent.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
1284                 (dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
1285             }
1286 
1287             ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
1288         }
1289         Ok(())
1290     }
1291 
1292     pub fn fill(
1293         &self,
1294         q: &Queue,
1295         ctx: &PipeContext,
1296         pattern: &[u32],
1297         origin: &CLVec<usize>,
1298         region: &CLVec<usize>,
1299     ) -> CLResult<()> {
1300         let res = self.get_res_of_dev(q.device)?;
1301 
1302         // make sure we allocate a multiple of 4 bytes so drivers don't read out of bounds or
1303         // unaligned; `div_ceil` rounds the pixel size up to whole u32 words.
1305         let pixel_size: usize = self.image_format.pixel_size().unwrap().into();
1306         let mut new_pattern: Vec<u32> = vec![0; pixel_size.div_ceil(size_of::<u32>())];
1307 
1308         // we don't support CL_DEPTH for now
1309         assert!(pattern.len() == 4);
1310 
1311         // SAFETY: pointers have to be valid for reads/writes of exactly one pixel of their
1312         // respective format.
1313         // `new_pattern` has the correct size due to the computation above.
1314         // `pattern` is validated through the CL API; violating the CL API rules results in
1315         // undefined behavior. It's expected to be a 4 component array of 32 bit values, except for
1316         // CL_DEPTH where it's just one value.
1317         unsafe {
1318             util_format_pack_rgba(
1319                 self.pipe_format,
1320                 new_pattern.as_mut_ptr().cast(),
1321                 pattern.as_ptr().cast(),
1322                 1,
1323             );
1324         }
1325 
1326         // If the image is created from a buffer, use clear_image_buffer instead
1327         if self.is_parent_buffer() {
1328             let strides = (
1329                 self.image_desc.row_pitch()? as usize,
1330                 self.image_desc.slice_pitch(),
1331             );
1332             ctx.clear_image_buffer(res, &new_pattern, origin, region, strides, pixel_size);
1333         } else {
1334             let bx = create_pipe_box(*origin, *region, self.mem_type)?;
1335             ctx.clear_texture(res, &new_pattern, &bx);
1336         }
1337 
1338         Ok(())
1339     }
1340 
1341     fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
1342         let mut maps = self.maps.lock().unwrap();
1343         let entry = maps.entry(ptr as usize);
1344         matches!(entry, Entry::Occupied(entry) if entry.get().count > 0)
1345     }
1346 
1347     pub fn is_parent_buffer(&self) -> bool {
1348         matches!(self.parent, Some(Mem::Buffer(_)))
1349     }
1350 
1351     pub fn map(
1352         &self,
1353         origin: CLVec<usize>,
1354         region: CLVec<usize>,
1355         row_pitch: &mut usize,
1356         slice_pitch: &mut usize,
1357         writes: bool,
1358     ) -> CLResult<MutMemoryPtr> {
1359         let pixel_size = self.image_format.pixel_size().unwrap() as usize;
1360 
1361         *row_pitch = self.image_desc.row_pitch()? as usize;
1362         *slice_pitch = self.image_desc.slice_pitch();
1363 
1364         let offset = CLVec::calc_offset(origin, [pixel_size, *row_pitch, *slice_pitch]);
1365 
1366         // From the CL Spec:
1367         //
1368         //   The pointer returned maps a 1D, 2D or 3D region starting at origin and is at least
1369         //   region[0] pixels in size for a 1D image, 1D image buffer or 1D image array,
1370         //   (image_row_pitch × region[1]) pixels in size for a 2D image or 2D image array, and
1371         //   (image_slice_pitch × region[2]) pixels in size for a 3D image. The result of a memory
1372         //   access outside this region is undefined.
1373         //
1374         // It's not guaranteed that the row_pitch is taken into account for 1D images, but the CL
1375         // CTS relies on this behavior.
1376         //
1377         // Also note that the spec wording is wrong in regard to arrays, which need to take the
1378         // image_slice_pitch into account.
1379         let size = if self.image_desc.is_array() || self.image_desc.dims() == 3 {
1380             debug_assert_ne!(*slice_pitch, 0);
1381             // the slice count is in region[1] for 1D array images
1382             if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
1383                 region[1] * *slice_pitch
1384             } else {
1385                 region[2] * *slice_pitch
1386             }
1387         } else {
1388             debug_assert_ne!(*row_pitch, 0);
1389             region[1] * *row_pitch
1390         };
1391 
1392         let layout;
1393         unsafe {
1394             layout = Layout::from_size_align_unchecked(size, size_of::<[u32; 4]>());
1395         }
1396 
1397         self.base.map(
1398             offset,
1399             layout,
1400             writes,
1401             &self.maps,
1402             ImageMapping {
1403                 origin: origin,
1404                 region: region,
1405             },
1406         )
1407     }
1408 
1409     pub fn pipe_image_host_access(&self) -> u16 {
1410         // those flags are all mutually exclusive
1411         (if bit_check(self.flags, CL_MEM_HOST_READ_ONLY) {
1412             PIPE_IMAGE_ACCESS_READ
1413         } else if bit_check(self.flags, CL_MEM_HOST_WRITE_ONLY) {
1414             PIPE_IMAGE_ACCESS_WRITE
1415         } else if bit_check(self.flags, CL_MEM_HOST_NO_ACCESS) {
1416             0
1417         } else {
1418             PIPE_IMAGE_ACCESS_READ_WRITE
1419         }) as u16
1420     }
1421 
1422     pub fn read(
1423         &self,
1424         dst: MutMemoryPtr,
1425         q: &Queue,
1426         ctx: &PipeContext,
1427         region: &CLVec<usize>,
1428         src_origin: &CLVec<usize>,
1429         dst_row_pitch: usize,
1430         dst_slice_pitch: usize,
1431     ) -> CLResult<()> {
1432         let dst = dst.as_ptr();
1433         let pixel_size = self.image_format.pixel_size().unwrap();
1434 
1435         let tx;
1436         let src_row_pitch;
1437         let src_slice_pitch;
1438         if let Some(Mem::Buffer(buffer)) = &self.parent {
1439             src_row_pitch = self.image_desc.image_row_pitch;
1440             src_slice_pitch = self.image_desc.image_slice_pitch;
1441 
1442             let (offset, size) = CLVec::calc_offset_size(
1443                 src_origin,
1444                 region,
1445                 [pixel_size.into(), src_row_pitch, src_slice_pitch],
1446             );
1447 
1448             tx = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
1449         } else {
1450             let bx = create_pipe_box(*src_origin, *region, self.mem_type)?;
1451             tx = self.tx_image(q, ctx, &bx, RWFlags::RD)?;
1452             src_row_pitch = tx.row_pitch() as usize;
1453             src_slice_pitch = tx.slice_pitch();
1454         };
1455 
1456         perf_warning!("clEnqueueReadImage and clEnqueueMapImage stall the GPU");
1457 
1458         sw_copy(
1459             tx.ptr(),
1460             dst,
1461             region,
1462             &CLVec::default(),
1463             src_row_pitch,
1464             src_slice_pitch,
1465             &CLVec::default(),
1466             dst_row_pitch,
1467             dst_slice_pitch,
1468             pixel_size,
1469         );
1470 
1471         Ok(())
1472     }
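    // Illustrative sketch (identifiers hypothetical): reading a w x h region
    // of a 2D image into a tightly packed host allocation would look like
    //
    //   image.read(host_ptr, &queue, &pipe_ctx, &region, &origin,
    //              w * pixel_size, w * h * pixel_size)?;
    //
    // where region covers w x h x 1 pixels and the pitches describe the
    // packed destination layout.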

    pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        // no need to update
        if self.is_pure_user_memory(q.device)? {
            return Ok(());
        }

        let maps = self.maps.lock().unwrap();
        let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
            return Err(CL_INVALID_VALUE);
        };

        let row_pitch = self.image_desc.row_pitch()? as usize;
        let slice_pitch = self.image_desc.slice_pitch();

        self.read(
            ptr,
            q,
            ctx,
            &mapping.region,
            &mapping.origin,
            row_pitch,
            slice_pitch,
        )
    }
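    // sync_map() fills a freshly mapped shadow allocation with the current
    // contents of the GPU resource, so the host sees up-to-date data.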

    pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        // no need to update
        if self.is_pure_user_memory(q.device)? {
            return Ok(());
        }

        match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
            Entry::Vacant(_) => Err(CL_INVALID_VALUE),
            Entry::Occupied(entry) => {
                let mapping = entry.get();
                let row_pitch = self.image_desc.row_pitch()? as usize;
                let slice_pitch = self.image_desc.slice_pitch();

                if mapping.writes {
                    self.write(
                        ptr.into(),
                        q,
                        ctx,
                        &mapping.region,
                        row_pitch,
                        slice_pitch,
                        &mapping.origin,
                    )?;
                }

                // only remove if the mapping wasn't reused in the meantime
                if mapping.count == 0 {
                    entry.remove();
                }

                Ok(())
            }
        }
    }

    fn tx_image<'a>(
        &self,
        q: &Queue,
        ctx: &'a PipeContext,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> CLResult<PipeTransfer<'a>> {
        let r = self.get_res_of_dev(q.device)?;
        ctx.texture_map(r, bx, rw).ok_or(CL_OUT_OF_RESOURCES)
    }

    pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
        match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
            Entry::Vacant(_) => Err(CL_INVALID_VALUE),
            Entry::Occupied(mut entry) => {
                let entry = entry.get_mut();
                debug_assert!(entry.count > 0);
                entry.count -= 1;
                Ok(entry.count == 0)
            }
        }
    }
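    // Mapping lifecycle sketch: every map call bumps Mapping::count, unmap()
    // decrements it and returns true once the last reference is gone, and
    // sync_unmap() then writes dirty contents back and removes the entry,
    // unless the mapping got reused in the meantime.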

    pub fn write(
        &self,
        src: ConstMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_row_pitch: usize,
        mut src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
    ) -> CLResult<()> {
        let src = src.as_ptr();
        let dst_row_pitch = self.image_desc.image_row_pitch;
        let dst_slice_pitch = self.image_desc.image_slice_pitch;

        // texture_subdata most likely maps the resource anyway
        perf_warning!("clEnqueueWriteImage and clEnqueueUnmapMemObject stall the GPU");

        if let Some(Mem::Buffer(buffer)) = &self.parent {
            let pixel_size = self.image_format.pixel_size().unwrap();
            let (offset, size) = CLVec::calc_offset_size(
                dst_origin,
                region,
                [pixel_size.into(), dst_row_pitch, dst_slice_pitch],
            );
            let tx = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;

            sw_copy(
                src,
                tx.ptr(),
                region,
                &CLVec::default(),
                src_row_pitch,
                src_slice_pitch,
                &CLVec::default(),
                dst_row_pitch,
                dst_slice_pitch,
                pixel_size,
            );
        } else {
            let res = self.get_res_of_dev(q.device)?;
            let bx = create_pipe_box(*dst_origin, *region, self.mem_type)?;

            // For 1D array images the host rows are the layers, so the layer
            // stride equals the row pitch.
            if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
                src_slice_pitch = src_row_pitch;
            }

            ctx.texture_subdata(
                res,
                &bx,
                src,
                src_row_pitch
                    .try_into()
                    .map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
                src_slice_pitch,
            );
        }
        Ok(())
    }
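    // Illustrative sketch (identifiers hypothetical), mirroring read() above:
    //
    //   image.write(src_ptr, &queue, &pipe_ctx, &region,
    //               w * pixel_size, w * h * pixel_size, &origin)?;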
}

pub struct Sampler {
    pub base: CLObjectBase<CL_INVALID_SAMPLER>,
    pub context: Arc<Context>,
    pub normalized_coords: bool,
    pub addressing_mode: cl_addressing_mode,
    pub filter_mode: cl_filter_mode,
    pub props: Option<Properties<cl_sampler_properties>>,
}

impl_cl_type_trait!(cl_sampler, Sampler, CL_INVALID_SAMPLER);

impl Sampler {
    pub fn new(
        context: Arc<Context>,
        normalized_coords: bool,
        addressing_mode: cl_addressing_mode,
        filter_mode: cl_filter_mode,
        props: Option<Properties<cl_sampler_properties>>,
    ) -> Arc<Sampler> {
        Arc::new(Self {
            base: CLObjectBase::new(RusticlTypes::Sampler),
            context,
            normalized_coords,
            addressing_mode,
            filter_mode,
            props,
        })
    }

    pub fn nir_to_cl(
        addressing_mode: u32,
        filter_mode: u32,
        normalized_coords: u32,
    ) -> (cl_addressing_mode, cl_filter_mode, bool) {
        let addr_mode = match addressing_mode {
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_NONE => CL_ADDRESS_NONE,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE => {
                CL_ADDRESS_CLAMP_TO_EDGE
            }
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP => CL_ADDRESS_CLAMP,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT => CL_ADDRESS_REPEAT,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED => {
                CL_ADDRESS_MIRRORED_REPEAT
            }
            _ => panic!("unknown addressing_mode"),
        };

        let filter = match filter_mode {
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_NEAREST => CL_FILTER_NEAREST,
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_LINEAR => CL_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),
        };

        (addr_mode, filter, normalized_coords != 0)
    }
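    // For example, per the match arms above:
    //
    //   Sampler::nir_to_cl(
    //       cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP,
    //       cl_sampler_filter_mode::SAMPLER_FILTER_MODE_NEAREST,
    //       1,
    //   ) == (CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, true)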

    pub fn cl_to_pipe(
        (addressing_mode, filter_mode, normalized_coords): (
            cl_addressing_mode,
            cl_filter_mode,
            bool,
        ),
    ) -> pipe_sampler_state {
        let mut res = pipe_sampler_state::default();

        let wrap = match addressing_mode {
            CL_ADDRESS_CLAMP_TO_EDGE => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
            CL_ADDRESS_CLAMP => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_BORDER,
            CL_ADDRESS_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_REPEAT,
            CL_ADDRESS_MIRRORED_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_MIRROR_REPEAT,
            // TODO: what's a reasonable default?
            _ => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
        };

        let img_filter = match filter_mode {
            CL_FILTER_NEAREST => pipe_tex_filter::PIPE_TEX_FILTER_NEAREST,
            CL_FILTER_LINEAR => pipe_tex_filter::PIPE_TEX_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),
        };

        res.set_min_img_filter(img_filter);
        res.set_mag_img_filter(img_filter);
        res.set_unnormalized_coords((!normalized_coords).into());
        res.set_wrap_r(wrap);
        res.set_wrap_s(wrap);
        res.set_wrap_t(wrap);

        res
    }
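    // For example, cl_to_pipe((CL_ADDRESS_REPEAT, CL_FILTER_LINEAR, true))
    // yields a state with PIPE_TEX_WRAP_REPEAT on all three wrap axes, linear
    // min/mag filtering and normalized coordinates.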

    pub fn pipe(&self) -> pipe_sampler_state {
        Self::cl_to_pipe((
            self.addressing_mode,
            self.filter_mode,
            self.normalized_coords,
        ))
    }
}