use crate::api::event::create_and_queue;
use crate::api::icd::*;
use crate::api::util::*;
use crate::core::device::*;
use crate::core::event::*;
use crate::core::kernel::*;
use crate::core::memory::*;
use crate::core::program::*;
use crate::core::queue::*;

use mesa_rust_util::ptr::*;
use mesa_rust_util::string::*;
use rusticl_opencl_gen::*;
use rusticl_proc_macros::cl_entrypoint;
use rusticl_proc_macros::cl_info_entrypoint;

use std::cmp;
use std::mem::{self, MaybeUninit};
use std::os::raw::c_void;
use std::ptr;
use std::slice;
use std::sync::Arc;

#[cl_info_entrypoint(clGetKernelInfo)]
impl CLInfo<cl_kernel_info> for cl_kernel {
    fn query(&self, q: cl_kernel_info, _: &[u8]) -> CLResult<Vec<MaybeUninit<u8>>> {
        let kernel = Kernel::ref_from_raw(*self)?;
        Ok(match q {
            CL_KERNEL_ATTRIBUTES => cl_prop::<&str>(&kernel.kernel_info.attributes_string),
            CL_KERNEL_CONTEXT => {
                let ptr = Arc::as_ptr(&kernel.prog.context);
                cl_prop::<cl_context>(cl_context::from_ptr(ptr))
            }
            CL_KERNEL_FUNCTION_NAME => cl_prop::<&str>(&kernel.name),
            CL_KERNEL_NUM_ARGS => cl_prop::<cl_uint>(kernel.kernel_info.args.len() as cl_uint),
            CL_KERNEL_PROGRAM => {
                let ptr = Arc::as_ptr(&kernel.prog);
                cl_prop::<cl_program>(cl_program::from_ptr(ptr))
            }
            CL_KERNEL_REFERENCE_COUNT => cl_prop::<cl_uint>(Kernel::refcnt(*self)?),
            // CL_INVALID_VALUE if param_name is not one of the supported values
            _ => return Err(CL_INVALID_VALUE),
        })
    }
}

#[cl_info_entrypoint(clGetKernelArgInfo)]
impl CLInfoObj<cl_kernel_arg_info, cl_uint> for cl_kernel {
    fn query(&self, idx: cl_uint, q: cl_kernel_arg_info) -> CLResult<Vec<MaybeUninit<u8>>> {
        let kernel = Kernel::ref_from_raw(*self)?;

        // CL_INVALID_ARG_INDEX if arg_index is not a valid argument index.
        if idx as usize >= kernel.kernel_info.args.len() {
            return Err(CL_INVALID_ARG_INDEX);
        }

        Ok(match q {
            CL_KERNEL_ARG_ACCESS_QUALIFIER => {
                cl_prop::<cl_kernel_arg_access_qualifier>(kernel.access_qualifier(idx))
            }
            CL_KERNEL_ARG_ADDRESS_QUALIFIER => {
                cl_prop::<cl_kernel_arg_address_qualifier>(kernel.address_qualifier(idx))
            }
            CL_KERNEL_ARG_NAME => cl_prop::<&str>(kernel.arg_name(idx)),
            CL_KERNEL_ARG_TYPE_NAME => cl_prop::<&str>(kernel.arg_type_name(idx)),
            CL_KERNEL_ARG_TYPE_QUALIFIER => {
                cl_prop::<cl_kernel_arg_type_qualifier>(kernel.type_qualifier(idx))
            }
            // CL_INVALID_VALUE if param_name is not one of the supported values
            _ => return Err(CL_INVALID_VALUE),
        })
    }
}

#[cl_info_entrypoint(clGetKernelWorkGroupInfo)]
impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
    fn query(
        &self,
        dev: cl_device_id,
        q: cl_kernel_work_group_info,
    ) -> CLResult<Vec<MaybeUninit<u8>>> {
        let kernel = Kernel::ref_from_raw(*self)?;

        // CL_INVALID_DEVICE [..] if device is NULL but there is more than one device associated
        // with kernel.
        let dev = if dev.is_null() {
            if kernel.prog.devs.len() > 1 {
                return Err(CL_INVALID_DEVICE);
            } else {
                kernel.prog.devs[0]
            }
        } else {
            Device::ref_from_raw(dev)?
        };

        // CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
        if !kernel.prog.devs.contains(&dev) {
            return Err(CL_INVALID_DEVICE);
        }

        Ok(match q {
            CL_KERNEL_COMPILE_WORK_GROUP_SIZE => cl_prop::<[usize; 3]>(kernel.work_group_size()),
            CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(dev)),
            CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => {
                cl_prop::<usize>(kernel.preferred_simd_size(dev))
            }
            CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(dev)),
            CL_KERNEL_WORK_GROUP_SIZE => cl_prop::<usize>(kernel.max_threads_per_block(dev)),
            // CL_INVALID_VALUE if param_name is not one of the supported values
            _ => return Err(CL_INVALID_VALUE),
        })
    }
}

impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, usize)>
    for cl_kernel
{
    fn query(
        &self,
        (dev, input_value_size, input_value, output_value_size): (
            cl_device_id,
            usize,
            *const c_void,
            usize,
        ),
        q: cl_kernel_sub_group_info,
    ) -> CLResult<Vec<MaybeUninit<u8>>> {
        let kernel = Kernel::ref_from_raw(*self)?;

        // CL_INVALID_DEVICE [..] if device is NULL but there is more than one device associated
        // with kernel.
        let dev = if dev.is_null() {
            if kernel.prog.devs.len() > 1 {
                return Err(CL_INVALID_DEVICE);
            } else {
                kernel.prog.devs[0]
            }
        } else {
            Device::ref_from_raw(dev)?
        };

        // CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
        if !kernel.prog.devs.contains(&dev) {
            return Err(CL_INVALID_DEVICE);
        }

        // CL_INVALID_OPERATION if device does not support subgroups.
        if !dev.subgroups_supported() {
            return Err(CL_INVALID_OPERATION);
        }

        let usize_byte = mem::size_of::<usize>();
        // First, convert the raw input buffer into a properly typed usize slice.
        let input: &[usize] = match q {
            CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE | CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE => {
                // CL_INVALID_VALUE if param_name is CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
                // CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE or ... and the size in bytes specified by
                // input_value_size is not valid or if input_value is NULL.
                if ![usize_byte, 2 * usize_byte, 3 * usize_byte].contains(&input_value_size) {
                    return Err(CL_INVALID_VALUE);
                }
                // SAFETY: we verified the size as best as possible, with the rest we trust the client
                unsafe { slice::from_raw_parts(input_value.cast(), input_value_size / usize_byte) }
            }
            CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT => {
                // CL_INVALID_VALUE if param_name is ... CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT
                // and the size in bytes specified by input_value_size is not valid or if
                // input_value is NULL.
                if input_value_size != usize_byte || input_value.is_null() {
                    return Err(CL_INVALID_VALUE);
                }
                // SAFETY: we trust the client here
                unsafe { slice::from_raw_parts(input_value.cast(), 1) }
            }
            _ => &[],
        };
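        // Depending on the query, `input` now holds either a local work size (one usize per work
        // dimension) or, for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, the requested sub-group
        // count as a single usize.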

        Ok(match q {
            CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE => {
                cl_prop::<usize>(kernel.subgroups_for_block(dev, input))
            }
            CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE => {
                cl_prop::<usize>(kernel.subgroup_size_for_block(dev, input))
            }
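            // Search for the smallest uniform 1-D block that yields exactly the requested number
            // of sub-groups. Hypothetical example: with a device sub-group size of 32, asking for
            // 4 sub-groups would return [128, 1, 1].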
            CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT => {
                let subgroups = input[0];
                let mut res = vec![0; 3];

                for subgroup_size in kernel.subgroup_sizes(dev) {
                    let threads = subgroups * subgroup_size;

                    if threads > dev.max_threads_per_block() {
                        continue;
                    }

                    let block = [threads, 1, 1];
                    let real_subgroups = kernel.subgroups_for_block(dev, &block);

                    if real_subgroups == subgroups {
                        res = block.to_vec();
                        break;
                    }
                }

                res.truncate(output_value_size / usize_byte);
                cl_prop::<Vec<usize>>(res)
            }
            CL_KERNEL_MAX_NUM_SUB_GROUPS => {
                let threads = kernel.max_threads_per_block(dev);
                let max_groups = dev.max_subgroups();

                let mut result = 0;
                for sgs in kernel.subgroup_sizes(dev) {
                    result = cmp::max(result, threads / sgs);
                    result = cmp::min(result, max_groups as usize);
                }
                cl_prop::<usize>(result)
            }
            CL_KERNEL_COMPILE_NUM_SUB_GROUPS => cl_prop::<usize>(kernel.num_subgroups()),
            CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL => cl_prop::<usize>(kernel.subgroup_size()),
            // CL_INVALID_VALUE if param_name is not one of the supported values
            _ => return Err(CL_INVALID_VALUE),
        })
    }
}

const ZERO_ARR: [usize; 3] = [0; 3];

/// # Safety
///
/// This function is only safe to call if `arr` is either NULL or points to an array of at least
/// `work_dim` elements.
unsafe fn kernel_work_arr_or_default<'a>(arr: *const usize, work_dim: cl_uint) -> &'a [usize] {
    if !arr.is_null() {
        unsafe { slice::from_raw_parts(arr, work_dim as usize) }
    } else {
        &ZERO_ARR
    }
}
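// A minimal illustration of the helper above (hypothetical values): a NULL pointer falls back to
// the shared ZERO_ARR, anything else is reinterpreted as a `work_dim`-element slice.
//
//     let dims = [64usize, 8, 1];
//     assert_eq!(unsafe { kernel_work_arr_or_default(dims.as_ptr(), 3) }, [64, 8, 1]);
//     assert_eq!(unsafe { kernel_work_arr_or_default(ptr::null(), 3) }, [0, 0, 0]);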

/// # Safety
///
/// This function is only safe to call if `arr` is either NULL or points to an array of at least
/// `work_dim` elements.
unsafe fn kernel_work_arr_mut<'a>(arr: *mut usize, work_dim: cl_uint) -> Option<&'a mut [usize]> {
    if !arr.is_null() {
        unsafe { Some(slice::from_raw_parts_mut(arr, work_dim as usize)) }
    } else {
        None
    }
}

#[cl_entrypoint(clCreateKernel)]
fn create_kernel(
    program: cl_program,
    kernel_name: *const ::std::os::raw::c_char,
) -> CLResult<cl_kernel> {
    let p = Program::arc_from_raw(program)?;
    let name = c_string_to_string(kernel_name);

    // CL_INVALID_VALUE if kernel_name is NULL.
    if kernel_name.is_null() {
        return Err(CL_INVALID_VALUE);
    }

    let build = p.build_info();
    // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable for program.
    if build.kernels().is_empty() {
        return Err(CL_INVALID_PROGRAM_EXECUTABLE);
    }

    // CL_INVALID_KERNEL_NAME if kernel_name is not found in program.
    if !build.kernels().contains(&name) {
        return Err(CL_INVALID_KERNEL_NAME);
    }

    // CL_INVALID_KERNEL_DEFINITION if the function definition for the __kernel function given by
    // kernel_name (such as the number of arguments or the argument types) is not the same for all
    // devices for which the program executable has been built.
    if !p.has_unique_kernel_signatures(&name) {
        return Err(CL_INVALID_KERNEL_DEFINITION);
    }

    Ok(Kernel::new(name, Arc::clone(&p), &build).into_cl())
}

#[cl_entrypoint(clRetainKernel)]
fn retain_kernel(kernel: cl_kernel) -> CLResult<()> {
    Kernel::retain(kernel)
}

#[cl_entrypoint(clReleaseKernel)]
fn release_kernel(kernel: cl_kernel) -> CLResult<()> {
    Kernel::release(kernel)
}

#[cl_entrypoint(clCreateKernelsInProgram)]
fn create_kernels_in_program(
    program: cl_program,
    num_kernels: cl_uint,
    kernels: *mut cl_kernel,
    num_kernels_ret: *mut cl_uint,
) -> CLResult<()> {
    let p = Program::arc_from_raw(program)?;
    let build = p.build_info();

    // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable for any device in
    // program.
    if build.kernels().is_empty() {
        return Err(CL_INVALID_PROGRAM_EXECUTABLE);
    }

    // CL_INVALID_VALUE if kernels is not NULL and num_kernels is less than the number of kernels
    // in program.
    if !kernels.is_null() && build.kernels().len() > num_kernels as usize {
        return Err(CL_INVALID_VALUE);
    }

    let mut num_kernels = 0;
    for name in build.kernels() {
        // Kernel objects are not created for any __kernel functions in program that do not have
        // the same function definition across all devices for which a program executable has been
        // successfully built.
        if !p.has_unique_kernel_signatures(name) {
            continue;
        }

        if !kernels.is_null() {
            // we just assume the client isn't stupid
            unsafe {
                kernels
                    .add(num_kernels as usize)
                    .write(Kernel::new(name.clone(), p.clone(), &build).into_cl());
            }
        }
        num_kernels += 1;
    }
    num_kernels_ret.write_checked(num_kernels);
    Ok(())
}
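// The usual client-side two-call idiom for clCreateKernelsInProgram, as a hedged sketch with
// hypothetical handles (query the count first, then fetch that many kernel objects):
//
//     let mut n: cl_uint = 0;
//     create_kernels_in_program(prog, 0, ptr::null_mut(), &mut n)?;
//     let mut kernels = vec![ptr::null_mut(); n as usize];
//     create_kernels_in_program(prog, n, kernels.as_mut_ptr(), ptr::null_mut())?;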

#[cl_entrypoint(clSetKernelArg)]
fn set_kernel_arg(
    kernel: cl_kernel,
    arg_index: cl_uint,
    arg_size: usize,
    arg_value: *const ::std::os::raw::c_void,
) -> CLResult<()> {
    let k = Kernel::ref_from_raw(kernel)?;
    let arg_index = arg_index as usize;

    // CL_INVALID_ARG_INDEX if arg_index is not a valid argument index.
    if let Some(arg) = k.kernel_info.args.get(arg_index) {
        // CL_INVALID_ARG_SIZE if arg_size does not match the size of the data type for an argument
        // that is not a memory object or if the argument is a memory object and
        // arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the
        // local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).
        match arg.kind {
            KernelArgType::MemLocal => {
                if arg_size == 0 {
                    return Err(CL_INVALID_ARG_SIZE);
                }
            }
            KernelArgType::MemGlobal
            | KernelArgType::MemConstant
            | KernelArgType::Image
            | KernelArgType::RWImage
            | KernelArgType::Texture => {
                if arg_size != std::mem::size_of::<cl_mem>() {
                    return Err(CL_INVALID_ARG_SIZE);
                }
            }

            KernelArgType::Sampler => {
                if arg_size != std::mem::size_of::<cl_sampler>() {
                    return Err(CL_INVALID_ARG_SIZE);
                }
            }

            KernelArgType::Constant(size) => {
                if size as usize != arg_size {
                    return Err(CL_INVALID_ARG_SIZE);
                }
            }
        }

        // CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
        match arg.kind {
            // If the argument is declared with the local qualifier, the arg_value entry must be
            // NULL.
            KernelArgType::MemLocal => {
                if !arg_value.is_null() {
                    return Err(CL_INVALID_ARG_VALUE);
                }
            }
            // If the argument is of type sampler_t, the arg_value entry must be a pointer to the
            // sampler object.
            KernelArgType::Constant(_) | KernelArgType::Sampler => {
                if arg_value.is_null() {
                    return Err(CL_INVALID_ARG_VALUE);
                }
            }
            _ => {}
        };

        // let's create the arg now
        let arg = unsafe {
            if arg.dead {
                KernelArgValue::None
            } else {
                match arg.kind {
                    KernelArgType::Constant(_) => KernelArgValue::Constant(
                        slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
                    ),
                    KernelArgType::MemConstant | KernelArgType::MemGlobal => {
                        let ptr: *const cl_mem = arg_value.cast();
                        if ptr.is_null() || (*ptr).is_null() {
                            KernelArgValue::None
                        } else {
                            KernelArgValue::Buffer(Buffer::arc_from_raw(*ptr)?)
                        }
                    }
                    KernelArgType::MemLocal => KernelArgValue::LocalMem(arg_size),
                    KernelArgType::Image | KernelArgType::RWImage | KernelArgType::Texture => {
                        let img: *const cl_mem = arg_value.cast();
                        KernelArgValue::Image(Image::arc_from_raw(*img)?)
                    }
                    KernelArgType::Sampler => {
                        let ptr: *const cl_sampler = arg_value.cast();
                        KernelArgValue::Sampler(Sampler::arc_from_raw(*ptr)?)
                    }
                }
            }
        };
        k.set_kernel_arg(arg_index, arg)
    } else {
        Err(CL_INVALID_ARG_INDEX)
    }

    //• CL_INVALID_DEVICE_QUEUE for an argument declared to be of type queue_t when the specified
    //  arg_value is not a valid device queue object. This error code is missing before version 2.0.
    //• CL_INVALID_ARG_VALUE if the argument is an image declared with the read_only qualifier and
    //  arg_value refers to an image object created with cl_mem_flags of CL_MEM_WRITE_ONLY or if the
    //  image argument is declared with the write_only qualifier and arg_value refers to an image
    //  object created with cl_mem_flags of CL_MEM_READ_ONLY.
    //• CL_MAX_SIZE_RESTRICTION_EXCEEDED if the size in bytes of the memory object (if the argument
    //  is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds
    //  a language-specified maximum size restriction for this argument, such as the MaxByteOffset
    //  SPIR-V decoration. This error code is missing before version 2.2.
}
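// For reference, a hedged sketch of the client-side call for a global-memory buffer argument
// (hypothetical `kernel` and `mem` handles, not part of this file):
//
//     let mem: cl_mem = /* from clCreateBuffer */;
//     set_kernel_arg(
//         kernel,
//         0,
//         std::mem::size_of::<cl_mem>(),
//         &mem as *const cl_mem as *const c_void,
//     )?;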

#[cl_entrypoint(clSetKernelArgSVMPointer)]
fn set_kernel_arg_svm_pointer(
    kernel: cl_kernel,
    arg_index: cl_uint,
    arg_value: *const ::std::os::raw::c_void,
) -> CLResult<()> {
    let kernel = Kernel::ref_from_raw(kernel)?;
    let arg_index = arg_index as usize;
    let arg_value = arg_value as usize;

    if !kernel.has_svm_devs() {
        return Err(CL_INVALID_OPERATION);
    }

    if let Some(arg) = kernel.kernel_info.args.get(arg_index) {
        if !matches!(
            arg.kind,
            KernelArgType::MemConstant | KernelArgType::MemGlobal
        ) {
            return Err(CL_INVALID_ARG_INDEX);
        }

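        // The SVM pointer is passed by value: its address is packed as a native-endian,
        // pointer-sized byte constant so the kernel sees an ordinary pointer-sized argument.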
        let arg_value = KernelArgValue::Constant(arg_value.to_ne_bytes().to_vec());
        kernel.set_kernel_arg(arg_index, arg_value)
    } else {
        Err(CL_INVALID_ARG_INDEX)
    }

    // CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
}

#[cl_entrypoint(clSetKernelExecInfo)]
fn set_kernel_exec_info(
    kernel: cl_kernel,
    param_name: cl_kernel_exec_info,
    param_value_size: usize,
    param_value: *const ::std::os::raw::c_void,
) -> CLResult<()> {
    let k = Kernel::ref_from_raw(kernel)?;

    // CL_INVALID_OPERATION if no devices in the context associated with kernel support SVM.
    if !k.prog.devs.iter().any(|dev| dev.svm_supported()) {
        return Err(CL_INVALID_OPERATION);
    }

    // CL_INVALID_VALUE ... if param_value is NULL
    if param_value.is_null() {
        return Err(CL_INVALID_VALUE);
    }

    // CL_INVALID_VALUE ... if the size specified by param_value_size is not valid.
    match param_name {
        CL_KERNEL_EXEC_INFO_SVM_PTRS | CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM => {
            // it's a list of pointers
            if param_value_size % mem::size_of::<*const c_void>() != 0 {
                return Err(CL_INVALID_VALUE);
            }
        }
        CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM
        | CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM => {
            if param_value_size != mem::size_of::<cl_bool>() {
                return Err(CL_INVALID_VALUE);
            }
        }
        // CL_INVALID_VALUE if param_name is not valid
        _ => return Err(CL_INVALID_VALUE),
    }

    Ok(())

    // CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and
    // param_value is CL_TRUE but no devices in context associated with kernel support fine-grain
    // system SVM allocations.
}

#[cl_entrypoint(clEnqueueNDRangeKernel)]
fn enqueue_ndrange_kernel(
    command_queue: cl_command_queue,
    kernel: cl_kernel,
    work_dim: cl_uint,
    global_work_offset: *const usize,
    global_work_size: *const usize,
    local_work_size: *const usize,
    num_events_in_wait_list: cl_uint,
    event_wait_list: *const cl_event,
    event: *mut cl_event,
) -> CLResult<()> {
    let q = Queue::arc_from_raw(command_queue)?;
    let k = Kernel::arc_from_raw(kernel)?;
    let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;

    // CL_INVALID_CONTEXT if context associated with command_queue and kernel are not the same
    if q.context != k.prog.context {
        return Err(CL_INVALID_CONTEXT);
    }

    // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
    // for device associated with command_queue.
    if k.prog.status(q.device) != CL_BUILD_SUCCESS as cl_build_status {
        return Err(CL_INVALID_PROGRAM_EXECUTABLE);
    }

    // CL_INVALID_KERNEL_ARGS if the kernel argument values have not been specified.
    if k.arg_values().iter().any(|v| v.is_none()) {
        return Err(CL_INVALID_KERNEL_ARGS);
    }

    // CL_INVALID_WORK_DIMENSION if work_dim is not a valid value (i.e. a value between 1 and
    // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS).
    if work_dim == 0 || work_dim > q.device.max_grid_dimensions() {
        return Err(CL_INVALID_WORK_DIMENSION);
    }

    // We assume the application gets it right and doesn't pass shorter arrays than actually
    // needed.
    let global_work_size = unsafe { kernel_work_arr_or_default(global_work_size, work_dim) };
    let local_work_size = unsafe { kernel_work_arr_or_default(local_work_size, work_dim) };
    let global_work_offset = unsafe { kernel_work_arr_or_default(global_work_offset, work_dim) };

    let device_bits = q.device.address_bits();
    let device_max = u64::MAX >> (u64::BITS - device_bits);
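    // E.g. a device reporting 32 address bits gives device_max = u64::MAX >> 32 == 0xffff_ffff,
    // the largest value a device-side size_t can hold.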

    let mut threads = 1;
    for i in 0..work_dim as usize {
        let lws = local_work_size[i];
        let gws = global_work_size[i];
        let gwo = global_work_offset[i];

        threads *= lws;

        // CL_INVALID_WORK_ITEM_SIZE if the number of work-items specified in any of
        // local_work_size[0], … local_work_size[work_dim - 1] is greater than the corresponding
        // values specified by
        // CL_DEVICE_MAX_WORK_ITEM_SIZES[0], …, CL_DEVICE_MAX_WORK_ITEM_SIZES[work_dim - 1].
        if lws > q.device.max_block_sizes()[i] {
            return Err(CL_INVALID_WORK_ITEM_SIZE);
        }

        // CL_INVALID_WORK_GROUP_SIZE if the work-group size must be uniform and the
        // local_work_size is not NULL, [...] if the global_work_size is not evenly divisible by
        // the local_work_size.
        if lws != 0 && gws % lws != 0 {
            return Err(CL_INVALID_WORK_GROUP_SIZE);
        }

        // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the
        // required work-group size for kernel in the program source.
        if lws != 0 && k.work_group_size()[i] != 0 && lws != k.work_group_size()[i] {
            return Err(CL_INVALID_WORK_GROUP_SIZE);
        }

        // CL_INVALID_GLOBAL_WORK_SIZE if any of the values specified in global_work_size[0], …
        // global_work_size[work_dim - 1] exceed the maximum value representable by size_t on
        // the device on which the kernel-instance will be enqueued.
        if gws as u64 > device_max {
            return Err(CL_INVALID_GLOBAL_WORK_SIZE);
        }

        // CL_INVALID_GLOBAL_OFFSET if the value specified in global_work_size + the
        // corresponding values in global_work_offset for any dimensions is greater than the
        // maximum value representable by size_t on the device on which the kernel-instance
        // will be enqueued
        if u64::checked_add(gws as u64, gwo as u64)
            .filter(|&x| x <= device_max)
            .is_none()
        {
            return Err(CL_INVALID_GLOBAL_OFFSET);
        }
    }

    // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of
    // work-items in the work-group computed as
    // local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by
    // CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table.
    if threads != 0 && threads > k.max_threads_per_block(q.device) {
        return Err(CL_INVALID_WORK_GROUP_SIZE);
    }

    // If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel
    // command will trivially succeed after its event dependencies are satisfied and subsequently
    // update its completion event.
    let cb: EventSig = if global_work_size.contains(&0) {
        Box::new(|_, _| Ok(()))
    } else {
        k.launch(
            &q,
            work_dim,
            local_work_size,
            global_work_size,
            global_work_offset,
        )?
    };

    create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb)

    //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the
    //  required number of sub-groups for kernel in the program source.
    //• CL_MISALIGNED_SUB_BUFFER_OFFSET if a sub-buffer object is specified as the value for an
    //  argument that is a buffer object and the offset specified when the sub-buffer object is
    //  created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with
    //  queue. This error code is missing before version 1.1.
    //• CL_INVALID_IMAGE_SIZE if an image object is specified as an argument value and the image
    //  dimensions (image width, height, specified or compute row and/or slice pitch) are not
    //  supported by device associated with queue.
    //• CL_IMAGE_FORMAT_NOT_SUPPORTED if an image object is specified as an argument value and the
    //  image format (image channel order and data type) is not supported by device associated
    //  with queue.
    //• CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance of kernel on the
    //  command-queue because of insufficient resources needed to execute the kernel. For example,
    //  the explicitly specified local_work_size causes a failure to execute the kernel because of
    //  insufficient resources such as registers or local memory. Another example would be the
    //  number of read-only image args used in kernel exceed the CL_DEVICE_MAX_READ_IMAGE_ARGS
    //  value for device or the number of write-only and read-write image args used in kernel
    //  exceed the CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS value for device or the number of samplers
    //  used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
    //• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store
    //  associated with image or buffer objects specified as arguments to kernel.
    //• CL_INVALID_OPERATION if SVM pointers are passed as arguments to a kernel and the device
    //  does not support SVM or if system pointers are passed as arguments to a kernel and/or
    //  stored inside SVM allocations passed as kernel arguments and the device does not support
    //  fine grain system SVM allocations.
}

#[cl_entrypoint(clEnqueueTask)]
fn enqueue_task(
    command_queue: cl_command_queue,
    kernel: cl_kernel,
    num_events_in_wait_list: cl_uint,
    event_wait_list: *const cl_event,
    event: *mut cl_event,
) -> CLResult<()> {
    // clEnqueueTask is equivalent to calling clEnqueueNDRangeKernel with work_dim set to 1,
    // global_work_offset set to NULL, global_work_size[0] set to 1, and local_work_size[0] set
    // to 1.
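    // Only the first element of the arrays below is read since work_dim is 1; the remaining
    // values are don't-cares.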
    enqueue_ndrange_kernel(
        command_queue,
        kernel,
        1,
        ptr::null(),
        [1, 1, 1].as_ptr(),
        [1, 0, 0].as_ptr(),
        num_events_in_wait_list,
        event_wait_list,
        event,
    )
}

#[cl_entrypoint(clCloneKernel)]
fn clone_kernel(source_kernel: cl_kernel) -> CLResult<cl_kernel> {
    let k = Kernel::ref_from_raw(source_kernel)?;
    Ok(Arc::new(k.clone()).into_cl())
}

#[cl_entrypoint(clGetKernelSuggestedLocalWorkSizeKHR)]
fn get_kernel_suggested_local_work_size_khr(
    command_queue: cl_command_queue,
    kernel: cl_kernel,
    work_dim: cl_uint,
    global_work_offset: *const usize,
    global_work_size: *const usize,
    suggested_local_work_size: *mut usize,
) -> CLResult<()> {
    // CL_INVALID_GLOBAL_WORK_SIZE if global_work_size is NULL or if any of the values specified in
    // global_work_size are 0.
    if global_work_size.is_null() {
        return Err(CL_INVALID_GLOBAL_WORK_SIZE);
    }

    if global_work_offset.is_null() {
        return Err(CL_INVALID_GLOBAL_OFFSET);
    }

    // CL_INVALID_VALUE if suggested_local_work_size is NULL.
    if suggested_local_work_size.is_null() {
        return Err(CL_INVALID_VALUE);
    }

    // CL_INVALID_COMMAND_QUEUE if command_queue is not a valid host command-queue.
    let queue = Queue::ref_from_raw(command_queue)?;

    // CL_INVALID_KERNEL if kernel is not a valid kernel object.
    let kernel = Kernel::ref_from_raw(kernel)?;

    // CL_INVALID_CONTEXT if the context associated with kernel is not the same as the context
    // associated with command_queue.
    if queue.context != kernel.prog.context {
        return Err(CL_INVALID_CONTEXT);
    }

    // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
    // for kernel for the device associated with command_queue.
    if kernel.prog.status(queue.device) != CL_BUILD_SUCCESS as cl_build_status {
        return Err(CL_INVALID_PROGRAM_EXECUTABLE);
    }

    // CL_INVALID_KERNEL_ARGS if all argument values for kernel have not been set.
    if kernel.arg_values().iter().any(|v| v.is_none()) {
        return Err(CL_INVALID_KERNEL_ARGS);
    }

    // CL_INVALID_WORK_DIMENSION if work_dim is not a valid value (i.e. a value between 1 and
    // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS).
    if work_dim == 0 || work_dim > queue.device.max_grid_dimensions() {
        return Err(CL_INVALID_WORK_DIMENSION);
    }

    let mut global_work_size =
        unsafe { kernel_work_arr_or_default(global_work_size, work_dim).to_vec() };

    let suggested_local_work_size = unsafe {
        kernel_work_arr_mut(suggested_local_work_size, work_dim).ok_or(CL_INVALID_VALUE)?
    };

    let global_work_offset = unsafe { kernel_work_arr_or_default(global_work_offset, work_dim) };

    let device_bits = queue.device.address_bits();
    let device_max = u64::MAX >> (u64::BITS - device_bits);
    for i in 0..work_dim as usize {
        let gws = global_work_size[i];
        let gwo = global_work_offset[i];

        // CL_INVALID_GLOBAL_WORK_SIZE if global_work_size is NULL or if any of the values
        // specified in global_work_size are 0.
        if gws == 0 {
            return Err(CL_INVALID_GLOBAL_WORK_SIZE);
        }
        // CL_INVALID_GLOBAL_WORK_SIZE if any of the values specified in global_work_size exceed
        // the maximum value representable by size_t on the device associated with command_queue.
        if gws as u64 > device_max {
            return Err(CL_INVALID_GLOBAL_WORK_SIZE);
        }
        // CL_INVALID_GLOBAL_OFFSET if the value specified in global_work_size plus the
        // corresponding value in global_work_offset for dimension exceeds the maximum value
        // representable by size_t on the device associated with command_queue.
        if u64::checked_add(gws as u64, gwo as u64)
            .filter(|&x| x <= device_max)
            .is_none()
        {
            return Err(CL_INVALID_GLOBAL_OFFSET);
        }
    }

    kernel.suggest_local_size(
        queue.device,
        work_dim as usize,
        &mut global_work_size,
        suggested_local_work_size,
    );

    Ok(())

    // CL_MISALIGNED_SUB_BUFFER_OFFSET if a sub-buffer object is set as an argument to kernel and
    // the offset specified when the sub-buffer object was created is not aligned to
    // CL_DEVICE_MEM_BASE_ADDR_ALIGN for the device associated with command_queue.
    // CL_INVALID_IMAGE_SIZE if an image object is set as an argument to kernel and the image
    // dimensions are not supported by device associated with command_queue.
    // CL_IMAGE_FORMAT_NOT_SUPPORTED if an image object is set as an argument to kernel and the
    // image format is not supported by the device associated with command_queue.
    // CL_INVALID_OPERATION if an SVM pointer is set as an argument to kernel and the device
    // associated with command_queue does not support SVM or the required SVM capabilities for the
    // SVM pointer.
    // CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
    // implementation on the device.
    // CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL
    // implementation on the host.
}