Lines matching full:context
25 const struct transpose_context* context, in xnn_compute_transposec_2d() argument
31 const size_t log2_element_size = context->log2_element_size; in xnn_compute_transposec_2d()
33 context->const_size_ukernel( in xnn_compute_transposec_2d()
34 … (const void*) ((uintptr_t) context->x + (i << log2_element_size) + j * context->input_stride[1]), in xnn_compute_transposec_2d()
35 (void*) ((uintptr_t) context->y + (j << log2_element_size) + i * context->output_stride[0]), in xnn_compute_transposec_2d()
36 context->input_stride[1], in xnn_compute_transposec_2d()
37 context->output_stride[0], in xnn_compute_transposec_2d()
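These matches come from XNNPACK's compute-dispatch layer (apparently src/operator-run.c): each xnn_compute_* callback converts tile indices supplied by the thread pool into raw byte offsets before tail-calling a microkernel. A minimal sketch of the 2D constant-size transpose case follows, with simplified stand-ins for transpose_context and the microkernel signature; every name suffixed _sketch is hypothetical, not the real API.

/* Hedged sketch of the 2D constant-size transpose dispatch above. */
#include <stddef.h>
#include <stdint.h>

typedef void (*transposec_ukernel_sketch)(
    const void* input, void* output,
    size_t input_stride, size_t output_stride,
    size_t block_width, size_t block_height);

struct transpose_context_sketch {
  const void* x;                  /* input tensor base */
  void* y;                        /* output tensor base */
  size_t input_stride[2];         /* bytes per step in each input dimension */
  size_t output_stride[2];        /* bytes per step in each output dimension */
  size_t log2_element_size;       /* element size expressed as a shift */
  transposec_ukernel_sketch const_size_ukernel;
};

/* i and j are tile origins chosen by the thread pool; the microkernel
 * transposes one tile whose corner is located by pure byte arithmetic. */
void compute_transposec_2d_sketch(
    const struct transpose_context_sketch* context,
    size_t i, size_t j, size_t tile_i, size_t tile_j) {
  const size_t log2_element_size = context->log2_element_size;
  context->const_size_ukernel(
      (const void*) ((uintptr_t) context->x + (i << log2_element_size)
                     + j * context->input_stride[1]),
      (void*) ((uintptr_t) context->y + (j << log2_element_size)
               + i * context->output_stride[0]),
      context->input_stride[1],
      context->output_stride[0],
      tile_i, tile_j);
}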
43 const struct transpose_context* context, in xnn_compute_transposec_3d() argument
50 const size_t log2_element_size = context->log2_element_size; in xnn_compute_transposec_3d()
51 const size_t ld_input = context->input_stride[2]; in xnn_compute_transposec_3d()
52 const size_t ld_output = context->output_stride[1]; in xnn_compute_transposec_3d()
53 const void* x = (const void*) ((uintptr_t) context->x + in xnn_compute_transposec_3d()
54 … (i * context->input_stride[0] + j * context->input_stride[1]) + k * ld_input); in xnn_compute_transposec_3d()
55 …void* y = (void*) ((uintptr_t)context->y + i * context->output_stride[0] + j * context->output_str… in xnn_compute_transposec_3d()
58 context->const_size_ukernel( in xnn_compute_transposec_3d()
68 const struct transpose_context* context, in xnn_compute_transposec_4d() argument
76 const size_t log2_element_size = context->log2_element_size; in xnn_compute_transposec_4d()
77 const size_t ld_input = context->input_stride[3]; in xnn_compute_transposec_4d()
78 const size_t ld_output = context->output_stride[2]; in xnn_compute_transposec_4d()
79 …const void* x = (const void*) ((uintptr_t)context->x + i * context->input_stride[0] + j * context-… in xnn_compute_transposec_4d()
80 k * context->input_stride[2] + l * ld_input); in xnn_compute_transposec_4d()
81 …void* y = (void*) ((uintptr_t)context->y + i * context->output_stride[0] + j * context->output_str… in xnn_compute_transposec_4d()
82 k * context->output_stride[2] + (l << log2_element_size)); in xnn_compute_transposec_4d()
84 context->const_size_ukernel( in xnn_compute_transposec_4d()
94 const struct transpose_context* context, in xnn_compute_transposec_5d() argument
103 const size_t log2_element_size = context->log2_element_size; in xnn_compute_transposec_5d()
104 const size_t ld_input = context->input_stride[4]; in xnn_compute_transposec_5d()
105 const size_t ld_output = context->output_stride[3]; in xnn_compute_transposec_5d()
106 …const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->… in xnn_compute_transposec_5d()
107 … k * context->input_stride[2] + l * context->input_stride[3] + m * ld_input); in xnn_compute_transposec_5d()
108 …void* y = (void*)((uintptr_t)context->y + i * context->output_stride[0] + j * context->output_stri… in xnn_compute_transposec_5d()
109 … k * context->output_stride[2] + l * context->output_stride[3] + (m << log2_element_size)); in xnn_compute_transposec_5d()
111 context->const_size_ukernel( in xnn_compute_transposec_5d()
121 const struct transpose_context* context, in xnn_compute_transposec_6d() argument
131 const size_t log2_element_size = context->log2_element_size; in xnn_compute_transposec_6d()
132 const size_t ld_input = context->input_stride[5]; in xnn_compute_transposec_6d()
133 const size_t ld_output = context->output_stride[4]; in xnn_compute_transposec_6d()
134 …const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->… in xnn_compute_transposec_6d()
135 k * context->input_stride[2] + l * context->input_stride[3] + in xnn_compute_transposec_6d()
136 m * context->input_stride[4] + n * ld_input); in xnn_compute_transposec_6d()
137 …void* y = (void*)((uintptr_t)context->y + i * context->output_stride[0] + j * context->output_stri… in xnn_compute_transposec_6d()
138 … k * context->output_stride[2] + l * context->output_stride[3] + m * context->output_stride[4] + in xnn_compute_transposec_6d()
141 context->const_size_ukernel( in xnn_compute_transposec_6d()
151 const struct transpose_context* context, in xnn_compute_transposev_2d() argument
157 const size_t element_size = context->element_size; in xnn_compute_transposev_2d()
158 const size_t ld_input = context->input_stride[1]; in xnn_compute_transposev_2d()
159 const size_t ld_output = context->output_stride[0]; in xnn_compute_transposev_2d()
160 const void* x = (const void*) ((uintptr_t) context->x + in xnn_compute_transposev_2d()
161 i * context->input_stride[0] + j * ld_input); in xnn_compute_transposev_2d()
162 …void* y = (void*) ((uintptr_t) context->y + context->output_stride[1] * j + i * context->output_st… in xnn_compute_transposev_2d()
164 context->variable_size_ukernel( in xnn_compute_transposev_2d()
169 context->input_stride[0], in xnn_compute_transposev_2d()
170 context->output_stride[1], in xnn_compute_transposev_2d()
177 const struct transpose_context* context, in xnn_compute_transposev_3d() argument
184 const size_t element_size = context->element_size; in xnn_compute_transposev_3d()
185 const size_t ld_input = context->input_stride[2]; in xnn_compute_transposev_3d()
186 const size_t ld_output = context->output_stride[1]; in xnn_compute_transposev_3d()
187 …const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->… in xnn_compute_transposev_3d()
189 …void* y = (void*)((uintptr_t)context->y + i * context->output_stride[0] + j * context->output_stri… in xnn_compute_transposev_3d()
190 k * context->output_stride[2]); in xnn_compute_transposev_3d()
192 context->variable_size_ukernel( in xnn_compute_transposev_3d()
197 context->input_stride[1], in xnn_compute_transposev_3d()
198 context->output_stride[2], in xnn_compute_transposev_3d()
205 const struct transpose_context* context, in xnn_compute_transposev_4d() argument
213 const size_t element_size = context->element_size; in xnn_compute_transposev_4d()
214 const size_t ld_input = context->input_stride[3]; in xnn_compute_transposev_4d()
215 const size_t ld_output = context->output_stride[2]; in xnn_compute_transposev_4d()
216 …const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->… in xnn_compute_transposev_4d()
217 k * context->input_stride[2] + l * ld_input); in xnn_compute_transposev_4d()
218 …void* y = (void*)((uintptr_t)context->y + context->output_stride[3] * l + i * context->output_stri… in xnn_compute_transposev_4d()
219 j * context->output_stride[1] + k * context->output_stride[2]); in xnn_compute_transposev_4d()
221 context->variable_size_ukernel( in xnn_compute_transposev_4d()
226 context->input_stride[2], in xnn_compute_transposev_4d()
227 context->output_stride[3], in xnn_compute_transposev_4d()
234 const struct transpose_context* context, in xnn_compute_transposev_5d() argument
243 const size_t element_size = context->element_size; in xnn_compute_transposev_5d()
244 const size_t ld_input = context->input_stride[4]; in xnn_compute_transposev_5d()
245 const size_t ld_output = context->output_stride[3]; in xnn_compute_transposev_5d()
246 …const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->… in xnn_compute_transposev_5d()
247 … k * context->input_stride[2] + l * context->input_stride[3] + m * ld_input); in xnn_compute_transposev_5d()
248 …void* y = (void*)((uintptr_t)context->y + context->output_stride[4] * m + i * context->output_stri… in xnn_compute_transposev_5d()
249 … j * context->output_stride[1] + k * context->output_stride[2] + l * context->output_stride[3]); in xnn_compute_transposev_5d()
251 context->variable_size_ukernel( in xnn_compute_transposev_5d()
256 context->input_stride[3], in xnn_compute_transposev_5d()
257 context->output_stride[4], in xnn_compute_transposev_5d()
264 const struct transpose_context* context, in xnn_compute_transposev_6d() argument
274 const size_t element_size = context->element_size; in xnn_compute_transposev_6d()
275 const size_t ld_input = context->input_stride[5]; in xnn_compute_transposev_6d()
276 const size_t ld_output = context->output_stride[4]; in xnn_compute_transposev_6d()
277 …const void* x = (const void*)((uintptr_t)context->x + i * context->input_stride[0] + j * context->… in xnn_compute_transposev_6d()
278 k * context->input_stride[2] + l * context->input_stride[3] + in xnn_compute_transposev_6d()
279 m * context->input_stride[4] + n * ld_input); in xnn_compute_transposev_6d()
280 …void* y = (void*)((uintptr_t)context->y + context->output_stride[5] * n + i * context->output_stri… in xnn_compute_transposev_6d()
281 … j * context->output_stride[1] + k * context->output_stride[2] + l * context->output_stride[3] + in xnn_compute_transposev_6d()
282 m * context->output_stride[4]); in xnn_compute_transposev_6d()
284 context->variable_size_ukernel( in xnn_compute_transposev_6d()
289 context->input_stride[4], in xnn_compute_transposev_6d()
290 context->output_stride[5], in xnn_compute_transposev_6d()
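Comparing the two families above: the transposec (constant element size) callbacks fold the element size into a shift by log2_element_size, while the transposev (variable size) callbacks carry element_size in bytes and multiply. A two-helper sketch of that addressing difference, with illustrative names:

#include <stddef.h>
#include <stdint.h>

/* Constant-size family: element size folded into a shift. */
static inline const void* transposec_elem_sketch(
    const void* base, size_t index, size_t log2_element_size) {
  return (const void*) ((uintptr_t) base + (index << log2_element_size));
}

/* Variable-size family: element size carried in bytes and multiplied. */
static inline const void* transposev_elem_sketch(
    const void* base, size_t index, size_t element_size) {
  return (const void*) ((uintptr_t) base + index * element_size);
}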
297 const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_grouped_gemm()
304 const size_t k_scaled = context->k_scaled; in xnn_compute_grouped_gemm()
305 const size_t a_stride = context->a_stride; in xnn_compute_grouped_gemm()
306 const size_t cm_stride = context->cm_stride; in xnn_compute_grouped_gemm()
308 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_grouped_gemm()
312 (const void*) ((uintptr_t) context->a + mr_block_start * a_stride + group_index * k_scaled), in xnn_compute_grouped_gemm()
314 …(const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride + group_index * … in xnn_compute_grouped_gemm()
315 …(void*) ((uintptr_t) context->c + mr_block_start * cm_stride + (nr_block_start << context->log2_cs… in xnn_compute_grouped_gemm()
317 context->cn_stride, in xnn_compute_grouped_gemm()
318 &context->params); in xnn_compute_grouped_gemm()
322 const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_gemm()
328 const size_t a_stride = context->a_stride; in xnn_compute_gemm()
329 const size_t cm_stride = context->cm_stride; in xnn_compute_gemm()
331 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_gemm()
334 context->k_scaled, in xnn_compute_gemm()
335 (const void*) ((uintptr_t) context->a + mr_block_start * a_stride), in xnn_compute_gemm()
337 (const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride), in xnn_compute_gemm()
338 …(void*) ((uintptr_t) context->c + mr_block_start * cm_stride + (nr_block_start << context->log2_cs… in xnn_compute_gemm()
340 context->cn_stride, in xnn_compute_gemm()
341 context->fused_params); in xnn_compute_gemm()
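The GEMM matches show the same pattern at matrix granularity: a_stride, w_stride, cm_stride, and cn_stride place one (mr, nr) tile, and nr_block_start is shifted by log2_csize to get a byte offset into C. A hedged sketch of the non-grouped dispatch, again with a simplified stand-in for the microkernel signature:

#include <stddef.h>
#include <stdint.h>

typedef void (*gemm_ukernel_sketch)(
    size_t mr, size_t nr, size_t k,
    const void* a, size_t a_stride,
    const void* packed_w,
    void* c, size_t cm_stride, size_t cn_stride,
    const void* params);

struct gemm_context_sketch {
  size_t k_scaled;        /* K dimension pre-scaled to bytes */
  const void* a;          /* input matrix */
  size_t a_stride;        /* bytes per row of A */
  const void* packed_w;   /* packed weights and bias */
  size_t w_stride;        /* bytes per packed column block */
  void* c;                /* output matrix */
  size_t cm_stride;       /* bytes per row of C */
  size_t cn_stride;       /* bytes per column block of C */
  uint32_t log2_csize;    /* log2 of the C element size */
  gemm_ukernel_sketch ukernel;
  const void* fused_params;
};

void compute_gemm_sketch(const struct gemm_context_sketch* context,
                         size_t mr_block_start, size_t nr_block_start,
                         size_t mr_block_size, size_t nr_block_size) {
  context->ukernel(
      mr_block_size, nr_block_size, context->k_scaled,
      (const void*) ((uintptr_t) context->a + mr_block_start * context->a_stride),
      context->a_stride,
      (const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride),
      (void*) ((uintptr_t) context->c + mr_block_start * context->cm_stride
               + (nr_block_start << context->log2_csize)),
      context->cm_stride, context->cn_stride,
      context->fused_params);
}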
345 const struct spmm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_spmm()
350 context->ukernel( in xnn_compute_spmm()
352 context->n, in xnn_compute_spmm()
353 …(const void*) ((uintptr_t) context->input + batch_index * context->batched_input_stride + mr_block… in xnn_compute_spmm()
354 context->nonzero_weights, in xnn_compute_spmm()
355 context->input_increments, in xnn_compute_spmm()
356 context->output_channel_nonzeros, in xnn_compute_spmm()
357 …(void*) ((uintptr_t) context->output + batch_index * context->batched_output_stride + mr_block_sta… in xnn_compute_spmm()
358 context->scaled_m, in xnn_compute_spmm()
359 &context->params); in xnn_compute_spmm()
363 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_grouped_batch_igemm()
371 const size_t ks = context->ks; in xnn_compute_grouped_batch_igemm()
372 const size_t cm_stride = context->cm_stride; in xnn_compute_grouped_batch_igemm()
374 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_grouped_batch_igemm()
377 context->kc, in xnn_compute_grouped_batch_igemm()
378 context->ks_scaled, in xnn_compute_grouped_batch_igemm()
379 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_grouped_batch_igemm()
380 …(const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride + group_index * … in xnn_compute_grouped_batch_igemm()
381 …ptr_t) context->c + group_index * context->gc_stride + batch_index * context->bc_stride + mr_block… in xnn_compute_grouped_batch_igemm()
383 context->cn_stride, in xnn_compute_grouped_batch_igemm()
384 context->a_offset + group_index * context->ga_stride + batch_index * context->ba_stride, in xnn_compute_grouped_batch_igemm()
385 context->zero, in xnn_compute_grouped_batch_igemm()
386 &context->params); in xnn_compute_grouped_batch_igemm()
390 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_grouped_igemm()
397 const size_t ks = context->ks; in xnn_compute_grouped_igemm()
398 const size_t cm_stride = context->cm_stride; in xnn_compute_grouped_igemm()
400 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_grouped_igemm()
403 context->kc, in xnn_compute_grouped_igemm()
404 context->ks_scaled, in xnn_compute_grouped_igemm()
405 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_grouped_igemm()
406 …(const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride + group_index * … in xnn_compute_grouped_igemm()
407 …(void*) ((uintptr_t) context->c + group_index * context->gc_stride + mr_block_start * cm_stride + … in xnn_compute_grouped_igemm()
409 context->cn_stride, in xnn_compute_grouped_igemm()
410 context->a_offset + group_index * context->ga_stride, in xnn_compute_grouped_igemm()
411 context->zero, in xnn_compute_grouped_igemm()
412 &context->params); in xnn_compute_grouped_igemm()
416 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_batch_igemm()
423 const size_t ks = context->ks; in xnn_compute_batch_igemm()
424 const size_t cm_stride = context->cm_stride; in xnn_compute_batch_igemm()
426 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_batch_igemm()
429 context->kc, in xnn_compute_batch_igemm()
430 context->ks_scaled, in xnn_compute_batch_igemm()
431 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_batch_igemm()
432 (const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride), in xnn_compute_batch_igemm()
433 …(void*) ((uintptr_t) context->c + batch_index * context->bc_stride + mr_block_start * cm_stride + … in xnn_compute_batch_igemm()
435 context->cn_stride, in xnn_compute_batch_igemm()
436 context->a_offset + batch_index * context->ba_stride, in xnn_compute_batch_igemm()
437 context->zero, in xnn_compute_batch_igemm()
438 &context->params); in xnn_compute_batch_igemm()
442 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_igemm()
448 const size_t ks = context->ks; in xnn_compute_igemm()
449 const size_t cm_stride = context->cm_stride; in xnn_compute_igemm()
451 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_igemm()
454 context->kc, in xnn_compute_igemm()
455 context->ks_scaled, in xnn_compute_igemm()
456 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_igemm()
457 (const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride), in xnn_compute_igemm()
458 …(void*) ((uintptr_t) context->c + mr_block_start * cm_stride + (nr_block_start << context->log2_cs… in xnn_compute_igemm()
460 context->cn_stride, in xnn_compute_igemm()
461 context->a_offset, in xnn_compute_igemm()
462 context->zero, in xnn_compute_igemm()
463 &context->params); in xnn_compute_igemm()
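The igemm variants differ from gemm in that A is read through an indirection buffer: the matches show indirect_a advanced by mr_block_start * ks * sizeof(void*), an a_offset that rebases the indirected pointers per batch/group, and a shared zero buffer for padding. A sketch of that pointer resolution, under the assumption (consistent with XNNPACK's microkernels, but stated here without the kernel source at hand) that pointers equal to zero are exempt from rebasing:

/* Sketch: resolving one indirected input row in an IGEMM-style kernel.
 * Each output row consumes `ks` pointers from indirect_a; a_offset rebases
 * them into the current batch/group, and entries equal to `zero` point at
 * a shared zero buffer that is used as-is. Illustrative names only. */
#include <stddef.h>
#include <stdint.h>

const void* resolve_indirect_sketch(const void* const* indirect_a,
                                    size_t mr_block_start, size_t ks,
                                    size_t k_index,
                                    size_t a_offset, const void* zero) {
  const void* p = indirect_a[mr_block_start * ks + k_index];
  if (p == zero) {
    return zero;  /* padding row: shared zero buffer, never rebased */
  }
  return (const void*) ((uintptr_t) p + a_offset);  /* rebase into batch/group */
}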
467 const struct subgemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_grouped_subgemm2d()
477 …const struct subconvolution_params* subconvolution_params = &context->subconvolution_params[subker… in xnn_compute_grouped_subgemm2d()
489 const size_t ax_stride = context->ax_stride; in xnn_compute_grouped_subgemm2d()
490 const size_t cx_stride = context->cx_stride; in xnn_compute_grouped_subgemm2d()
491 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_grouped_subgemm2d()
494 context->kc, in xnn_compute_grouped_subgemm2d()
495 …(uintptr_t) context->a + group_index * context->ga_stride + slice_y * context->ay_stride + slice_x… in xnn_compute_grouped_subgemm2d()
497 …ms->weights + nc_block_start * subconvolution_params->w_stride + group_index * context->gw_stride), in xnn_compute_grouped_subgemm2d()
498 …dex * context->gc_stride + slice_y * context->cy_stride + slice_x_start * cx_stride + batch_index … in xnn_compute_grouped_subgemm2d()
500 context->cn_stride, in xnn_compute_grouped_subgemm2d()
501 &context->params); in xnn_compute_grouped_subgemm2d()
505 const struct subgemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_subgemm2d()
514 …const struct subconvolution_params* subconvolution_params = &context->subconvolution_params[subker… in xnn_compute_subgemm2d()
526 const size_t ax_stride = context->ax_stride; in xnn_compute_subgemm2d()
527 const size_t cx_stride = context->cx_stride; in xnn_compute_subgemm2d()
528 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_subgemm2d()
531 context->kc, in xnn_compute_subgemm2d()
532 …(const void*) ((uintptr_t) context->a + slice_y * context->ay_stride + slice_x_start * ax_stride +… in xnn_compute_subgemm2d()
535 …->output + slice_y * context->cy_stride + slice_x_start * cx_stride + batch_index * context->bc_st… in xnn_compute_subgemm2d()
537 context->cn_stride, in xnn_compute_subgemm2d()
538 &context->params); in xnn_compute_subgemm2d()
542 const struct subconv_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_grouped_subconv2d()
552 …const struct subconvolution_params* subconvolution_params = &context->subconvolution_params[subker… in xnn_compute_grouped_subconv2d()
564 const size_t cx_stride = context->cx_stride; in xnn_compute_grouped_subconv2d()
565 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_grouped_subconv2d()
568 context->kc, in xnn_compute_grouped_subconv2d()
571 …ms->weights + nc_block_start * subconvolution_params->w_stride + group_index * context->gw_stride), in xnn_compute_grouped_subconv2d()
572 …dex * context->gc_stride + slice_y * context->cy_stride + slice_x_start * cx_stride + batch_index … in xnn_compute_grouped_subconv2d()
574 context->cn_stride, in xnn_compute_grouped_subconv2d()
575 context->a_offset + group_index * context->ga_stride + batch_index * context->ba_stride, in xnn_compute_grouped_subconv2d()
576 context->zero, in xnn_compute_grouped_subconv2d()
577 &context->params); in xnn_compute_grouped_subconv2d()
581 const struct subconv_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_subconv2d()
590 …const struct subconvolution_params* subconvolution_params = &context->subconvolution_params[subker… in xnn_compute_subconv2d()
602 const size_t cx_stride = context->cx_stride; in xnn_compute_subconv2d()
603 context->ukernel.function[XNN_UARCH_DEFAULT]( in xnn_compute_subconv2d()
606 context->kc, in xnn_compute_subconv2d()
610 …->output + slice_y * context->cy_stride + slice_x_start * cx_stride + batch_index * context->bc_st… in xnn_compute_subconv2d()
612 context->cn_stride, in xnn_compute_subconv2d()
613 context->a_offset + batch_index * context->ba_stride, in xnn_compute_subconv2d()
614 context->zero, in xnn_compute_subconv2d()
615 &context->params); in xnn_compute_subconv2d()
619 const struct conv2d_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_conv2d_hwc2chw()
624 context->hwc2chw_ukernel( in xnn_compute_conv2d_hwc2chw()
625 context->input_height, in xnn_compute_conv2d_hwc2chw()
626 context->input_width, in xnn_compute_conv2d_hwc2chw()
629 (const void*) ((uintptr_t) context->input + batch_index * context->input_batch_stride), in xnn_compute_conv2d_hwc2chw()
630 context->zero, in xnn_compute_conv2d_hwc2chw()
631 context->packed_weights, in xnn_compute_conv2d_hwc2chw()
632 (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride), in xnn_compute_conv2d_hwc2chw()
633 context->input_padding_top, in xnn_compute_conv2d_hwc2chw()
634 context->output_channels, in xnn_compute_conv2d_hwc2chw()
635 context->output_height_stride, in xnn_compute_conv2d_hwc2chw()
636 context->output_channel_stride, in xnn_compute_conv2d_hwc2chw()
637 &context->params); in xnn_compute_conv2d_hwc2chw()
641 const struct dwconv_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_dwconv_unipass()
646 …(const void**) ((uintptr_t) context->indirect_input + output_y * context->indirect_input_height_st… in xnn_compute_dwconv_unipass()
647 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_dwconv_unipass()
648 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_dwconv_unipass()
649 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_dwconv_unipass()
651 context->unipass_ukernel( in xnn_compute_dwconv_unipass()
652 context->groups, context->output_width, in xnn_compute_dwconv_unipass()
653 indirect_input, context->packed_weights, output, in xnn_compute_dwconv_unipass()
654 context->indirect_input_width_stride, context->output_increment, in xnn_compute_dwconv_unipass()
655 input_offset, context->zero, in xnn_compute_dwconv_unipass()
656 &context->params); in xnn_compute_dwconv_unipass()
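The dwconv matches follow the per-output-row shape shared by the pooling dispatches below: pick one row of the indirection buffer by output_y, fold the batch into input_offset, and offset the output by batch and row strides. A sketch of just that setup, with a hypothetical context struct:

#include <stddef.h>
#include <stdint.h>

struct dwconv_context_sketch {
  const void** indirect_input;
  size_t indirect_input_height_stride;  /* bytes between indirection rows */
  size_t input_offset;                  /* base rebase for indirected pointers */
  size_t input_batch_stride;
  void* output;
  size_t output_batch_stride;
  size_t output_height_stride;
};

void dwconv_row_pointers_sketch(const struct dwconv_context_sketch* context,
                                size_t batch_index, size_t output_y,
                                const void*** indirect_input_out,
                                size_t* input_offset_out, void** output_out) {
  *indirect_input_out = (const void**) ((uintptr_t) context->indirect_input
      + output_y * context->indirect_input_height_stride);
  *input_offset_out = context->input_offset
      + batch_index * context->input_batch_stride;
  *output_out = (void*) ((uintptr_t) context->output
      + batch_index * context->output_batch_stride
      + output_y * context->output_height_stride);
}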
660 const struct dwconv2d_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_dwconv2d_chw()
664 context->chw_ukernel( in xnn_compute_dwconv2d_chw()
665 context->input_height, in xnn_compute_dwconv2d_chw()
666 context->input_width, in xnn_compute_dwconv2d_chw()
667 …(const void*) ((uintptr_t) context->input + channel * context->input_channel_stride + batch_index … in xnn_compute_dwconv2d_chw()
668 (const void*) ((uintptr_t) context->packed_weights + channel * context->weights_channel_stride), in xnn_compute_dwconv2d_chw()
669 context->zero, in xnn_compute_dwconv2d_chw()
670 …(void*) ((uintptr_t) context->output + channel * context->output_channel_stride + batch_index * co… in xnn_compute_dwconv2d_chw()
671 context->input_padding_top, in xnn_compute_dwconv2d_chw()
672 &context->params); in xnn_compute_dwconv2d_chw()
676 const struct argmax_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_argmax_pooling_unipass()
680 const void** indirect_input = (const void**) ((uintptr_t) context->indirect_input + in xnn_compute_argmax_pooling_unipass()
681 output_y * context->indirect_input_height_stride); in xnn_compute_argmax_pooling_unipass()
682 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_argmax_pooling_unipass()
683 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_argmax_pooling_unipass()
684 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_argmax_pooling_unipass()
685 uint32_t* index = (uint32_t*) ((uintptr_t) context->index + in xnn_compute_argmax_pooling_unipass()
686 batch_index * context->index_batch_stride + output_y * context->index_height_stride); in xnn_compute_argmax_pooling_unipass()
688 context->unipass_ukernel( in xnn_compute_argmax_pooling_unipass()
689 context->output_width, context->pooling_size, context->channels, in xnn_compute_argmax_pooling_unipass()
691 context->input_increment, context->output_increment); in xnn_compute_argmax_pooling_unipass()
695 const struct argmax_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_argmax_pooling_multipass()
699 const void** indirect_input = (const void**) ((uintptr_t) context->indirect_input + in xnn_compute_argmax_pooling_multipass()
700 output_y * context->indirect_input_height_stride); in xnn_compute_argmax_pooling_multipass()
701 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_argmax_pooling_multipass()
702 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_argmax_pooling_multipass()
703 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_argmax_pooling_multipass()
704 uint32_t* index = (uint32_t*) ((uintptr_t) context->index + in xnn_compute_argmax_pooling_multipass()
705 batch_index * context->index_batch_stride + output_y * context->index_height_stride); in xnn_compute_argmax_pooling_multipass()
707 …void* multipass_accumulation_buffer = XNN_SIMD_ALLOCA(context->channels * sizeof(float) + XNN_EXTR… in xnn_compute_argmax_pooling_multipass()
708 …void* multipass_index_buffer = XNN_SIMD_ALLOCA(context->channels * sizeof(uint32_t) + XNN_EXTRA_BY… in xnn_compute_argmax_pooling_multipass()
710 context->multipass_ukernel( in xnn_compute_argmax_pooling_multipass()
711 context->output_width, context->pooling_size, context->channels, in xnn_compute_argmax_pooling_multipass()
713 context->input_increment, context->output_increment); in xnn_compute_argmax_pooling_multipass()
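The multipass variants allocate per-call scratch with XNN_SIMD_ALLOCA, sized as one slot per channel plus an over-read allowance. A sketch of the two buffer sizes visible in the argmax matches, with an assumed constant standing in for XNN_EXTRA_BYTES:

#include <stddef.h>
#include <stdint.h>

#define EXTRA_BYTES_SKETCH 16  /* stand-in for XNN_EXTRA_BYTES; value assumed */

/* Scratch for the running maxima: one float accumulator per channel. */
static inline size_t argmax_accum_bytes_sketch(size_t channels) {
  return channels * sizeof(float) + EXTRA_BYTES_SKETCH;
}

/* Scratch for the running argmax: one uint32_t index per channel. */
static inline size_t argmax_index_bytes_sketch(size_t channels) {
  return channels * sizeof(uint32_t) + EXTRA_BYTES_SKETCH;
}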
717 const struct max_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_max_pooling()
721 const void** indirect_input = (const void**) ((uintptr_t) context->indirect_input + in xnn_compute_max_pooling()
722 output_y * context->indirect_input_height_stride); in xnn_compute_max_pooling()
723 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_max_pooling()
724 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_max_pooling()
725 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_max_pooling()
727 context->ukernel( in xnn_compute_max_pooling()
728 context->output_width, context->pooling_size, context->channels, in xnn_compute_max_pooling()
730 context->input_increment, context->output_increment, in xnn_compute_max_pooling()
731 &context->params); in xnn_compute_max_pooling()
735 const struct unpooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_unpooling()
739 const void* input = (const void*) ((uintptr_t) context->input + in xnn_compute_unpooling()
740 input_y * context->input_height_stride + input_x * context->input_width_stride); in xnn_compute_unpooling()
741 const uint32_t* index = (const uint32_t*) ((uintptr_t) context->index + in xnn_compute_unpooling()
742 input_y * context->index_height_stride + input_x * context->index_width_stride); in xnn_compute_unpooling()
744 (void**) ((uintptr_t) context->indirect_output + in xnn_compute_unpooling()
745 …input_y * context->indirect_output_height_stride + input_x * context->indirect_output_width_stride… in xnn_compute_unpooling()
747 context->ukernel( in xnn_compute_unpooling()
748 context->pooling_size, in xnn_compute_unpooling()
749 context->channels, in xnn_compute_unpooling()
750 context->fill_value, in xnn_compute_unpooling()
755 const struct average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_average_pooling_unipass()
760 …(const void**) ((uintptr_t) context->indirect_input + output_y * context->indirect_input_height_st… in xnn_compute_average_pooling_unipass()
761 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_average_pooling_unipass()
762 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_average_pooling_unipass()
763 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_average_pooling_unipass()
765 context->unipass_ukernel( in xnn_compute_average_pooling_unipass()
766 context->output_width, context->pooling_size, context->channels, in xnn_compute_average_pooling_unipass()
767 indirect_input, input_offset, context->zero, output, in xnn_compute_average_pooling_unipass()
768 context->input_increment, context->output_increment, in xnn_compute_average_pooling_unipass()
769 &context->params); in xnn_compute_average_pooling_unipass()
773 const struct average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_average_pooling_multipass()
778 …(const void**) ((uintptr_t) context->indirect_input + output_y * context->indirect_input_height_st… in xnn_compute_average_pooling_multipass()
779 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_average_pooling_multipass()
780 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_average_pooling_multipass()
781 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_average_pooling_multipass()
784 …XNN_SIMD_ALLOCA(context->channels * sizeof(int32_t) + XNN_EXTRA_BYTES * sizeof(int32_t) / sizeof(u… in xnn_compute_average_pooling_multipass()
786 context->multipass_ukernel( in xnn_compute_average_pooling_multipass()
787 context->output_width, context->pooling_size, context->channels, in xnn_compute_average_pooling_multipass()
788 indirect_input, input_offset, context->zero, multipass_buffer, output, in xnn_compute_average_pooling_multipass()
789 context->input_increment, context->output_increment, in xnn_compute_average_pooling_multipass()
790 &context->params); in xnn_compute_average_pooling_multipass()
794 const struct pixelwise_average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_pixelwise_average_pooling_unipass()
799 …(const void**) ((uintptr_t) context->indirect_input + output_y * context->indirect_input_height_st… in xnn_compute_pixelwise_average_pooling_unipass()
800 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_pixelwise_average_pooling_unipass()
802 …(const void*) ((uintptr_t) context->pixelwise_buffer + output_y * context->pixelwise_buffer_height… in xnn_compute_pixelwise_average_pooling_unipass()
803 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_pixelwise_average_pooling_unipass()
804 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_pixelwise_average_pooling_unipass()
806 context->unipass_ukernel( in xnn_compute_pixelwise_average_pooling_unipass()
807 context->output_width, context->pooling_size, context->channels, in xnn_compute_pixelwise_average_pooling_unipass()
808 indirect_input, input_offset, context->zero, pixelwise_buffer, output, in xnn_compute_pixelwise_average_pooling_unipass()
809 context->input_increment, context->output_increment, in xnn_compute_pixelwise_average_pooling_unipass()
810 &context->params); in xnn_compute_pixelwise_average_pooling_unipass()
814 const struct pixelwise_average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_pixelwise_average_pooling_multipass()
819 …(const void**) ((uintptr_t) context->indirect_input + output_y * context->indirect_input_height_st… in xnn_compute_pixelwise_average_pooling_multipass()
820 const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride; in xnn_compute_pixelwise_average_pooling_multipass()
822 …(const void*) ((uintptr_t) context->pixelwise_buffer + output_y * context->pixelwise_buffer_height… in xnn_compute_pixelwise_average_pooling_multipass()
823 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_pixelwise_average_pooling_multipass()
824 batch_index * context->output_batch_stride + output_y * context->output_height_stride); in xnn_compute_pixelwise_average_pooling_multipass()
826 …void* multipass_buffer = XNN_SIMD_ALLOCA(context->channels * sizeof(int32_t) + XNN_EXTRA_BYTES * s… in xnn_compute_pixelwise_average_pooling_multipass()
828 context->multipass_ukernel( in xnn_compute_pixelwise_average_pooling_multipass()
829 context->output_width, context->pooling_size, context->channels, in xnn_compute_pixelwise_average_pooling_multipass()
830 indirect_input, input_offset, context->zero, pixelwise_buffer, multipass_buffer, output, in xnn_compute_pixelwise_average_pooling_multipass()
831 context->input_increment, context->output_increment, in xnn_compute_pixelwise_average_pooling_multipass()
832 &context->params); in xnn_compute_pixelwise_average_pooling_multipass()
836 const struct global_average_pooling_nwc_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_global_average_pooling_nwc_unipass()
840 (const void*) ((uintptr_t) context->input + batch_index * context->input_batch_stride); in xnn_compute_global_average_pooling_nwc_unipass()
842 (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride); in xnn_compute_global_average_pooling_nwc_unipass()
844 context->unipass_ukernel( in xnn_compute_global_average_pooling_nwc_unipass()
845 context->input_elements, in xnn_compute_global_average_pooling_nwc_unipass()
846 context->channels, in xnn_compute_global_average_pooling_nwc_unipass()
848 context->input_pixel_stride, in xnn_compute_global_average_pooling_nwc_unipass()
849 context->zero, in xnn_compute_global_average_pooling_nwc_unipass()
851 &context->params); in xnn_compute_global_average_pooling_nwc_unipass()
855 const struct global_average_pooling_nwc_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_global_average_pooling_nwc_multipass()
859 (const void*) ((uintptr_t) context->input + batch_index * context->input_batch_stride); in xnn_compute_global_average_pooling_nwc_multipass()
861 (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride); in xnn_compute_global_average_pooling_nwc_multipass()
864 …XNN_SIMD_ALLOCA(context->channels * sizeof(int32_t) + XNN_EXTRA_BYTES * sizeof(int32_t) / sizeof(u… in xnn_compute_global_average_pooling_nwc_multipass()
866 context->multipass_ukernel( in xnn_compute_global_average_pooling_nwc_multipass()
867 context->input_elements, in xnn_compute_global_average_pooling_nwc_multipass()
868 context->channels, in xnn_compute_global_average_pooling_nwc_multipass()
870 context->input_pixel_stride, in xnn_compute_global_average_pooling_nwc_multipass()
871 context->zero, in xnn_compute_global_average_pooling_nwc_multipass()
874 &context->params); in xnn_compute_global_average_pooling_nwc_multipass()
878 const struct global_average_pooling_ncw_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_global_average_pooling_ncw()
883 const void* input = (const void*) ((uintptr_t) context->input + in xnn_compute_global_average_pooling_ncw()
884 channels_start * context->input_channel_stride + batch_index * context->input_batch_stride); in xnn_compute_global_average_pooling_ncw()
885 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_global_average_pooling_ncw()
886 channels_start * context->output_channel_stride + batch_index * context->output_batch_stride); in xnn_compute_global_average_pooling_ncw()
888 context->ukernel( in xnn_compute_global_average_pooling_ncw()
889 context->input_elements, in xnn_compute_global_average_pooling_ncw()
893 &context->params); in xnn_compute_global_average_pooling_ncw()
897 const struct resize_bilinear_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_resize_bilinear()
903 …(void*) ((uintptr_t) context->output + pixel_start * context->output_pixel_stride + batch_index * … in xnn_compute_resize_bilinear()
905 context->ukernel( in xnn_compute_resize_bilinear()
907 context->scaled_channels, in xnn_compute_resize_bilinear()
908 context->indirect_input + pixel_start * 4, in xnn_compute_resize_bilinear()
909 context->input_offset + batch_index * context->input_batch_stride, in xnn_compute_resize_bilinear()
910 (const void*) ((uintptr_t) context->packed_weights + (pixel_start << context->log2_wsize)), in xnn_compute_resize_bilinear()
912 context->output_pixel_stride - context->scaled_channels); in xnn_compute_resize_bilinear()
916 const struct resize_bilinear_chw_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_resize_bilinear_chw()
922 …(void*) ((uintptr_t) context->output + channel_start * context->output_channel_stride + batch_inde… in xnn_compute_resize_bilinear_chw()
923 …const size_t input_offset = context->input_offset + batch_index * context->input_batch_stride + ch… in xnn_compute_resize_bilinear_chw()
925 context->ukernel( in xnn_compute_resize_bilinear_chw()
926 context->output_pixels, in xnn_compute_resize_bilinear_chw()
928 context->indirect_input, in xnn_compute_resize_bilinear_chw()
930 context->packed_weights, in xnn_compute_resize_bilinear_chw()
932 context->input_channel_stride); in xnn_compute_resize_bilinear_chw()
936 const struct prelu_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_prelu()
940 const size_t x_stride = context->x_stride; in xnn_compute_prelu()
941 const size_t y_stride = context->y_stride; in xnn_compute_prelu()
942 const void* x = (const void*) ((uintptr_t) context->x + x_stride * batch_start); in xnn_compute_prelu()
943 void* y = (void*) ((uintptr_t) context->y + y_stride * batch_start); in xnn_compute_prelu()
945 context->ukernel(batch_range, context->n, x, x_stride, context->w, y, y_stride); in xnn_compute_prelu()
949 const struct pad_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_pad_5d()
952 const void* input = (const void*) ((uintptr_t) context->input + in xnn_compute_pad_5d()
953 …i * context->input_stride[4] + j * context->input_stride[3] + k * context->input_stride[2] + l * c… in xnn_compute_pad_5d()
954 void* output = (void*) ((uintptr_t) context->output + in xnn_compute_pad_5d()
955 …i * context->output_stride[4] + j * context->output_stride[3] + k * context->output_stride[2] + l … in xnn_compute_pad_5d()
957 const size_t i_padding = context->pre_paddings[5]; in xnn_compute_pad_5d()
958 const size_t j_padding = context->pre_paddings[4]; in xnn_compute_pad_5d()
959 const size_t k_padding = context->pre_paddings[3]; in xnn_compute_pad_5d()
960 const size_t l_padding = context->pre_paddings[2]; in xnn_compute_pad_5d()
961 const size_t m_padding = context->pre_paddings[1]; in xnn_compute_pad_5d()
963 const size_t i_size = context->input_size[5]; in xnn_compute_pad_5d()
964 const size_t j_size = context->input_size[4]; in xnn_compute_pad_5d()
965 const size_t k_size = context->input_size[3]; in xnn_compute_pad_5d()
966 const size_t l_size = context->input_size[2]; in xnn_compute_pad_5d()
967 const size_t m_size = context->input_size[1]; in xnn_compute_pad_5d()
972 context->pad_ukernel( in xnn_compute_pad_5d()
974 context->input_size[0], context->pre_paddings[0], context->post_paddings[0], in xnn_compute_pad_5d()
976 context->padding_value); in xnn_compute_pad_5d()
978 …context->fill_ukernel(1 /* rows */, context->output_size[0], output, 0 /* output stride */, contex… in xnn_compute_pad_5d()
983 const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_elementwise_binary_1d()
986 const void* a = (const void*) ((uintptr_t) context->a + i * context->a_stride[4]); in xnn_compute_elementwise_binary_1d()
987 const void* b = (const void*) ((uintptr_t) context->b + i * context->b_stride[4]); in xnn_compute_elementwise_binary_1d()
988 void* y = (void*) ((uintptr_t) context->y + i * context->y_stride[4]); in xnn_compute_elementwise_binary_1d()
989 context->ukernel(context->elements, a, b, y, &context->params); in xnn_compute_elementwise_binary_1d()
993 const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_elementwise_binary_2d()
996 …const void* a = (const void*) ((uintptr_t) context->a + i * context->a_stride[3] + j * context->a_… in xnn_compute_elementwise_binary_2d()
997 …const void* b = (const void*) ((uintptr_t) context->b + i * context->b_stride[3] + j * context->b_… in xnn_compute_elementwise_binary_2d()
998 void* y = (void*) ((uintptr_t) context->y + i * context->y_stride[3] + j * context->y_stride[4]); in xnn_compute_elementwise_binary_2d()
999 context->ukernel(context->elements, a, b, y, &context->params); in xnn_compute_elementwise_binary_2d()
1003 const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_elementwise_binary_3d()
1006 const void* a = (const void*) ((uintptr_t) context->a + in xnn_compute_elementwise_binary_3d()
1007 i * context->a_stride[2] + j * context->a_stride[3] + k * context->a_stride[4]); in xnn_compute_elementwise_binary_3d()
1008 const void* b = (const void*) ((uintptr_t) context->b + in xnn_compute_elementwise_binary_3d()
1009 i * context->b_stride[2] + j * context->b_stride[3] + k * context->b_stride[4]); in xnn_compute_elementwise_binary_3d()
1010 void* y = (void*) ((uintptr_t) context->y + in xnn_compute_elementwise_binary_3d()
1011 i * context->y_stride[2] + j * context->y_stride[3] + k * context->y_stride[4]); in xnn_compute_elementwise_binary_3d()
1012 context->ukernel(context->elements, a, b, y, &context->params); in xnn_compute_elementwise_binary_3d()
1016 const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_elementwise_binary_4d()
1019 const void* a = (const void*) ((uintptr_t) context->a + in xnn_compute_elementwise_binary_4d()
1020 …i * context->a_stride[1] + j * context->a_stride[2] + k * context->a_stride[3] + l * context->a_st… in xnn_compute_elementwise_binary_4d()
1021 const void* b = (const void*) ((uintptr_t) context->b + in xnn_compute_elementwise_binary_4d()
1022 …i * context->b_stride[1] + j * context->b_stride[2] + k * context->b_stride[3] + l * context->b_st… in xnn_compute_elementwise_binary_4d()
1023 void* y = (void*) ((uintptr_t) context->y + in xnn_compute_elementwise_binary_4d()
1024 …i * context->y_stride[1] + j * context->y_stride[2] + k * context->y_stride[3] + l * context->y_st… in xnn_compute_elementwise_binary_4d()
1025 context->ukernel(context->elements, a, b, y, &context->params); in xnn_compute_elementwise_binary_4d()
1029 const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_elementwise_binary_5d()
1032 const void* a = (const void*) ((uintptr_t) context->a + in xnn_compute_elementwise_binary_5d()
1033 …i * context->a_stride[0] + j * context->a_stride[1] + k * context->a_stride[2] + l * context->a_st… in xnn_compute_elementwise_binary_5d()
1034 const void* b = (const void*) ((uintptr_t) context->b + in xnn_compute_elementwise_binary_5d()
1035 …i * context->b_stride[0] + j * context->b_stride[1] + k * context->b_stride[2] + l * context->b_st… in xnn_compute_elementwise_binary_5d()
1036 void* y = (void*) ((uintptr_t) context->y + in xnn_compute_elementwise_binary_5d()
1037 …i * context->y_stride[0] + j * context->y_stride[1] + k * context->y_stride[2] + l * context->y_st… in xnn_compute_elementwise_binary_5d()
1038 context->ukernel(context->elements, a, b, y, &context->params); in xnn_compute_elementwise_binary_5d()
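The 1d through 5d elementwise-binary callbacks all index fixed five-entry stride arrays from the innermost slot outward: the 1d case reads stride[4], the 2d case stride[3] and stride[4], and so on, so one context layout serves every rank. An illustrative reimplementation of that addressing rule:

/* Sketch of the rank-generic addressing used by the elementwise-binary
 * dispatches: an N-d callback reads stride entries [5-N .. 4].
 * Illustrative, not XNNPACK's exact code. */
#include <stddef.h>
#include <stdint.h>

const void* elementwise_addr_sketch(const void* base, const size_t stride[5],
                                    const size_t index[5], size_t rank) {
  uintptr_t addr = (uintptr_t) base;
  for (size_t d = 0; d < rank; d++) {
    addr += index[d] * stride[5 - rank + d];  /* rank 1 touches stride[4] only */
  }
  return (const void*) addr;
}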
1042 const struct channel_shuffle_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_channel_shuffle_fixed()
1045 const void* x = (const void*) ((uintptr_t) context->x + index * context->x_stride); in xnn_compute_channel_shuffle_fixed()
1046 void* y = (void*) ((uintptr_t) context->y + index * context->y_stride); in xnn_compute_channel_shuffle_fixed()
1048 context->fixed_ukernel(context->n, x, y); in xnn_compute_channel_shuffle_fixed()
1052 const struct channel_shuffle_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_channel_shuffle_variable()
1055 const void* x = (const void*) ((uintptr_t) context->x + index * context->x_stride); in xnn_compute_channel_shuffle_variable()
1056 void* y = (void*) ((uintptr_t) context->y + index * context->y_stride); in xnn_compute_channel_shuffle_variable()
1058 context->variable_ukernel(context->n, context->m, x, y); in xnn_compute_channel_shuffle_variable()
1062 const struct lut_strided_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_lut_strided()
1065 const void* x = (const void*) ((uintptr_t) context->x + context->x_stride * batch_index); in xnn_compute_lut_strided()
1066 void* y = (void*) ((uintptr_t) context->y + context->y_stride * batch_index); in xnn_compute_lut_strided()
1068 context->ukernel(context->n, x, y, context->t); in xnn_compute_lut_strided()
1072 const struct lut_contiguous_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_lut_contiguous()
1076 const void* x = (const void*) ((uintptr_t) context->x + offset); in xnn_compute_lut_contiguous()
1077 void* y = (void*) ((uintptr_t) context->y + offset); in xnn_compute_lut_contiguous()
1079 context->ukernel(size, x, y, context->t); in xnn_compute_lut_contiguous()
1083 const struct univector_strided_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_univector_strided()
1087 const size_t x_stride = context->x_stride; in xnn_compute_univector_strided()
1088 const size_t y_stride = context->y_stride; in xnn_compute_univector_strided()
1090 const void* x = (const void*) ((uintptr_t) context->x + x_stride * batch_index); in xnn_compute_univector_strided()
1091 void* y = (void*) ((uintptr_t) context->y + y_stride * batch_index); in xnn_compute_univector_strided()
1093 context->ukernel(context->n, x, y, &context->params); in xnn_compute_univector_strided()
1100 const struct univector_contiguous_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_univector_contiguous()
1104 const uint32_t log2_xsize = context->log2_xsize; in xnn_compute_univector_contiguous()
1105 const uint32_t log2_ysize = context->log2_ysize; in xnn_compute_univector_contiguous()
1106 const void* x = (const void*) ((uintptr_t) context->x + offset); in xnn_compute_univector_contiguous()
1107 void* y = (void*) ((uintptr_t) context->y + ((offset >> log2_xsize) << log2_ysize)); in xnn_compute_univector_contiguous()
1108 context->ukernel(size, x, y, &context->params); in xnn_compute_univector_contiguous()
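In the contiguous univector case, the byte offset into x is translated to a byte offset into y via (offset >> log2_xsize) << log2_ysize, which stays correct when input and output element sizes differ. A small self-checking example; the fp16-to-fp32 sizes are only an illustration:

#include <assert.h>
#include <stddef.h>

/* Byte offset into x -> element index (>> log2_xsize) -> byte offset into y. */
static size_t translate_offset_sketch(size_t x_byte_offset,
                                      unsigned log2_xsize, unsigned log2_ysize) {
  return (x_byte_offset >> log2_xsize) << log2_ysize;
}

int main(void) {
  /* e.g. 2-byte input elements (log2=1) widened to 4-byte outputs (log2=2) */
  assert(translate_offset_sketch(6, 1, 2) == 12);  /* element 3 in both views */
  return 0;
}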
1112 const struct u8_softmax_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_u8_softmax()
1115 const uint8_t* x = (const uint8_t*) ((uintptr_t) context->x + context->x_stride * batch_index); in xnn_compute_u8_softmax()
1116 uint8_t* y = (uint8_t*) ((uintptr_t) context->y + context->y_stride * batch_index); in xnn_compute_u8_softmax()
1117 const size_t n = context->n; in xnn_compute_u8_softmax()
1120 context->rmax_ukernel(n, x, &x_max); in xnn_compute_u8_softmax()
1122 const uint32_t* t = (const uint32_t*) context->t + adjustment; in xnn_compute_u8_softmax()
1123 context->lut_norm_ukernel(n, x, t, y); in xnn_compute_u8_softmax()
1127 const struct floating_point_softmax_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_floating_point_softmax()
1130 const void* x = (const void*) ((uintptr_t) context->x + context->x_stride * batch_index); in xnn_compute_floating_point_softmax()
1131 void* y = (void*) ((uintptr_t) context->y + context->y_stride * batch_index); in xnn_compute_floating_point_softmax()
1132 const size_t n = context->n; in xnn_compute_floating_point_softmax()
1139 context->rmax_ukernel(n, x, &x_max); in xnn_compute_floating_point_softmax()
1146 context->raddstoreexpminusmax_ukernel(n, x, &x_max, y, &y_sum, &context->expminus_params); in xnn_compute_floating_point_softmax()
1153 context->compute_reciprocal(&y_sum, &y_scale); in xnn_compute_floating_point_softmax()
1154 context->vmulc_ukernel(n, y, &y_scale, y, &context->minmax_params); in xnn_compute_floating_point_softmax()
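The floating-point softmax matches outline a three-kernel, numerically stable pipeline: rmax finds the row maximum, raddstoreexpminusmax stores exp(x - max) while accumulating the sum, and vmulc scales by the reciprocal. The same pipeline in scalar C, as a reference sketch:

#include <math.h>
#include <stddef.h>

/* Scalar reference for the three-pass pipeline (assumes n >= 1);
 * XNNPACK performs each pass with a vectorized microkernel. */
void softmax_sketch(size_t n, const float* x, float* y) {
  float x_max = x[0];
  for (size_t i = 1; i < n; i++) {     /* pass 1: rmax */
    if (x[i] > x_max) x_max = x[i];
  }
  float y_sum = 0.0f;
  for (size_t i = 0; i < n; i++) {     /* pass 2: raddstoreexpminusmax */
    y[i] = expf(x[i] - x_max);
    y_sum += y[i];
  }
  const float y_scale = 1.0f / y_sum;  /* compute_reciprocal */
  for (size_t i = 0; i < n; i++) {     /* pass 3: vmulc */
    y[i] *= y_scale;
  }
}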
1158 const struct vmulcaddc_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_vmulcaddc()
1162 const size_t x_stride = context->x_stride; in xnn_compute_vmulcaddc()
1163 const size_t y_stride = context->y_stride; in xnn_compute_vmulcaddc()
1165 const void* x = (const void*) ((uintptr_t) context->x + x_stride * batch_start); in xnn_compute_vmulcaddc()
1166 void* y = (void*) ((uintptr_t) context->y + y_stride * batch_start); in xnn_compute_vmulcaddc()
1168 context->ukernel( in xnn_compute_vmulcaddc()
1170 context->n, in xnn_compute_vmulcaddc()
1172 context->w, in xnn_compute_vmulcaddc()
1174 &context->params); in xnn_compute_vmulcaddc()
1179 const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_hmp_grouped_gemm()
1187 const size_t k_scaled = context->k_scaled; in xnn_compute_hmp_grouped_gemm()
1188 const size_t a_stride = context->a_stride; in xnn_compute_hmp_grouped_gemm()
1189 const size_t cm_stride = context->cm_stride; in xnn_compute_hmp_grouped_gemm()
1191 context->ukernel.function[uarch_index]( in xnn_compute_hmp_grouped_gemm()
1195 (const void*) ((uintptr_t) context->a + mr_block_start * a_stride + group_index * k_scaled), in xnn_compute_hmp_grouped_gemm()
1197 …(const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride + group_index * … in xnn_compute_hmp_grouped_gemm()
1198 …(void*) ((uintptr_t) context->c + mr_block_start * cm_stride + (nr_block_start << context->log2_cs… in xnn_compute_hmp_grouped_gemm()
1200 context->cn_stride, in xnn_compute_hmp_grouped_gemm()
1201 &context->params); in xnn_compute_hmp_grouped_gemm()
1205 const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_hmp_gemm()
1212 const size_t a_stride = context->a_stride; in xnn_compute_hmp_gemm()
1213 const size_t cm_stride = context->cm_stride; in xnn_compute_hmp_gemm()
1215 context->ukernel.function[uarch_index]( in xnn_compute_hmp_gemm()
1218 context->k_scaled, in xnn_compute_hmp_gemm()
1219 (const void*) ((uintptr_t) context->a + mr_block_start * a_stride), in xnn_compute_hmp_gemm()
1221 (const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride), in xnn_compute_hmp_gemm()
1222 …(void*) ((uintptr_t) context->c + mr_block_start * cm_stride + (nr_block_start << context->log2_cs… in xnn_compute_hmp_gemm()
1224 context->cn_stride, in xnn_compute_hmp_gemm()
1225 context->fused_params); in xnn_compute_hmp_gemm()
1229 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_hmp_grouped_batch_igemm()
1238 const size_t ks = context->ks; in xnn_compute_hmp_grouped_batch_igemm()
1239 const size_t cm_stride = context->cm_stride; in xnn_compute_hmp_grouped_batch_igemm()
1241 context->ukernel.function[uarch_index]( in xnn_compute_hmp_grouped_batch_igemm()
1244 context->kc, in xnn_compute_hmp_grouped_batch_igemm()
1245 context->ks_scaled, in xnn_compute_hmp_grouped_batch_igemm()
1246 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_hmp_grouped_batch_igemm()
1247 …(const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride + group_index * … in xnn_compute_hmp_grouped_batch_igemm()
1248 …ptr_t) context->c + group_index * context->gc_stride + batch_index * context->bc_stride + mr_block… in xnn_compute_hmp_grouped_batch_igemm()
1250 context->cn_stride, in xnn_compute_hmp_grouped_batch_igemm()
1251 context->a_offset + group_index * context->ga_stride + batch_index * context->ba_stride, in xnn_compute_hmp_grouped_batch_igemm()
1252 context->zero, in xnn_compute_hmp_grouped_batch_igemm()
1253 &context->params); in xnn_compute_hmp_grouped_batch_igemm()
1257 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_hmp_grouped_igemm()
1265 const size_t ks = context->ks; in xnn_compute_hmp_grouped_igemm()
1266 const size_t cm_stride = context->cm_stride; in xnn_compute_hmp_grouped_igemm()
1268 context->ukernel.function[uarch_index]( in xnn_compute_hmp_grouped_igemm()
1271 context->kc, in xnn_compute_hmp_grouped_igemm()
1272 context->ks_scaled, in xnn_compute_hmp_grouped_igemm()
1273 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_hmp_grouped_igemm()
1274 …(const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride + group_index * … in xnn_compute_hmp_grouped_igemm()
1275 …(void*) ((uintptr_t) context->c + group_index * context->gc_stride + mr_block_start * cm_stride + … in xnn_compute_hmp_grouped_igemm()
1277 context->cn_stride, in xnn_compute_hmp_grouped_igemm()
1278 context->a_offset + group_index * context->ga_stride, in xnn_compute_hmp_grouped_igemm()
1279 context->zero, in xnn_compute_hmp_grouped_igemm()
1280 &context->params); in xnn_compute_hmp_grouped_igemm()
1284 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_batch_hmp_igemm()
1292 const size_t ks = context->ks; in xnn_compute_batch_hmp_igemm()
1293 const size_t cm_stride = context->cm_stride; in xnn_compute_batch_hmp_igemm()
1295 context->ukernel.function[uarch_index]( in xnn_compute_batch_hmp_igemm()
1298 context->kc, in xnn_compute_batch_hmp_igemm()
1299 context->ks_scaled, in xnn_compute_batch_hmp_igemm()
1300 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_batch_hmp_igemm()
1301 (const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride), in xnn_compute_batch_hmp_igemm()
1302 …(void*) ((uintptr_t) context->c + batch_index * context->bc_stride + mr_block_start * cm_stride + … in xnn_compute_batch_hmp_igemm()
1304 context->cn_stride, in xnn_compute_batch_hmp_igemm()
1305 context->a_offset + batch_index * context->ba_stride, in xnn_compute_batch_hmp_igemm()
1306 context->zero, in xnn_compute_batch_hmp_igemm()
1307 &context->params); in xnn_compute_batch_hmp_igemm()
1311 const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], in xnn_compute_hmp_igemm()
1318 const size_t ks = context->ks; in xnn_compute_hmp_igemm()
1319 const size_t cm_stride = context->cm_stride; in xnn_compute_hmp_igemm()
1321 context->ukernel.function[uarch_index]( in xnn_compute_hmp_igemm()
1324 context->kc, in xnn_compute_hmp_igemm()
1325 context->ks_scaled, in xnn_compute_hmp_igemm()
1326 (const void**) ((uintptr_t) context->indirect_a + mr_block_start * ks * sizeof(void*)), in xnn_compute_hmp_igemm()
1327 (const void*) ((uintptr_t) context->packed_w + nr_block_start * context->w_stride), in xnn_compute_hmp_igemm()
1328 …(void*) ((uintptr_t) context->c + mr_block_start * cm_stride + (nr_block_start << context->log2_cs… in xnn_compute_hmp_igemm()
1330 context->cn_stride, in xnn_compute_hmp_igemm()
1331 context->a_offset, in xnn_compute_hmp_igemm()
1332 context->zero, in xnn_compute_hmp_igemm()
1333 &context->params); in xnn_compute_hmp_igemm()
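The _hmp_ (heterogeneous multiprocessor) variants repeat their plain counterparts with one change visible in the matches: the microkernel table is indexed by a uarch_index passed in per invocation rather than by XNN_UARCH_DEFAULT. A sketch of that selection; the table size and default slot value are assumptions:

#include <stddef.h>

#define UARCH_DEFAULT_SKETCH 0      /* assumed value of XNN_UARCH_DEFAULT */
#define MAX_UARCH_TYPES_SKETCH 3    /* table size is an assumption */

typedef void (*ukernel_fn_sketch)(void);

struct ukernel_table_sketch {
  ukernel_fn_sketch function[MAX_UARCH_TYPES_SKETCH];
};

/* Plain dispatches read function[XNN_UARCH_DEFAULT]; _hmp_ dispatches
 * read function[uarch_index] chosen for the calling core. */
ukernel_fn_sketch select_ukernel_sketch(
    const struct ukernel_table_sketch* table, size_t uarch_index, int hmp) {
  return table->function[hmp ? uarch_index : UARCH_DEFAULT_SKETCH];
}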
1365 &op->context, in xnn_run_operator()
1375 &op->context, in xnn_run_operator()
1386 &op->context, in xnn_run_operator()
1397 &op->context, in xnn_run_operator()
1410 &op->context, in xnn_run_operator()
1422 &op->context, in xnn_run_operator()
1435 &op->context, in xnn_run_operator()
1448 &op->context, in xnn_run_operator()
1462 &op->context, in xnn_run_operator()
1476 &op->context, in xnn_run_operator()
1491 &op->context, in xnn_run_operator()
1508 &op->context, in xnn_run_operator()
1522 &op->context, in xnn_run_operator()
1537 &op->context, in xnn_run_operator()
1553 &op->context, in xnn_run_operator()
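Finally, every match under xnn_run_operator is the operator's &op->context being handed to a pthreadpool parallelization call, which then invokes one of the compute callbacks above once per tile. A sketch of that dispatch shape against the public pthreadpool API; the context struct and callback here are illustrative, not a real operator:

#include <pthreadpool.h>
#include <stddef.h>

struct my_context_sketch { int dummy; };

static void compute_tile_sketch(struct my_context_sketch* context,
                                size_t i, size_t j) {
  (void) context; (void) i; (void) j;  /* per-tile work would go here */
}

void run_operator_sketch(struct my_context_sketch* context,
                         pthreadpool_t threadpool,
                         size_t range_i, size_t range_j) {
  pthreadpool_parallelize_2d(
      threadpool,
      (pthreadpool_task_2d_t) compute_tile_sketch,
      context,  /* becomes the first argument of every callback */
      range_i, range_j,
      0 /* flags */);
}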