1/* 2 * Copyright (c) 2018-2021 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24#include "helpers.h" 25 26#if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) 27/** This function perform a select operation between two tensors when condition tensor has the same rank. 28 * 29 * @attention The data_type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar 30 * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 31 * @attention Leftover size in the X dimension should be given as preprocessor argument using -DVEC_SIZE_LEFTOVER=value: e.g. x_dimension % VEC_SIZE 32 * 33 * @param[in] c_ptr Pointer to the source tensor. Supported data types: U8 34 * @param[in] c_stride_x Stride of the source tensor in X dimension (in bytes) 35 * @param[in] c_step_x c_stride_x * number of elements along X processed per workitem(in bytes) 36 * @param[in] c_stride_y Stride of the source tensor in Y dimension (in bytes) 37 * @param[in] c_step_y c_stride_y * number of elements along Y processed per workitem(in bytes) 38 * @param[in] c_stride_z Stride of the source tensor in Z dimension (in bytes) 39 * @param[in] c_step_z c_stride_z * number of elements along Z processed per workitem(in bytes) 40 * @param[in] c_offset_first_element_in_bytes The offset of the first element in the source tensor 41 * @param[in] x_ptr Pointer to the source tensor. Supported data types: All 42 * @param[in] x_stride_x Stride of the source tensor in X dimension (in bytes) 43 * @param[in] x_step_x x_stride_x * number of elements along X processed per workitem(in bytes) 44 * @param[in] x_stride_y Stride of the source tensor in Y dimension (in bytes) 45 * @param[in] x_step_y x_stride_y * number of elements along Y processed per workitem(in bytes) 46 * @param[in] x_stride_z Stride of the source tensor in Z dimension (in bytes) 47 * @param[in] x_step_z x_stride_z * number of elements along Z processed per workitem(in bytes) 48 * @param[in] x_offset_first_element_in_bytes The offset of the first element in the source tensor 49 * @param[in] y_ptr Pointer to the source tensor. Supported data types: same as @p x_ptr 50 * @param[in] y_stride_x Stride of the source tensor in X dimension (in bytes) 51 * @param[in] y_step_x y_stride_x * number of elements along X processed per workitem(in bytes) 52 * @param[in] y_stride_y Stride of the source tensor in Y dimension (in bytes) 53 * @param[in] y_step_y y_stride_y * number of elements along Y processed per workitem(in bytes) 54 * @param[in] y_stride_z Stride of the source tensor in Z dimension (in bytes) 55 * @param[in] y_step_z y_stride_z * number of elements along Z processed per workitem(in bytes) 56 * @param[in] y_offset_first_element_in_bytes The offset of the first element in the source tensor 57 * @param[out] out_ptr Pointer to the destination tensor. Supported data types: same as @p x_ptr 58 * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes) 59 * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) 60 * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes) 61 * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) 62 * @param[in] out_stride_z Stride of the source tensor in Z dimension (in bytes) 63 * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes) 64 * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor 65 */ 66__kernel void select_same_rank( 67 TENSOR3D_DECLARATION(c), 68 TENSOR3D_DECLARATION(x), 69 TENSOR3D_DECLARATION(y), 70 TENSOR3D_DECLARATION(out)) 71{ 72 // Get pointers 73 uint offset = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0); 74 __global uchar *c_addr = c_ptr + c_offset_first_element_in_bytes + offset + get_global_id(1) * c_step_y + get_global_id(2) * c_step_z; 75 __global uchar *x_addr = x_ptr + x_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * x_step_y + get_global_id(2) * x_step_z; 76 __global uchar *y_addr = y_ptr + y_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * y_step_y + get_global_id(2) * y_step_z; 77 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z; 78 79 // Load values 80 SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 81 in_c = CONVERT(VLOAD(VEC_SIZE)(0, c_addr), SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); 82 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 83 in_x = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)x_addr); 84 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 85 in_y = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)y_addr); 86 87 // Calculate result 88 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 89 res0 = select(in_y, in_x, CONVERT(in_c > (SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0, SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); 90 91 // Boundary-aware store 92 STORE_VECTOR_SELECT(res, DATA_TYPE, (__global DATA_TYPE *)out_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0); 93} 94 95/** This function perform a select operation between two tensors when condition tensor has a different rank. 96 * 97 * @attention The data_type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar 98 * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 99 * @attention Leftover size in the X dimension should be given as preprocessor argument using -DVEC_SIZE_LEFTOVER=value: e.g. x_dimension % VEC_SIZE 100 * 101 * @param[in] c_ptr Pointer to the source tensor. Supported data types: U8 102 * @param[in] c_stride_x Stride of the source tensor in X dimension (in bytes) 103 * @param[in] c_step_x c_stride_x * number of elements along X processed per workitem(in bytes) 104 * @param[in] c_offset_first_element_in_bytes The offset of the first element in the source tensor 105 * @param[in] x_ptr Pointer to the source tensor. Supported data types: All 106 * @param[in] x_stride_x Stride of the source tensor in X dimension (in bytes) 107 * @param[in] x_step_x x_stride_x * number of elements along X processed per workitem(in bytes) 108 * @param[in] x_stride_y Stride of the source tensor in Y dimension (in bytes) 109 * @param[in] x_step_y x_stride_y * number of elements along Y processed per workitem(in bytes) 110 * @param[in] x_stride_z Stride of the source tensor in Z dimension (in bytes) 111 * @param[in] x_step_z x_stride_z * number of elements along Z processed per workitem(in bytes) 112 * @param[in] x_offset_first_element_in_bytes The offset of the first element in the source tensor 113 * @param[in] y_ptr Pointer to the source tensor. Supported data types: same as @p x_ptr 114 * @param[in] y_stride_x Stride of the source tensor in X dimension (in bytes) 115 * @param[in] y_step_x y_stride_x * number of elements along X processed per workitem(in bytes) 116 * @param[in] y_stride_y Stride of the source tensor in Y dimension (in bytes) 117 * @param[in] y_step_y y_stride_y * number of elements along Y processed per workitem(in bytes) 118 * @param[in] y_stride_z Stride of the source tensor in Z dimension (in bytes) 119 * @param[in] y_step_z y_stride_z * number of elements along Z processed per workitem(in bytes) 120 * @param[in] y_offset_first_element_in_bytes The offset of the first element in the source tensor 121 * @param[out] out_ptr Pointer to the destination tensor. Supported data types: same as @p x_ptr 122 * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes) 123 * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) 124 * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes) 125 * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) 126 * @param[in] out_stride_z Stride of the source tensor in Z dimension (in bytes) 127 * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes) 128 * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor 129 */ 130__kernel void select_different_rank_2( 131 VECTOR_DECLARATION(c), 132 TENSOR3D_DECLARATION(x), 133 TENSOR3D_DECLARATION(y), 134 TENSOR3D_DECLARATION(out)) 135{ 136 const int c_idx = get_global_id(1); 137 138 // Get pointers 139 uint offset = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0); 140 __global uchar *c_addr = c_ptr + c_offset_first_element_in_bytes; 141 __global uchar *x_addr = x_ptr + x_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * x_step_y + get_global_id(2) * x_step_z; 142 __global uchar *y_addr = y_ptr + y_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * y_step_y + get_global_id(2) * y_step_z; 143 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z; 144 145 // Load values 146 SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 147 in_c = *((__global uchar *)(c_addr + c_idx * c_stride_x)); 148 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 149 in_x = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)x_addr); 150 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 151 in_y = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)y_addr); 152 153 // Calculate result 154 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 155 res0 = select(in_y, in_x, CONVERT(in_c > (SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0, SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); 156 157 // Boundary-aware store 158 STORE_VECTOR_SELECT(res, DATA_TYPE, (__global DATA_TYPE *)out_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0); 159} 160#endif /* defined(DATA_TYPE) && defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) */ 161 162#if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(DEPTH_SIZE) && defined(VEC_SIZE_LEFTOVER) 163/** This function perform a select operation between two tensors when condition tensor has a different rank. 164 * 165 * @attention The data_type need to be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=uchar 166 * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 167 * @attention Leftover size in the X dimension should be given as preprocessor argument using -DVEC_SIZE_LEFTOVER=value: e.g. x_dimension % VEC_SIZE 168 * 169 * @param[in] c_ptr Pointer to the source tensor. Supported data types: U8 170 * @param[in] c_stride_x Stride of the source tensor in X dimension (in bytes) 171 * @param[in] c_step_x c_stride_x * number of elements along X processed per workitem(in bytes) 172 * @param[in] c_offset_first_element_in_bytes The offset of the first element in the source tensor 173 * @param[in] x_ptr Pointer to the source tensor. Supported data types: All 174 * @param[in] x_stride_x Stride of the source tensor in X dimension (in bytes) 175 * @param[in] x_step_x x_stride_x * number of elements along X processed per workitem(in bytes) 176 * @param[in] x_stride_y Stride of the source tensor in Y dimension (in bytes) 177 * @param[in] x_step_y x_stride_y * number of elements along Y processed per workitem(in bytes) 178 * @param[in] x_stride_z Stride of the source tensor in Z dimension (in bytes) 179 * @param[in] x_step_z x_stride_z * number of elements along Z processed per workitem(in bytes) 180 * @param[in] x_offset_first_element_in_bytes The offset of the first element in the source tensor 181 * @param[in] y_ptr Pointer to the source tensor. Supported data types: same as @p x_ptr 182 * @param[in] y_stride_x Stride of the source tensor in X dimension (in bytes) 183 * @param[in] y_step_x y_stride_x * number of elements along X processed per workitem(in bytes) 184 * @param[in] y_stride_y Stride of the source tensor in Y dimension (in bytes) 185 * @param[in] y_step_y y_stride_y * number of elements along Y processed per workitem(in bytes) 186 * @param[in] y_stride_z Stride of the source tensor in Z dimension (in bytes) 187 * @param[in] y_step_z y_stride_z * number of elements along Z processed per workitem(in bytes) 188 * @param[in] y_offset_first_element_in_bytes The offset of the first element in the source tensor 189 * @param[out] out_ptr Pointer to the destination tensor. Supported data types: same as @p x_ptr 190 * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes) 191 * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) 192 * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes) 193 * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) 194 * @param[in] out_stride_z Stride of the source tensor in Z dimension (in bytes) 195 * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes) 196 * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor 197 */ 198__kernel void select_different_rank_n( 199 VECTOR_DECLARATION(c), 200 TENSOR3D_DECLARATION(x), 201 TENSOR3D_DECLARATION(y), 202 TENSOR3D_DECLARATION(out)) 203{ 204 const int c_idx = get_global_id(2) / DEPTH_SIZE; 205 206 // Get pointers 207 uint offset = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0); 208 __global uchar *c_addr = c_ptr + c_offset_first_element_in_bytes; 209 __global uchar *x_addr = x_ptr + x_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * x_step_y + get_global_id(2) * x_step_z; 210 __global uchar *y_addr = y_ptr + y_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * y_step_y + get_global_id(2) * y_step_z; 211 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + offset * sizeof(DATA_TYPE) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z; 212 213 // Load values 214 SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 215 in_c = *((__global uchar *)(c_addr + c_idx * c_stride_x)); 216 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 217 in_x = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)x_addr); 218 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 219 in_y = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)y_addr); 220 221 // Calculate result 222 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 223 res0 = select(in_y, in_x, CONVERT(in_c > (SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0, SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))); 224 225 // Boundary-aware store 226 STORE_VECTOR_SELECT(res, DATA_TYPE, (__global DATA_TYPE *)out_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0); 227} 228#endif /* defined(DATA_TYPE) && defined(VEC_SIZE) && defined(DEPTH_SIZE) && defined(VEC_SIZE_LEFTOVER) */