1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef _PSTL_UNSEQ_BACKEND_SIMD_H
11 #define _PSTL_UNSEQ_BACKEND_SIMD_H
12
13 #include <__config>
14 #include <__functional/operations.h>
15 #include <__iterator/iterator_traits.h>
16 #include <__type_traits/is_arithmetic.h>
17 #include <__type_traits/is_same.h>
18 #include <__utility/move.h>
19 #include <__utility/pair.h>
20 #include <cstddef>
21 #include <cstdint>
22
23 #include <__pstl/internal/utils.h>
24
25 // This header defines the minimum set of vector routines required
26 // to support parallel STL.
27
28 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
29
30 namespace __pstl
31 {
32 namespace __unseq_backend
33 {
34
35 // Expect vector width up to 64 (or 512 bit)
36 const std::size_t __lane_size = 64;
37
38 template <class _Iterator, class _DifferenceType, class _Function>
39 _LIBCPP_HIDE_FROM_ABI _Iterator
__simd_walk_1(_Iterator __first,_DifferenceType __n,_Function __f)40 __simd_walk_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept
41 {
42 _PSTL_PRAGMA_SIMD
43 for (_DifferenceType __i = 0; __i < __n; ++__i)
44 __f(__first[__i]);
45
46 return __first + __n;
47 }
48
49 template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
50 _LIBCPP_HIDE_FROM_ABI _Iterator2
__simd_walk_2(_Iterator1 __first1,_DifferenceType __n,_Iterator2 __first2,_Function __f)51 __simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept
52 {
53 _PSTL_PRAGMA_SIMD
54 for (_DifferenceType __i = 0; __i < __n; ++__i)
55 __f(__first1[__i], __first2[__i]);
56 return __first2 + __n;
57 }
58
59 template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Iterator3, class _Function>
60 _LIBCPP_HIDE_FROM_ABI _Iterator3
__simd_walk_3(_Iterator1 __first1,_DifferenceType __n,_Iterator2 __first2,_Iterator3 __first3,_Function __f)61 __simd_walk_3(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3,
62 _Function __f) noexcept
63 {
64 _PSTL_PRAGMA_SIMD
65 for (_DifferenceType __i = 0; __i < __n; ++__i)
66 __f(__first1[__i], __first2[__i], __first3[__i]);
67 return __first3 + __n;
68 }
69
70 // TODO: check whether __simd_first() can be used here
71 template <class _Index, class _DifferenceType, class _Pred>
72 _LIBCPP_HIDE_FROM_ABI bool
__simd_or(_Index __first,_DifferenceType __n,_Pred __pred)73 __simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept
74 {
75 _DifferenceType __block_size = 4 < __n ? 4 : __n;
76 const _Index __last = __first + __n;
77 while (__last != __first)
78 {
79 int32_t __flag = 1;
80 _PSTL_PRAGMA_SIMD_REDUCTION(& : __flag)
81 for (_DifferenceType __i = 0; __i < __block_size; ++__i)
82 if (__pred(*(__first + __i)))
83 __flag = 0;
84 if (!__flag)
85 return true;
86
87 __first += __block_size;
88 if (__last - __first >= __block_size << 1)
89 {
90 // Double the block _Size. Any unnecessary iterations can be amortized against work done so far.
91 __block_size <<= 1;
92 }
93 else
94 {
95 __block_size = __last - __first;
96 }
97 }
98 return false;
99 }
100
101 template <class _Index1, class _DifferenceType, class _Index2, class _Pred>
102 _LIBCPP_HIDE_FROM_ABI std::pair<_Index1, _Index2>
__simd_first(_Index1 __first1,_DifferenceType __n,_Index2 __first2,_Pred __pred)103 __simd_first(_Index1 __first1, _DifferenceType __n, _Index2 __first2, _Pred __pred) noexcept
104 {
105 const _Index1 __last1 = __first1 + __n;
106 const _Index2 __last2 = __first2 + __n;
107 // Experiments show good block sizes like this
108 const _DifferenceType __block_size = 8;
109 alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
110 while (__last1 - __first1 >= __block_size)
111 {
112 _DifferenceType __found = 0;
113 _DifferenceType __i;
114 _PSTL_PRAGMA_SIMD_REDUCTION(|
115 : __found) for (__i = 0; __i < __block_size; ++__i)
116 {
117 const _DifferenceType __t = __pred(__first1[__i], __first2[__i]);
118 __lane[__i] = __t;
119 __found |= __t;
120 }
121 if (__found)
122 {
123 _DifferenceType __i2;
124 // This will vectorize
125 for (__i2 = 0; __i2 < __block_size; ++__i2)
126 {
127 if (__lane[__i2])
128 break;
129 }
130 return std::make_pair(__first1 + __i2, __first2 + __i2);
131 }
132 __first1 += __block_size;
133 __first2 += __block_size;
134 }
135
136 //Keep remainder scalar
137 for (; __last1 != __first1; ++__first1, ++__first2)
138 if (__pred(*(__first1), *(__first2)))
139 return std::make_pair(__first1, __first2);
140
141 return std::make_pair(__last1, __last2);
142 }
143
144 template <class _Index, class _DifferenceType, class _Pred>
145 _LIBCPP_HIDE_FROM_ABI _DifferenceType
__simd_count(_Index __index,_DifferenceType __n,_Pred __pred)146 __simd_count(_Index __index, _DifferenceType __n, _Pred __pred) noexcept
147 {
148 _DifferenceType __count = 0;
149 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
150 for (_DifferenceType __i = 0; __i < __n; ++__i)
151 if (__pred(*(__index + __i)))
152 ++__count;
153
154 return __count;
155 }
156
157 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _BinaryPredicate>
158 _LIBCPP_HIDE_FROM_ABI _OutputIterator
__simd_unique_copy(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,_BinaryPredicate __pred)159 __simd_unique_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator __result,
160 _BinaryPredicate __pred) noexcept
161 {
162 if (__n == 0)
163 return __result;
164
165 _DifferenceType __cnt = 1;
166 __result[0] = __first[0];
167
168 _PSTL_PRAGMA_SIMD
169 for (_DifferenceType __i = 1; __i < __n; ++__i)
170 {
171 if (!__pred(__first[__i], __first[__i - 1]))
172 {
173 __result[__cnt] = __first[__i];
174 ++__cnt;
175 }
176 }
177 return __result + __cnt;
178 }
179
180 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner>
181 _LIBCPP_HIDE_FROM_ABI _OutputIterator
__simd_assign(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,_Assigner __assigner)182 __simd_assign(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _Assigner __assigner) noexcept
183 {
184 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
185 _PSTL_PRAGMA_SIMD
186 for (_DifferenceType __i = 0; __i < __n; ++__i)
187 __assigner(__first + __i, __result + __i);
188 return __result + __n;
189 }
190
191 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _UnaryPredicate>
192 _LIBCPP_HIDE_FROM_ABI _OutputIterator
__simd_copy_if(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,_UnaryPredicate __pred)193 __simd_copy_if(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _UnaryPredicate __pred) noexcept
194 {
195 _DifferenceType __cnt = 0;
196
197 _PSTL_PRAGMA_SIMD
198 for (_DifferenceType __i = 0; __i < __n; ++__i)
199 {
200 if (__pred(__first[__i]))
201 {
202 __result[__cnt] = __first[__i];
203 ++__cnt;
204 }
205 }
206 return __result + __cnt;
207 }
208
209 template <class _InputIterator, class _DifferenceType, class _BinaryPredicate>
210 _LIBCPP_HIDE_FROM_ABI _DifferenceType
__simd_calc_mask_2(_InputIterator __first,_DifferenceType __n,bool * __mask,_BinaryPredicate __pred)211 __simd_calc_mask_2(_InputIterator __first, _DifferenceType __n, bool* __mask, _BinaryPredicate __pred) noexcept
212 {
213 _DifferenceType __count = 0;
214
215 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
216 for (_DifferenceType __i = 0; __i < __n; ++__i)
217 {
218 __mask[__i] = !__pred(__first[__i], __first[__i - 1]);
219 __count += __mask[__i];
220 }
221 return __count;
222 }
223
224 template <class _InputIterator, class _DifferenceType, class _UnaryPredicate>
225 _LIBCPP_HIDE_FROM_ABI _DifferenceType
__simd_calc_mask_1(_InputIterator __first,_DifferenceType __n,bool * __mask,_UnaryPredicate __pred)226 __simd_calc_mask_1(_InputIterator __first, _DifferenceType __n, bool* __mask, _UnaryPredicate __pred) noexcept
227 {
228 _DifferenceType __count = 0;
229
230 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
231 for (_DifferenceType __i = 0; __i < __n; ++__i)
232 {
233 __mask[__i] = __pred(__first[__i]);
234 __count += __mask[__i];
235 }
236 return __count;
237 }
238
239 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner>
240 _LIBCPP_HIDE_FROM_ABI void
__simd_copy_by_mask(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,bool * __mask,_Assigner __assigner)241 __simd_copy_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, bool* __mask,
242 _Assigner __assigner) noexcept
243 {
244 _DifferenceType __cnt = 0;
245 _PSTL_PRAGMA_SIMD
246 for (_DifferenceType __i = 0; __i < __n; ++__i)
247 {
248 if (__mask[__i])
249 {
250 {
251 __assigner(__first + __i, __result + __cnt);
252 ++__cnt;
253 }
254 }
255 }
256 }
257
258 template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2>
259 _LIBCPP_HIDE_FROM_ABI void
__simd_partition_by_mask(_InputIterator __first,_DifferenceType __n,_OutputIterator1 __out_true,_OutputIterator2 __out_false,bool * __mask)260 __simd_partition_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true,
261 _OutputIterator2 __out_false, bool* __mask) noexcept
262 {
263 _DifferenceType __cnt_true = 0, __cnt_false = 0;
264 _PSTL_PRAGMA_SIMD
265 for (_DifferenceType __i = 0; __i < __n; ++__i)
266 {
267 if (__mask[__i])
268 {
269 __out_true[__cnt_true] = __first[__i];
270 ++__cnt_true;
271 }
272 else
273 {
274 __out_false[__cnt_false] = __first[__i];
275 ++__cnt_false;
276 }
277 }
278 }
279
280 template <class _Index, class _DifferenceType, class _Generator>
281 _LIBCPP_HIDE_FROM_ABI _Index
__simd_generate_n(_Index __first,_DifferenceType __size,_Generator __g)282 __simd_generate_n(_Index __first, _DifferenceType __size, _Generator __g) noexcept
283 {
284 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
285 _PSTL_PRAGMA_SIMD
286 for (_DifferenceType __i = 0; __i < __size; ++__i)
287 __first[__i] = __g();
288 return __first + __size;
289 }
290
291 template <class _Index, class _BinaryPredicate>
292 _LIBCPP_HIDE_FROM_ABI _Index
__simd_adjacent_find(_Index __first,_Index __last,_BinaryPredicate __pred,bool __or_semantic)293 __simd_adjacent_find(_Index __first, _Index __last, _BinaryPredicate __pred, bool __or_semantic) noexcept
294 {
295 if (__last - __first < 2)
296 return __last;
297
298 typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
299 _DifferenceType __i = 0;
300
301 // Experiments show good block sizes like this
302 //TODO: to consider tuning block_size for various data types
303 const _DifferenceType __block_size = 8;
304 alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
305 while (__last - __first >= __block_size)
306 {
307 _DifferenceType __found = 0;
308 _PSTL_PRAGMA_SIMD_REDUCTION(|
309 : __found) for (__i = 0; __i < __block_size - 1; ++__i)
310 {
311 //TODO: to improve SIMD vectorization
312 const _DifferenceType __t = __pred(*(__first + __i), *(__first + __i + 1));
313 __lane[__i] = __t;
314 __found |= __t;
315 }
316
317 //Process a pair of elements on a boundary of a data block
318 if (__first + __block_size < __last && __pred(*(__first + __i), *(__first + __i + 1)))
319 __lane[__i] = __found = 1;
320
321 if (__found)
322 {
323 if (__or_semantic)
324 return __first;
325
326 // This will vectorize
327 for (__i = 0; __i < __block_size; ++__i)
328 if (__lane[__i])
329 break;
330 return __first + __i; //As far as found is true a __result (__lane[__i] is true) is guaranteed
331 }
332 __first += __block_size;
333 }
334 //Process the rest elements
335 for (; __last - __first > 1; ++__first)
336 if (__pred(*__first, *(__first + 1)))
337 return __first;
338
339 return __last;
340 }
341
342 // It was created to reduce the code inside std::enable_if
343 template <typename _Tp, typename _BinaryOperation>
344 using is_arithmetic_plus = std::integral_constant<bool, std::is_arithmetic<_Tp>::value &&
345 std::is_same<_BinaryOperation, std::plus<_Tp>>::value>;
346
347 // Exclusive scan for "+" and arithmetic types
348 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
349 class _BinaryOperation>
350 _LIBCPP_HIDE_FROM_ABI
351 typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation,std::false_type)352 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
353 _BinaryOperation, /*Inclusive*/ std::false_type)
354 {
355 _PSTL_PRAGMA_SIMD_SCAN(+ : __init)
356 for (_Size __i = 0; __i < __n; ++__i)
357 {
358 __result[__i] = __init;
359 _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init)
360 __init += __unary_op(__first[__i]);
361 }
362 return std::make_pair(__result + __n, __init);
363 }
364
365 // As soon as we cannot call __binary_op in "combiner" we create a wrapper over _Tp to encapsulate __binary_op
366 template <typename _Tp, typename _BinaryOp>
367 struct _Combiner
368 {
369 _Tp __value_;
370 _BinaryOp* __bin_op_; // Here is a pointer to function because of default ctor
371
_Combiner_Combiner372 _LIBCPP_HIDE_FROM_ABI _Combiner() : __value_{}, __bin_op_(nullptr) {}
373 _LIBCPP_HIDE_FROM_ABI
_Combiner_Combiner374 _Combiner(const _Tp& __value, const _BinaryOp* __bin_op)
375 : __value_(__value), __bin_op_(const_cast<_BinaryOp*>(__bin_op)) {}
_Combiner_Combiner376 _LIBCPP_HIDE_FROM_ABI _Combiner(const _Combiner& __obj) : __value_{}, __bin_op_(__obj.__bin_op) {}
377
378 _LIBCPP_HIDE_FROM_ABI void
operator_Combiner379 operator()(const _Combiner& __obj)
380 {
381 __value_ = (*__bin_op_)(__value_, __obj.__value_);
382 }
383 };
384
385 // Exclusive scan for other binary operations and types
386 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
387 class _BinaryOperation>
388 _LIBCPP_HIDE_FROM_ABI
389 typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,std::false_type)390 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
391 _BinaryOperation __binary_op, /*Inclusive*/ std::false_type)
392 {
393 typedef _Combiner<_Tp, _BinaryOperation> _CombinerType;
394 _CombinerType __combined_init{__init, &__binary_op};
395
396 _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType)
397 _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __combined_init)
398 for (_Size __i = 0; __i < __n; ++__i)
399 {
400 __result[__i] = __combined_init.__value_;
401 _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__combined_init)
402 __combined_init.__value_ = __binary_op(__combined_init.__value_, __unary_op(__first[__i]));
403 }
404 return std::make_pair(__result + __n, __combined_init.__value_);
405 }
406
407 // Inclusive scan for "+" and arithmetic types
408 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
409 class _BinaryOperation>
410 _LIBCPP_HIDE_FROM_ABI
411 typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation,std::true_type)412 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
413 _BinaryOperation, /*Inclusive*/ std::true_type)
414 {
415 _PSTL_PRAGMA_SIMD_SCAN(+ : __init)
416 for (_Size __i = 0; __i < __n; ++__i)
417 {
418 __init += __unary_op(__first[__i]);
419 _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init)
420 __result[__i] = __init;
421 }
422 return std::make_pair(__result + __n, __init);
423 }
424
425 // Inclusive scan for other binary operations and types
426 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
427 class _BinaryOperation>
428 _LIBCPP_HIDE_FROM_ABI
429 typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,std::true_type)430 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
431 _BinaryOperation __binary_op, std::true_type)
432 {
433 typedef _Combiner<_Tp, _BinaryOperation> _CombinerType;
434 _CombinerType __combined_init{__init, &__binary_op};
435
436 _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType)
437 _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __combined_init)
438 for (_Size __i = 0; __i < __n; ++__i)
439 {
440 __combined_init.__value_ = __binary_op(__combined_init.__value_, __unary_op(__first[__i]));
441 _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__combined_init)
442 __result[__i] = __combined_init.__value_;
443 }
444 return std::make_pair(__result + __n, __combined_init.__value_);
445 }
446
447 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
448 // complexity [violation] - We will have at most (__n-1 + number_of_lanes) comparisons instead of at most __n-1.
449 template <typename _ForwardIterator, typename _Size, typename _Compare>
450 _LIBCPP_HIDE_FROM_ABI _ForwardIterator
__simd_min_element(_ForwardIterator __first,_Size __n,_Compare __comp)451 __simd_min_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept
452 {
453 if (__n == 0)
454 {
455 return __first;
456 }
457
458 typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType;
459 struct _ComplexType
460 {
461 _ValueType __min_val_;
462 _Size __min_ind_;
463 _Compare* __min_comp_;
464
465 _LIBCPP_HIDE_FROM_ABI _ComplexType() : __min_val_{}, __min_ind_{}, __min_comp_(nullptr) {}
466 _LIBCPP_HIDE_FROM_ABI _ComplexType(const _ValueType& __val, const _Compare* __comp)
467 : __min_val_(__val), __min_ind_(0), __min_comp_(const_cast<_Compare*>(__comp))
468 {
469 }
470 _LIBCPP_HIDE_FROM_ABI _ComplexType(const _ComplexType& __obj)
471 : __min_val_(__obj.__min_val_), __min_ind_(__obj.__min_ind_), __min_comp_(__obj.__min_comp_)
472 {
473 }
474
475 _PSTL_PRAGMA_DECLARE_SIMD
476 _LIBCPP_HIDE_FROM_ABI void
477 operator()(const _ComplexType& __obj)
478 {
479 if (!(*__min_comp_)(__min_val_, __obj.__min_val_) &&
480 ((*__min_comp_)(__obj.__min_val_, __min_val_) || __obj.__min_ind_ - __min_ind_ < 0))
481 {
482 __min_val_ = __obj.__min_val_;
483 __min_ind_ = __obj.__min_ind_;
484 }
485 }
486 };
487
488 _ComplexType __init{*__first, &__comp};
489
490 _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType)
491
492 _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init)
493 for (_Size __i = 1; __i < __n; ++__i)
494 {
495 const _ValueType __min_val = __init.__min_val_;
496 const _ValueType __current = __first[__i];
497 if (__comp(__current, __min_val))
498 {
499 __init.__min_val_ = __current;
500 __init.__min_ind_ = __i;
501 }
502 }
503 return __first + __init.__min_ind_;
504 }
505
506 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
507 // complexity [violation] - We will have at most (2*(__n-1) + 4*number_of_lanes) comparisons instead of at most [1.5*(__n-1)].
508 template <typename _ForwardIterator, typename _Size, typename _Compare>
509 _LIBCPP_HIDE_FROM_ABI std::pair<_ForwardIterator, _ForwardIterator>
__simd_minmax_element(_ForwardIterator __first,_Size __n,_Compare __comp)510 __simd_minmax_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept
511 {
512 if (__n == 0)
513 {
514 return std::make_pair(__first, __first);
515 }
516 typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType;
517
518 struct _ComplexType
519 {
520 _ValueType __min_val_;
521 _ValueType __max_val_;
522 _Size __min_ind_;
523 _Size __max_ind_;
524 _Compare* __minmax_comp;
525
526 _LIBCPP_HIDE_FROM_ABI _ComplexType()
527 : __min_val_{}, __max_val_{}, __min_ind_{}, __max_ind_{}, __minmax_comp(nullptr) {}
528 _LIBCPP_HIDE_FROM_ABI _ComplexType(
529 const _ValueType& __min_val, const _ValueType& __max_val, const _Compare* __comp)
530 : __min_val_(__min_val), __max_val_(__max_val), __min_ind_(0), __max_ind_(0),
531 __minmax_comp(const_cast<_Compare*>(__comp))
532 {
533 }
534 _LIBCPP_HIDE_FROM_ABI _ComplexType(const _ComplexType& __obj)
535 : __min_val_(__obj.__min_val_), __max_val_(__obj.__max_val_), __min_ind_(__obj.__min_ind_),
536 __max_ind_(__obj.__max_ind_), __minmax_comp(__obj.__minmax_comp)
537 {
538 }
539
540 _LIBCPP_HIDE_FROM_ABI void
541 operator()(const _ComplexType& __obj)
542 {
543 // min
544 if ((*__minmax_comp)(__obj.__min_val_, __min_val_))
545 {
546 __min_val_ = __obj.__min_val_;
547 __min_ind_ = __obj.__min_ind_;
548 }
549 else if (!(*__minmax_comp)(__min_val_, __obj.__min_val_))
550 {
551 __min_val_ = __obj.__min_val_;
552 __min_ind_ = (__min_ind_ - __obj.__min_ind_ < 0) ? __min_ind_ : __obj.__min_ind_;
553 }
554
555 // max
556 if ((*__minmax_comp)(__max_val_, __obj.__max_val_))
557 {
558 __max_val_ = __obj.__max_val_;
559 __max_ind_ = __obj.__max_ind_;
560 }
561 else if (!(*__minmax_comp)(__obj.__max_val_, __max_val_))
562 {
563 __max_val_ = __obj.__max_val_;
564 __max_ind_ = (__max_ind_ - __obj.__max_ind_ < 0) ? __obj.__max_ind_ : __max_ind_;
565 }
566 }
567 };
568
569 _ComplexType __init{*__first, *__first, &__comp};
570
571 _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType);
572
573 _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init)
574 for (_Size __i = 1; __i < __n; ++__i)
575 {
576 auto __min_val = __init.__min_val_;
577 auto __max_val = __init.__max_val_;
578 auto __current = __first + __i;
579 if (__comp(*__current, __min_val))
580 {
581 __init.__min_val_ = *__current;
582 __init.__min_ind_ = __i;
583 }
584 else if (!__comp(*__current, __max_val))
585 {
586 __init.__max_val_ = *__current;
587 __init.__max_ind_ = __i;
588 }
589 }
590 return std::make_pair(__first + __init.__min_ind_, __first + __init.__max_ind_);
591 }
592
593 template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2,
594 class _UnaryPredicate>
595 _LIBCPP_HIDE_FROM_ABI std::pair<_OutputIterator1, _OutputIterator2>
__simd_partition_copy(_InputIterator __first,_DifferenceType __n,_OutputIterator1 __out_true,_OutputIterator2 __out_false,_UnaryPredicate __pred)596 __simd_partition_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true,
597 _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept
598 {
599 _DifferenceType __cnt_true = 0, __cnt_false = 0;
600
601 _PSTL_PRAGMA_SIMD
602 for (_DifferenceType __i = 0; __i < __n; ++__i)
603 {
604 if (__pred(__first[__i]))
605 {
606 __out_true[__cnt_true] = __first[__i];
607 ++__cnt_true;
608 }
609 else
610 {
611 __out_false[__cnt_false] = __first[__i];
612 ++__cnt_false;
613 }
614 }
615 return std::make_pair(__out_true + __cnt_true, __out_false + __cnt_false);
616 }
617
618 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
619 _LIBCPP_HIDE_FROM_ABI _ForwardIterator1
__simd_find_first_of(_ForwardIterator1 __first,_ForwardIterator1 __last,_ForwardIterator2 __s_first,_ForwardIterator2 __s_last,_BinaryPredicate __pred)620 __simd_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
621 _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept
622 {
623 typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferencType;
624
625 const _DifferencType __n1 = __last - __first;
626 const _DifferencType __n2 = __s_last - __s_first;
627 if (__n1 == 0 || __n2 == 0)
628 {
629 return __last; // according to the standard
630 }
631
632 // Common case
633 // If first sequence larger than second then we'll run simd_first with parameters of first sequence.
634 // Otherwise, vice versa.
635 if (__n1 < __n2)
636 {
637 for (; __first != __last; ++__first)
638 {
639 if (__unseq_backend::__simd_or(
640 __s_first, __n2,
641 __internal::__equal_value_by_pred<decltype(*__first), _BinaryPredicate>(*__first, __pred)))
642 {
643 return __first;
644 }
645 }
646 }
647 else
648 {
649 for (; __s_first != __s_last; ++__s_first)
650 {
651 const auto __result = __unseq_backend::__simd_first(
652 __first, _DifferencType(0), __n1, [__s_first, &__pred](_ForwardIterator1 __it, _DifferencType __i) {
653 return __pred(__it[__i], *__s_first);
654 });
655 if (__result != __last)
656 {
657 return __result;
658 }
659 }
660 }
661 return __last;
662 }
663
664 template <class _RandomAccessIterator, class _DifferenceType, class _UnaryPredicate>
665 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
__simd_remove_if(_RandomAccessIterator __first,_DifferenceType __n,_UnaryPredicate __pred)666 __simd_remove_if(_RandomAccessIterator __first, _DifferenceType __n, _UnaryPredicate __pred) noexcept
667 {
668 // find first element we need to remove
669 auto __current = __unseq_backend::__simd_first(
670 __first, _DifferenceType(0), __n,
671 [&__pred](_RandomAccessIterator __it, _DifferenceType __i) { return __pred(__it[__i]); });
672 __n -= __current - __first;
673
674 // if we have in sequence only one element that pred(__current[1]) != false we can exit the function
675 if (__n < 2)
676 {
677 return __current;
678 }
679
680 _DifferenceType __cnt = 0;
681 _PSTL_PRAGMA_SIMD
682 for (_DifferenceType __i = 1; __i < __n; ++__i)
683 {
684 if (!__pred(__current[__i]))
685 {
686 __current[__cnt] = std::move(__current[__i]);
687 ++__cnt;
688 }
689 }
690 return __current + __cnt;
691 }
692 } // namespace __unseq_backend
693 } // namespace __pstl
694
695 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
696
697 #endif /* _PSTL_UNSEQ_BACKEND_SIMD_H */
698