xref: /aosp_15_r20/external/cronet/third_party/libc++/src/include/__pstl/internal/unseq_backend_simd.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _PSTL_UNSEQ_BACKEND_SIMD_H
11 #define _PSTL_UNSEQ_BACKEND_SIMD_H
12 
13 #include <__config>
14 #include <__functional/operations.h>
15 #include <__iterator/iterator_traits.h>
16 #include <__type_traits/is_arithmetic.h>
17 #include <__type_traits/is_same.h>
18 #include <__utility/move.h>
19 #include <__utility/pair.h>
20 #include <cstddef>
21 #include <cstdint>
22 
23 #include <__pstl/internal/utils.h>
24 
25 // This header defines the minimum set of vector routines required
26 // to support parallel STL.
27 
28 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
29 
30 namespace __pstl
31 {
32 namespace __unseq_backend
33 {
34 
35 // Expect vector width up to 64 (or 512 bit)
36 const std::size_t __lane_size = 64;
37 
38 template <class _Iterator, class _DifferenceType, class _Function>
39 _LIBCPP_HIDE_FROM_ABI _Iterator
__simd_walk_1(_Iterator __first,_DifferenceType __n,_Function __f)40 __simd_walk_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept
41 {
42     _PSTL_PRAGMA_SIMD
43     for (_DifferenceType __i = 0; __i < __n; ++__i)
44         __f(__first[__i]);
45 
46     return __first + __n;
47 }
48 
49 template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
50 _LIBCPP_HIDE_FROM_ABI _Iterator2
__simd_walk_2(_Iterator1 __first1,_DifferenceType __n,_Iterator2 __first2,_Function __f)51 __simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept
52 {
53     _PSTL_PRAGMA_SIMD
54     for (_DifferenceType __i = 0; __i < __n; ++__i)
55         __f(__first1[__i], __first2[__i]);
56     return __first2 + __n;
57 }
58 
59 template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Iterator3, class _Function>
60 _LIBCPP_HIDE_FROM_ABI _Iterator3
__simd_walk_3(_Iterator1 __first1,_DifferenceType __n,_Iterator2 __first2,_Iterator3 __first3,_Function __f)61 __simd_walk_3(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3,
62               _Function __f) noexcept
63 {
64     _PSTL_PRAGMA_SIMD
65     for (_DifferenceType __i = 0; __i < __n; ++__i)
66         __f(__first1[__i], __first2[__i], __first3[__i]);
67     return __first3 + __n;
68 }
69 
70 // TODO: check whether __simd_first() can be used here
71 template <class _Index, class _DifferenceType, class _Pred>
72 _LIBCPP_HIDE_FROM_ABI bool
__simd_or(_Index __first,_DifferenceType __n,_Pred __pred)73 __simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept
74 {
75     _DifferenceType __block_size = 4 < __n ? 4 : __n;
76     const _Index __last = __first + __n;
77     while (__last != __first)
78     {
79         int32_t __flag = 1;
80         _PSTL_PRAGMA_SIMD_REDUCTION(& : __flag)
81         for (_DifferenceType __i = 0; __i < __block_size; ++__i)
82             if (__pred(*(__first + __i)))
83                 __flag = 0;
84         if (!__flag)
85             return true;
86 
87         __first += __block_size;
88         if (__last - __first >= __block_size << 1)
89         {
90             // Double the block _Size.  Any unnecessary iterations can be amortized against work done so far.
91             __block_size <<= 1;
92         }
93         else
94         {
95             __block_size = __last - __first;
96         }
97     }
98     return false;
99 }
100 
101 template <class _Index1, class _DifferenceType, class _Index2, class _Pred>
102 _LIBCPP_HIDE_FROM_ABI std::pair<_Index1, _Index2>
__simd_first(_Index1 __first1,_DifferenceType __n,_Index2 __first2,_Pred __pred)103 __simd_first(_Index1 __first1, _DifferenceType __n, _Index2 __first2, _Pred __pred) noexcept
104 {
105     const _Index1 __last1 = __first1 + __n;
106     const _Index2 __last2 = __first2 + __n;
107     // Experiments show good block sizes like this
108     const _DifferenceType __block_size = 8;
109     alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
110     while (__last1 - __first1 >= __block_size)
111     {
112         _DifferenceType __found = 0;
113         _DifferenceType __i;
114             _PSTL_PRAGMA_SIMD_REDUCTION(|
115                                         : __found) for (__i = 0; __i < __block_size; ++__i)
116         {
117             const _DifferenceType __t = __pred(__first1[__i], __first2[__i]);
118             __lane[__i] = __t;
119             __found |= __t;
120         }
121         if (__found)
122         {
123             _DifferenceType __i2;
124             // This will vectorize
125             for (__i2 = 0; __i2 < __block_size; ++__i2)
126             {
127                 if (__lane[__i2])
128                     break;
129             }
130             return std::make_pair(__first1 + __i2, __first2 + __i2);
131         }
132         __first1 += __block_size;
133         __first2 += __block_size;
134     }
135 
136     //Keep remainder scalar
137     for (; __last1 != __first1; ++__first1, ++__first2)
138         if (__pred(*(__first1), *(__first2)))
139             return std::make_pair(__first1, __first2);
140 
141     return std::make_pair(__last1, __last2);
142 }
143 
144 template <class _Index, class _DifferenceType, class _Pred>
145 _LIBCPP_HIDE_FROM_ABI _DifferenceType
__simd_count(_Index __index,_DifferenceType __n,_Pred __pred)146 __simd_count(_Index __index, _DifferenceType __n, _Pred __pred) noexcept
147 {
148     _DifferenceType __count = 0;
149     _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
150     for (_DifferenceType __i = 0; __i < __n; ++__i)
151         if (__pred(*(__index + __i)))
152             ++__count;
153 
154     return __count;
155 }
156 
157 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _BinaryPredicate>
158 _LIBCPP_HIDE_FROM_ABI _OutputIterator
__simd_unique_copy(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,_BinaryPredicate __pred)159 __simd_unique_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator __result,
160                    _BinaryPredicate __pred) noexcept
161 {
162     if (__n == 0)
163         return __result;
164 
165     _DifferenceType __cnt = 1;
166     __result[0] = __first[0];
167 
168     _PSTL_PRAGMA_SIMD
169     for (_DifferenceType __i = 1; __i < __n; ++__i)
170     {
171         if (!__pred(__first[__i], __first[__i - 1]))
172         {
173             __result[__cnt] = __first[__i];
174             ++__cnt;
175         }
176     }
177     return __result + __cnt;
178 }
179 
180 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner>
181 _LIBCPP_HIDE_FROM_ABI _OutputIterator
__simd_assign(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,_Assigner __assigner)182 __simd_assign(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _Assigner __assigner) noexcept
183 {
184     _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
185     _PSTL_PRAGMA_SIMD
186     for (_DifferenceType __i = 0; __i < __n; ++__i)
187         __assigner(__first + __i, __result + __i);
188     return __result + __n;
189 }
190 
191 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _UnaryPredicate>
192 _LIBCPP_HIDE_FROM_ABI _OutputIterator
__simd_copy_if(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,_UnaryPredicate __pred)193 __simd_copy_if(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _UnaryPredicate __pred) noexcept
194 {
195     _DifferenceType __cnt = 0;
196 
197     _PSTL_PRAGMA_SIMD
198     for (_DifferenceType __i = 0; __i < __n; ++__i)
199     {
200         if (__pred(__first[__i]))
201         {
202             __result[__cnt] = __first[__i];
203             ++__cnt;
204         }
205     }
206     return __result + __cnt;
207 }
208 
209 template <class _InputIterator, class _DifferenceType, class _BinaryPredicate>
210 _LIBCPP_HIDE_FROM_ABI _DifferenceType
__simd_calc_mask_2(_InputIterator __first,_DifferenceType __n,bool * __mask,_BinaryPredicate __pred)211 __simd_calc_mask_2(_InputIterator __first, _DifferenceType __n, bool* __mask, _BinaryPredicate __pred) noexcept
212 {
213     _DifferenceType __count = 0;
214 
215     _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
216     for (_DifferenceType __i = 0; __i < __n; ++__i)
217     {
218         __mask[__i] = !__pred(__first[__i], __first[__i - 1]);
219         __count += __mask[__i];
220     }
221     return __count;
222 }
223 
224 template <class _InputIterator, class _DifferenceType, class _UnaryPredicate>
225 _LIBCPP_HIDE_FROM_ABI _DifferenceType
__simd_calc_mask_1(_InputIterator __first,_DifferenceType __n,bool * __mask,_UnaryPredicate __pred)226 __simd_calc_mask_1(_InputIterator __first, _DifferenceType __n, bool* __mask, _UnaryPredicate __pred) noexcept
227 {
228     _DifferenceType __count = 0;
229 
230     _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
231     for (_DifferenceType __i = 0; __i < __n; ++__i)
232     {
233         __mask[__i] = __pred(__first[__i]);
234         __count += __mask[__i];
235     }
236     return __count;
237 }
238 
239 template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner>
240 _LIBCPP_HIDE_FROM_ABI void
__simd_copy_by_mask(_InputIterator __first,_DifferenceType __n,_OutputIterator __result,bool * __mask,_Assigner __assigner)241 __simd_copy_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, bool* __mask,
242                     _Assigner __assigner) noexcept
243 {
244     _DifferenceType __cnt = 0;
245     _PSTL_PRAGMA_SIMD
246     for (_DifferenceType __i = 0; __i < __n; ++__i)
247     {
248         if (__mask[__i])
249         {
250             {
251                 __assigner(__first + __i, __result + __cnt);
252                 ++__cnt;
253             }
254         }
255     }
256 }
257 
258 template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2>
259 _LIBCPP_HIDE_FROM_ABI void
__simd_partition_by_mask(_InputIterator __first,_DifferenceType __n,_OutputIterator1 __out_true,_OutputIterator2 __out_false,bool * __mask)260 __simd_partition_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true,
261                          _OutputIterator2 __out_false, bool* __mask) noexcept
262 {
263     _DifferenceType __cnt_true = 0, __cnt_false = 0;
264     _PSTL_PRAGMA_SIMD
265     for (_DifferenceType __i = 0; __i < __n; ++__i)
266     {
267         if (__mask[__i])
268         {
269             __out_true[__cnt_true] = __first[__i];
270             ++__cnt_true;
271         }
272         else
273         {
274             __out_false[__cnt_false] = __first[__i];
275             ++__cnt_false;
276         }
277     }
278 }
279 
280 template <class _Index, class _DifferenceType, class _Generator>
281 _LIBCPP_HIDE_FROM_ABI _Index
__simd_generate_n(_Index __first,_DifferenceType __size,_Generator __g)282 __simd_generate_n(_Index __first, _DifferenceType __size, _Generator __g) noexcept
283 {
284     _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
285     _PSTL_PRAGMA_SIMD
286     for (_DifferenceType __i = 0; __i < __size; ++__i)
287         __first[__i] = __g();
288     return __first + __size;
289 }
290 
291 template <class _Index, class _BinaryPredicate>
292 _LIBCPP_HIDE_FROM_ABI _Index
__simd_adjacent_find(_Index __first,_Index __last,_BinaryPredicate __pred,bool __or_semantic)293 __simd_adjacent_find(_Index __first, _Index __last, _BinaryPredicate __pred, bool __or_semantic) noexcept
294 {
295     if (__last - __first < 2)
296         return __last;
297 
298     typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
299     _DifferenceType __i = 0;
300 
301     // Experiments show good block sizes like this
302     //TODO: to consider tuning block_size for various data types
303     const _DifferenceType __block_size = 8;
304     alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
305     while (__last - __first >= __block_size)
306     {
307         _DifferenceType __found = 0;
308             _PSTL_PRAGMA_SIMD_REDUCTION(|
309                                         : __found) for (__i = 0; __i < __block_size - 1; ++__i)
310         {
311             //TODO: to improve SIMD vectorization
312             const _DifferenceType __t = __pred(*(__first + __i), *(__first + __i + 1));
313             __lane[__i] = __t;
314             __found |= __t;
315         }
316 
317         //Process a pair of elements on a boundary of a data block
318         if (__first + __block_size < __last && __pred(*(__first + __i), *(__first + __i + 1)))
319             __lane[__i] = __found = 1;
320 
321         if (__found)
322         {
323             if (__or_semantic)
324                 return __first;
325 
326             // This will vectorize
327             for (__i = 0; __i < __block_size; ++__i)
328                 if (__lane[__i])
329                     break;
330             return __first + __i; //As far as found is true a __result (__lane[__i] is true) is guaranteed
331         }
332         __first += __block_size;
333     }
334     //Process the rest elements
335     for (; __last - __first > 1; ++__first)
336         if (__pred(*__first, *(__first + 1)))
337             return __first;
338 
339     return __last;
340 }
341 
342 // It was created to reduce the code inside std::enable_if
343 template <typename _Tp, typename _BinaryOperation>
344 using is_arithmetic_plus = std::integral_constant<bool, std::is_arithmetic<_Tp>::value &&
345                                                             std::is_same<_BinaryOperation, std::plus<_Tp>>::value>;
346 
347 // Exclusive scan for "+" and arithmetic types
348 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
349           class _BinaryOperation>
350 _LIBCPP_HIDE_FROM_ABI
351 typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation,std::false_type)352 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
353             _BinaryOperation, /*Inclusive*/ std::false_type)
354 {
355     _PSTL_PRAGMA_SIMD_SCAN(+ : __init)
356     for (_Size __i = 0; __i < __n; ++__i)
357     {
358         __result[__i] = __init;
359         _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init)
360         __init += __unary_op(__first[__i]);
361     }
362     return std::make_pair(__result + __n, __init);
363 }
364 
365 // As soon as we cannot call __binary_op in "combiner" we create a wrapper over _Tp to encapsulate __binary_op
366 template <typename _Tp, typename _BinaryOp>
367 struct _Combiner
368 {
369     _Tp __value_;
370     _BinaryOp* __bin_op_; // Here is a pointer to function because of default ctor
371 
_Combiner_Combiner372     _LIBCPP_HIDE_FROM_ABI _Combiner() : __value_{}, __bin_op_(nullptr) {}
373     _LIBCPP_HIDE_FROM_ABI
_Combiner_Combiner374     _Combiner(const _Tp& __value, const _BinaryOp* __bin_op)
375         : __value_(__value), __bin_op_(const_cast<_BinaryOp*>(__bin_op)) {}
_Combiner_Combiner376     _LIBCPP_HIDE_FROM_ABI _Combiner(const _Combiner& __obj) : __value_{}, __bin_op_(__obj.__bin_op) {}
377 
378     _LIBCPP_HIDE_FROM_ABI void
operator_Combiner379     operator()(const _Combiner& __obj)
380     {
381         __value_ = (*__bin_op_)(__value_, __obj.__value_);
382     }
383 };
384 
385 // Exclusive scan for other binary operations and types
386 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
387           class _BinaryOperation>
388 _LIBCPP_HIDE_FROM_ABI
389 typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,std::false_type)390 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
391             _BinaryOperation __binary_op, /*Inclusive*/ std::false_type)
392 {
393     typedef _Combiner<_Tp, _BinaryOperation> _CombinerType;
394     _CombinerType __combined_init{__init, &__binary_op};
395 
396     _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType)
397     _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __combined_init)
398     for (_Size __i = 0; __i < __n; ++__i)
399     {
400         __result[__i] = __combined_init.__value_;
401         _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__combined_init)
402         __combined_init.__value_ = __binary_op(__combined_init.__value_, __unary_op(__first[__i]));
403     }
404     return std::make_pair(__result + __n, __combined_init.__value_);
405 }
406 
407 // Inclusive scan for "+" and arithmetic types
408 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
409           class _BinaryOperation>
410 _LIBCPP_HIDE_FROM_ABI
411 typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation,std::true_type)412 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
413             _BinaryOperation, /*Inclusive*/ std::true_type)
414 {
415     _PSTL_PRAGMA_SIMD_SCAN(+ : __init)
416     for (_Size __i = 0; __i < __n; ++__i)
417     {
418         __init += __unary_op(__first[__i]);
419         _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init)
420         __result[__i] = __init;
421     }
422     return std::make_pair(__result + __n, __init);
423 }
424 
425 // Inclusive scan for other binary operations and types
426 template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
427           class _BinaryOperation>
428 _LIBCPP_HIDE_FROM_ABI
429 typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
__simd_scan(_InputIterator __first,_Size __n,_OutputIterator __result,_UnaryOperation __unary_op,_Tp __init,_BinaryOperation __binary_op,std::true_type)430 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
431             _BinaryOperation __binary_op, std::true_type)
432 {
433     typedef _Combiner<_Tp, _BinaryOperation> _CombinerType;
434     _CombinerType __combined_init{__init, &__binary_op};
435 
436     _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType)
437     _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __combined_init)
438     for (_Size __i = 0; __i < __n; ++__i)
439     {
440         __combined_init.__value_ = __binary_op(__combined_init.__value_, __unary_op(__first[__i]));
441         _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__combined_init)
442         __result[__i] = __combined_init.__value_;
443     }
444     return std::make_pair(__result + __n, __combined_init.__value_);
445 }
446 
447 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
448 // complexity [violation] - We will have at most (__n-1 + number_of_lanes) comparisons instead of at most __n-1.
449 template <typename _ForwardIterator, typename _Size, typename _Compare>
450 _LIBCPP_HIDE_FROM_ABI _ForwardIterator
__simd_min_element(_ForwardIterator __first,_Size __n,_Compare __comp)451 __simd_min_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept
452 {
453     if (__n == 0)
454     {
455         return __first;
456     }
457 
458     typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType;
459     struct _ComplexType
460     {
461         _ValueType __min_val_;
462         _Size __min_ind_;
463         _Compare* __min_comp_;
464 
465         _LIBCPP_HIDE_FROM_ABI _ComplexType() : __min_val_{}, __min_ind_{}, __min_comp_(nullptr) {}
466         _LIBCPP_HIDE_FROM_ABI _ComplexType(const _ValueType& __val, const _Compare* __comp)
467             : __min_val_(__val), __min_ind_(0), __min_comp_(const_cast<_Compare*>(__comp))
468         {
469         }
470         _LIBCPP_HIDE_FROM_ABI _ComplexType(const _ComplexType& __obj)
471             : __min_val_(__obj.__min_val_), __min_ind_(__obj.__min_ind_), __min_comp_(__obj.__min_comp_)
472         {
473         }
474 
475         _PSTL_PRAGMA_DECLARE_SIMD
476         _LIBCPP_HIDE_FROM_ABI void
477         operator()(const _ComplexType& __obj)
478         {
479             if (!(*__min_comp_)(__min_val_, __obj.__min_val_) &&
480                 ((*__min_comp_)(__obj.__min_val_, __min_val_) || __obj.__min_ind_ - __min_ind_ < 0))
481             {
482                 __min_val_ = __obj.__min_val_;
483                 __min_ind_ = __obj.__min_ind_;
484             }
485         }
486     };
487 
488     _ComplexType __init{*__first, &__comp};
489 
490     _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType)
491 
492     _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init)
493     for (_Size __i = 1; __i < __n; ++__i)
494     {
495         const _ValueType __min_val = __init.__min_val_;
496         const _ValueType __current = __first[__i];
497         if (__comp(__current, __min_val))
498         {
499             __init.__min_val_ = __current;
500             __init.__min_ind_ = __i;
501         }
502     }
503     return __first + __init.__min_ind_;
504 }
505 
506 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
507 // complexity [violation] - We will have at most (2*(__n-1) + 4*number_of_lanes) comparisons instead of at most [1.5*(__n-1)].
508 template <typename _ForwardIterator, typename _Size, typename _Compare>
509 _LIBCPP_HIDE_FROM_ABI std::pair<_ForwardIterator, _ForwardIterator>
__simd_minmax_element(_ForwardIterator __first,_Size __n,_Compare __comp)510 __simd_minmax_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept
511 {
512     if (__n == 0)
513     {
514         return std::make_pair(__first, __first);
515     }
516     typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType;
517 
518     struct _ComplexType
519     {
520         _ValueType __min_val_;
521         _ValueType __max_val_;
522         _Size __min_ind_;
523         _Size __max_ind_;
524         _Compare* __minmax_comp;
525 
526         _LIBCPP_HIDE_FROM_ABI _ComplexType()
527             : __min_val_{}, __max_val_{}, __min_ind_{}, __max_ind_{}, __minmax_comp(nullptr) {}
528         _LIBCPP_HIDE_FROM_ABI _ComplexType(
529                 const _ValueType& __min_val, const _ValueType& __max_val, const _Compare* __comp)
530             : __min_val_(__min_val), __max_val_(__max_val), __min_ind_(0), __max_ind_(0),
531               __minmax_comp(const_cast<_Compare*>(__comp))
532         {
533         }
534         _LIBCPP_HIDE_FROM_ABI _ComplexType(const _ComplexType& __obj)
535             : __min_val_(__obj.__min_val_), __max_val_(__obj.__max_val_), __min_ind_(__obj.__min_ind_),
536               __max_ind_(__obj.__max_ind_), __minmax_comp(__obj.__minmax_comp)
537         {
538         }
539 
540         _LIBCPP_HIDE_FROM_ABI void
541         operator()(const _ComplexType& __obj)
542         {
543             // min
544             if ((*__minmax_comp)(__obj.__min_val_, __min_val_))
545             {
546                 __min_val_ = __obj.__min_val_;
547                 __min_ind_ = __obj.__min_ind_;
548             }
549             else if (!(*__minmax_comp)(__min_val_, __obj.__min_val_))
550             {
551                 __min_val_ = __obj.__min_val_;
552                 __min_ind_ = (__min_ind_ - __obj.__min_ind_ < 0) ? __min_ind_ : __obj.__min_ind_;
553             }
554 
555             // max
556             if ((*__minmax_comp)(__max_val_, __obj.__max_val_))
557             {
558                 __max_val_ = __obj.__max_val_;
559                 __max_ind_ = __obj.__max_ind_;
560             }
561             else if (!(*__minmax_comp)(__obj.__max_val_, __max_val_))
562             {
563                 __max_val_ = __obj.__max_val_;
564                 __max_ind_ = (__max_ind_ - __obj.__max_ind_ < 0) ? __obj.__max_ind_ : __max_ind_;
565             }
566         }
567     };
568 
569     _ComplexType __init{*__first, *__first, &__comp};
570 
571     _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType);
572 
573     _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init)
574     for (_Size __i = 1; __i < __n; ++__i)
575     {
576         auto __min_val = __init.__min_val_;
577         auto __max_val = __init.__max_val_;
578         auto __current = __first + __i;
579         if (__comp(*__current, __min_val))
580         {
581             __init.__min_val_ = *__current;
582             __init.__min_ind_ = __i;
583         }
584         else if (!__comp(*__current, __max_val))
585         {
586             __init.__max_val_ = *__current;
587             __init.__max_ind_ = __i;
588         }
589     }
590     return std::make_pair(__first + __init.__min_ind_, __first + __init.__max_ind_);
591 }
592 
593 template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2,
594           class _UnaryPredicate>
595 _LIBCPP_HIDE_FROM_ABI std::pair<_OutputIterator1, _OutputIterator2>
__simd_partition_copy(_InputIterator __first,_DifferenceType __n,_OutputIterator1 __out_true,_OutputIterator2 __out_false,_UnaryPredicate __pred)596 __simd_partition_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true,
597                       _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept
598 {
599     _DifferenceType __cnt_true = 0, __cnt_false = 0;
600 
601     _PSTL_PRAGMA_SIMD
602     for (_DifferenceType __i = 0; __i < __n; ++__i)
603     {
604         if (__pred(__first[__i]))
605         {
606             __out_true[__cnt_true] = __first[__i];
607             ++__cnt_true;
608         }
609         else
610         {
611             __out_false[__cnt_false] = __first[__i];
612             ++__cnt_false;
613         }
614     }
615     return std::make_pair(__out_true + __cnt_true, __out_false + __cnt_false);
616 }
617 
618 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
619 _LIBCPP_HIDE_FROM_ABI _ForwardIterator1
__simd_find_first_of(_ForwardIterator1 __first,_ForwardIterator1 __last,_ForwardIterator2 __s_first,_ForwardIterator2 __s_last,_BinaryPredicate __pred)620 __simd_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
621                      _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept
622 {
623     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferencType;
624 
625     const _DifferencType __n1 = __last - __first;
626     const _DifferencType __n2 = __s_last - __s_first;
627     if (__n1 == 0 || __n2 == 0)
628     {
629         return __last; // according to the standard
630     }
631 
632     // Common case
633     // If first sequence larger than second then we'll run simd_first with parameters of first sequence.
634     // Otherwise, vice versa.
635     if (__n1 < __n2)
636     {
637         for (; __first != __last; ++__first)
638         {
639             if (__unseq_backend::__simd_or(
640                     __s_first, __n2,
641                     __internal::__equal_value_by_pred<decltype(*__first), _BinaryPredicate>(*__first, __pred)))
642             {
643                 return __first;
644             }
645         }
646     }
647     else
648     {
649         for (; __s_first != __s_last; ++__s_first)
650         {
651             const auto __result = __unseq_backend::__simd_first(
652                 __first, _DifferencType(0), __n1, [__s_first, &__pred](_ForwardIterator1 __it, _DifferencType __i) {
653                     return __pred(__it[__i], *__s_first);
654                 });
655             if (__result != __last)
656             {
657                 return __result;
658             }
659         }
660     }
661     return __last;
662 }
663 
664 template <class _RandomAccessIterator, class _DifferenceType, class _UnaryPredicate>
665 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
__simd_remove_if(_RandomAccessIterator __first,_DifferenceType __n,_UnaryPredicate __pred)666 __simd_remove_if(_RandomAccessIterator __first, _DifferenceType __n, _UnaryPredicate __pred) noexcept
667 {
668     // find first element we need to remove
669     auto __current = __unseq_backend::__simd_first(
670         __first, _DifferenceType(0), __n,
671         [&__pred](_RandomAccessIterator __it, _DifferenceType __i) { return __pred(__it[__i]); });
672     __n -= __current - __first;
673 
674     // if we have in sequence only one element that pred(__current[1]) != false we can exit the function
675     if (__n < 2)
676     {
677         return __current;
678     }
679 
680     _DifferenceType __cnt = 0;
681     _PSTL_PRAGMA_SIMD
682     for (_DifferenceType __i = 1; __i < __n; ++__i)
683     {
684         if (!__pred(__current[__i]))
685         {
686             __current[__cnt] = std::move(__current[__i]);
687             ++__cnt;
688         }
689     }
690     return __current + __cnt;
691 }
692 } // namespace __unseq_backend
693 } // namespace __pstl
694 
695 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
696 
697 #endif /* _PSTL_UNSEQ_BACKEND_SIMD_H */
698