// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_UTIL_H
#define _PSTL_INTERNAL_OMP_UTIL_H

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <iterator>
#include <memory>
#include <vector>
#include <omp.h>

#include "../parallel_backend_utils.h"
#include "../unseq_backend_simd.h"
#include "../utils.h"

// Portability "#pragma" definition
#ifdef _MSC_VER
#    define _PSTL_PRAGMA(x) __pragma(x)
#else
#    define _PSTL_PRAGMA(x) _Pragma(#x)
#endif
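
// For reference, a minimal usage sketch (illustrative only, not part of this
// header's interface): on a conforming compiler
//
//     _PSTL_PRAGMA(omp taskloop untied mergeable)
//
// expands to _Pragma("omp taskloop untied mergeable"), and under MSVC to
// __pragma(omp taskloop untied mergeable), so one spelling serves both
// front ends.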

namespace __pstl
{
namespace __omp_backend
{

//------------------------------------------------------------------------
// Use to cancel execution
//------------------------------------------------------------------------
inline void
__cancel_execution()
{
    // TODO: Figure out how to make cancellation work.
}

//------------------------------------------------------------------------
// raw buffer
//------------------------------------------------------------------------

template <typename _Tp>
class __buffer
{
    std::allocator<_Tp> __allocator_;
    _Tp* __ptr_;
    const std::size_t __buf_size_;
    __buffer(const __buffer&) = delete;
    void
    operator=(const __buffer&) = delete;

  public:
    __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}

    operator bool() const { return __ptr_ != nullptr; }

    _Tp*
    get() const
    {
        return __ptr_;
    }
    ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
};
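
// A minimal usage sketch (hypothetical caller, for illustration only): the
// buffer hands out uninitialized storage for __n objects and reclaims the
// allocation on scope exit.
//
//     __pstl::__omp_backend::__buffer<int> __tmp(__n);
//     if (__tmp)
//     {
//         // construct elements at __tmp.get(), use them, destroy them;
//         // the allocation itself is released by ~__buffer().
//     }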

// Preliminary size of each chunk: requires further discussion
inline constexpr std::size_t __default_chunk_size = 2048;

// Convenience function to determine when we should run serially.
template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
constexpr auto
__should_run_serial(_Iterator __first, _Iterator __last) -> bool
{
    using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
    auto __size = std::distance(__first, __last);
    return __size <= static_cast<_difference_type>(__default_chunk_size);
}

template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
constexpr auto
__should_run_serial(_Index __first, _Index __last) -> bool
{
    using _difference_type = _Index;
    auto __size = __last - __first;
    return __size <= static_cast<_difference_type>(__default_chunk_size);
}
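
// A sketch of the intended call pattern (hypothetical caller, for
// illustration only): a parallel entry point falls back to the serial
// algorithm when the range is at most one chunk long.
//
//     if (__omp_backend::__should_run_serial(__first, __last))
//         return std::for_each(__first, __last, __f); // serial fallback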

struct __chunk_metrics
{
    std::size_t __n_chunks;
    std::size_t __chunk_size;
    std::size_t __first_chunk_size;
};

// Partition the iteration space into chunks of approximately __requested_chunk_size elements.
template <class _RandomAccessIterator, class _Size = std::size_t>
auto
__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
                    _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
{
    /*
     * This algorithm improves the distribution of elements in chunks by
     * avoiding small tail chunks. Leftover elements that do not fit neatly
     * into the chunk size are redistributed to the early chunks, which
     * improves utilization of the processor's prefetcher and reduces the
     * number of tasks needed by one.
     */

    const _Size __n = __last - __first;
    _Size __n_chunks = 0;
    _Size __chunk_size = 0;
    _Size __first_chunk_size = 0;
    if (__n < __requested_chunk_size)
    {
        __chunk_size = __n;
        __first_chunk_size = __n;
        __n_chunks = 1;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }

    __n_chunks = (__n / __requested_chunk_size) + 1;
    __chunk_size = __n / __n_chunks;
    __first_chunk_size = __chunk_size;
    const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size);

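    // If the leftover is exactly one chunk's worth of items, emit one extra
    // full chunk rather than redistributing the remainder.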
    if (__n_leftover_items == __chunk_size)
    {
        __n_chunks += 1;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }
    else if (__n_leftover_items == 0)
    {
        __first_chunk_size = __chunk_size;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }

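    // Otherwise spread the leftover evenly across all chunks; any remainder
    // goes to the first chunk, which may therefore be slightly larger.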
    const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks;
    const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks);

    __chunk_size += __n_extra_items_per_chunk;
    __first_chunk_size = __chunk_size + __n_final_leftover_items;

    return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
}
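
// A worked example (illustrative only): for __n = 10007 elements and the
// default __requested_chunk_size of 2048,
//
//     __n_chunks          = 10007 / 2048 + 1  = 5
//     __chunk_size        = 10007 / 5         = 2001
//     __n_leftover_items  = 10007 - 5 * 2001  = 2
//     extras per chunk    = 2 / 5             = 0
//     __first_chunk_size  = 2001 + 2          = 2003
//
// so the metrics are {5, 2001, 2003}: one chunk of 2003 elements followed by
// four chunks of 2001, covering all 10007 elements with no small tail chunk.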

template <typename _Iterator, typename _Index, typename _Func>
void
__process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
{
    auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size;
    auto __index = __chunk_index == 0 ? 0
                                      : (__chunk_index * __metrics.__chunk_size) +
                                            (__metrics.__first_chunk_size - __metrics.__chunk_size);
    auto __first = __base + __index;
    auto __last = __first + __this_chunk_size;
    __f(__first, __last);
}
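
// A sketch of how a backend loop might drive the partitioner and the chunk
// processor together (hypothetical caller, for illustration only; __f is
// assumed to accept a [first, last) subrange):
//
//     auto __metrics = __omp_backend::__chunk_partitioner(__first, __last);
//     _PSTL_PRAGMA(omp taskloop untied mergeable)
//     for (std::size_t __chunk = 0; __chunk < __metrics.__n_chunks; ++__chunk)
//         __omp_backend::__process_chunk(__metrics, __first, __chunk, __f);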

} // namespace __omp_backend
} // namespace __pstl

#endif // _PSTL_INTERNAL_OMP_UTIL_H