1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11
12 #ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP
13 #define BOOST_MOVE_ADAPTIVE_MERGE_HPP
14
15 #include <boost/move/detail/config_begin.hpp>
16 #include <boost/move/algo/detail/adaptive_sort_merge.hpp>
17
18 namespace boost {
19 namespace movelib {
20
21 ///@cond
22 namespace detail_adaptive {
23
24 template<class RandIt, class Compare, class XBuf>
adaptive_merge_combine_blocks(RandIt first,typename iterator_traits<RandIt>::size_type len1,typename iterator_traits<RandIt>::size_type len2,typename iterator_traits<RandIt>::size_type collected,typename iterator_traits<RandIt>::size_type n_keys,typename iterator_traits<RandIt>::size_type l_block,bool use_internal_buf,bool xbuf_used,Compare comp,XBuf & xbuf)25 inline void adaptive_merge_combine_blocks( RandIt first
26 , typename iterator_traits<RandIt>::size_type len1
27 , typename iterator_traits<RandIt>::size_type len2
28 , typename iterator_traits<RandIt>::size_type collected
29 , typename iterator_traits<RandIt>::size_type n_keys
30 , typename iterator_traits<RandIt>::size_type l_block
31 , bool use_internal_buf
32 , bool xbuf_used
33 , Compare comp
34 , XBuf & xbuf
35 )
36 {
37 typedef typename iterator_traits<RandIt>::size_type size_type;
38 size_type const len = len1+len2;
39 size_type const l_combine = len-collected;
40 size_type const l_combine1 = len1-collected;
41
42 if(n_keys){
43 RandIt const first_data = first+collected;
44 RandIt const keys = first;
45 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
46 if(xbuf_used){
47 if(xbuf.size() < l_block){
48 xbuf.initialize_until(l_block, *first);
49 }
50 BOOST_ASSERT(xbuf.size() >= l_block);
51 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
52 combine_params( keys, comp, l_combine
53 , l_combine1, l_block, xbuf
54 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
55 op_merge_blocks_with_buf
56 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
57 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len);
58 }
59 else{
60 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
61 combine_params( keys, comp, l_combine
62 , l_combine1, l_block, xbuf
63 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
64 if(use_internal_buf){
65 op_merge_blocks_with_buf
66 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block);
67 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len);
68 }
69 else{
70 merge_blocks_bufferless
71 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
72 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len);
73 }
74 }
75 }
76 else{
77 xbuf.shrink_to_fit(l_block);
78 if(xbuf.size() < l_block){
79 xbuf.initialize_until(l_block, *first);
80 }
81 size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
82 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
83 combine_params( uint_keys, less(), l_combine
84 , l_combine1, l_block, xbuf
85 , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs
86 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
87 BOOST_ASSERT(xbuf.size() >= l_block);
88 op_merge_blocks_with_buf
89 (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
90 xbuf.clear();
91 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len);
92 }
93 }
94
95 template<class RandIt, class Compare, class XBuf>
adaptive_merge_final_merge(RandIt first,typename iterator_traits<RandIt>::size_type len1,typename iterator_traits<RandIt>::size_type len2,typename iterator_traits<RandIt>::size_type collected,typename iterator_traits<RandIt>::size_type l_intbuf,typename iterator_traits<RandIt>::size_type,bool,bool xbuf_used,Compare comp,XBuf & xbuf)96 inline void adaptive_merge_final_merge( RandIt first
97 , typename iterator_traits<RandIt>::size_type len1
98 , typename iterator_traits<RandIt>::size_type len2
99 , typename iterator_traits<RandIt>::size_type collected
100 , typename iterator_traits<RandIt>::size_type l_intbuf
101 , typename iterator_traits<RandIt>::size_type //l_block
102 , bool //use_internal_buf
103 , bool xbuf_used
104 , Compare comp
105 , XBuf & xbuf
106 )
107 {
108 typedef typename iterator_traits<RandIt>::size_type size_type;
109 size_type n_keys = collected-l_intbuf;
110 size_type len = len1+len2;
111 if (!xbuf_used || n_keys) {
112 xbuf.clear();
113 const size_type middle = xbuf_used && n_keys ? n_keys: collected;
114 unstable_sort(first, first + middle, comp, xbuf);
115 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
116 stable_merge(first, first + middle, first + len, comp, xbuf);
117 }
118 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len);
119 }
120
121 template<class SizeType>
adaptive_merge_n_keys_without_external_keys(SizeType l_block,SizeType len1,SizeType len2,SizeType l_intbuf)122 inline static SizeType adaptive_merge_n_keys_without_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
123 {
124 typedef SizeType size_type;
125 //This is the minimum number of keys to implement the ideal algorithm
126 size_type n_keys = len1/l_block+len2/l_block;
127 const size_type second_half_blocks = len2/l_block;
128 const size_type first_half_aux = len1-l_intbuf;
129 while(n_keys >= ((first_half_aux-n_keys)/l_block + second_half_blocks)){
130 --n_keys;
131 }
132 ++n_keys;
133 return n_keys;
134 }
135
136 template<class SizeType>
adaptive_merge_n_keys_with_external_keys(SizeType l_block,SizeType len1,SizeType len2,SizeType l_intbuf)137 inline static SizeType adaptive_merge_n_keys_with_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
138 {
139 typedef SizeType size_type;
140 //This is the minimum number of keys to implement the ideal algorithm
141 size_type n_keys = (len1-l_intbuf)/l_block + len2/l_block;
142 return n_keys;
143 }
144
145 template<class SizeType, class Xbuf>
adaptive_merge_n_keys_intbuf(SizeType & rl_block,SizeType len1,SizeType len2,Xbuf & xbuf,SizeType & l_intbuf_inout)146 inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
147 {
148 typedef SizeType size_type;
149 size_type l_block = rl_block;
150 size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
151
152 if (xbuf.capacity() > l_block){
153 l_block = xbuf.capacity();
154 }
155
156 //This is the minimum number of keys to implement the ideal algorithm
157 size_type n_keys = adaptive_merge_n_keys_without_external_keys(l_block, len1, len2, l_intbuf);
158 BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
159
160 if(xbuf.template supports_aligned_trailing<size_type>
161 ( l_block
162 , adaptive_merge_n_keys_with_external_keys(l_block, len1, len2, l_intbuf)))
163 {
164 n_keys = 0u;
165 }
166 l_intbuf_inout = l_intbuf;
167 rl_block = l_block;
168 return n_keys;
169 }
170
171 // Main explanation of the merge algorithm.
172 //
173 // csqrtlen = ceil(sqrt(len));
174 //
175 // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
176 // unique elements are extracted from elements to be sorted and placed in the beginning of the range.
177 //
178 // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
179 // are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
180 //
181 // Explanation of the "combine_blocks" step:
182 //
183 // * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements.
184 // Remaining elements that can't form a group are grouped in front of those elements.
185 // * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements.
186 // Remaining elements that can't form a group are grouped in the back of those elements.
187 // * In parallel the following two steps are performed:
188 // * Groups are selection-sorted by first or last element (depending whether they are going
189 // to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
190 // * Elements of each block pair are merged using the csqrtlen buffer taking into account
191 // if they belong to the first half or second half (marked by the key).
192 //
193 // * In the final merge step leading "to_collect" elements are merged with rotations
194 // with the rest of merged elements in the "combine_blocks" step.
195 //
196 // Corner cases:
197 //
198 // * If no "to_collect" elements can be extracted:
199 //
200 // * If more than a minimum number of elements is extracted
201 // then reduces the number of elements used as buffer and keys in the
202 // and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
203 // then uses a rotation based smart merge.
204 //
205 // * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
206 //
207 // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed.
208 //
209 // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed.
210 //
211 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
212 // then no csqrtlen need to be extracted and "combine_blocks" will use integral
213 // keys to combine blocks.
214 template<class RandIt, class Compare, class XBuf>
adaptive_merge_impl(RandIt first,typename iterator_traits<RandIt>::size_type len1,typename iterator_traits<RandIt>::size_type len2,Compare comp,XBuf & xbuf)215 void adaptive_merge_impl
216 ( RandIt first
217 , typename iterator_traits<RandIt>::size_type len1
218 , typename iterator_traits<RandIt>::size_type len2
219 , Compare comp
220 , XBuf & xbuf
221 )
222 {
223 typedef typename iterator_traits<RandIt>::size_type size_type;
224
225 if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
226 buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
227 }
228 else{
229 const size_type len = len1+len2;
230 //Calculate ideal parameters and try to collect needed unique keys
231 size_type l_block = size_type(ceil_sqrt(len));
232
233 //One range is not big enough to extract keys and the internal buffer so a
234 //rotation-based based merge will do just fine
235 if(len1 <= l_block*2 || len2 <= l_block*2){
236 merge_bufferless(first, first+len1, first+len1+len2, comp);
237 return;
238 }
239
240 //Detail the number of keys and internal buffer. If xbuf has enough memory, no
241 //internal buffer is needed so l_intbuf will remain 0.
242 size_type l_intbuf = 0;
243 size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
244 size_type const to_collect = l_intbuf+n_keys;
245 //Try to extract needed unique values from the first range
246 size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
247 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len);
248
249 //Not the minimum number of keys is not available on the first range, so fallback to rotations
250 if(collected != to_collect && collected < 4){
251 merge_bufferless(first, first+collected, first+len1, comp);
252 merge_bufferless(first, first + len1, first + len1 + len2, comp);
253 return;
254 }
255
256 //If not enough keys but more than minimum, adjust the internal buffer and key count
257 bool use_internal_buf = collected == to_collect;
258 if (!use_internal_buf){
259 l_intbuf = 0u;
260 n_keys = collected;
261 l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
262 //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used
263 l_intbuf = use_internal_buf ? l_block : 0u;
264 }
265
266 bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
267 //Merge trailing elements using smart merges
268 adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
269 //Merge buffer and keys with the rest of the values
270 adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
271 }
272 }
273
274 } //namespace detail_adaptive {
275
276 ///@endcond
277
278 //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last)
279 //! into one sorted range [first, last) according to the given comparison function comp.
280 //! The algorithm is stable (if there are equivalent elements in the original two ranges,
281 //! the elements from the first range (preserving their original order) precede the elements
282 //! from the second range (preserving their original order).
283 //!
284 //! <b>Requires</b>:
285 //! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
286 //! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
287 //!
288 //! <b>Parameters</b>:
289 //! - first: the beginning of the first sorted range.
290 //! - middle: the end of the first sorted range and the beginning of the second
291 //! - last: the end of the second sorted range
292 //! - comp: comparison function object which returns true if the first argument is is ordered before the second.
293 //! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
294 //! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
295 //! is min(std::distance(first, middle), std::distance(middle, last)).
296 //!
297 //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
298 //! of dereferenced RandIt throws.
299 //!
300 //! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps.
301 //! Constant factor for comparisons and data movement is minimized when uninitialized_len
302 //! is min(std::distance(first, middle), std::distance(middle, last)).
303 //! Pretty good enough performance is achieved when uninitialized_len is
304 //! ceil(sqrt(std::distance(first, last)))*2.
305 //!
306 //! <b>Caution</b>: Experimental implementation, not production-ready.
307 template<class RandIt, class Compare>
adaptive_merge(RandIt first,RandIt middle,RandIt last,Compare comp,typename iterator_traits<RandIt>::value_type * uninitialized=0,typename iterator_traits<RandIt>::size_type uninitialized_len=0)308 void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp
309 , typename iterator_traits<RandIt>::value_type* uninitialized = 0
310 , typename iterator_traits<RandIt>::size_type uninitialized_len = 0)
311 {
312 typedef typename iterator_traits<RandIt>::size_type size_type;
313 typedef typename iterator_traits<RandIt>::value_type value_type;
314
315 if (first == middle || middle == last){
316 return;
317 }
318
319 //Reduce ranges to merge if possible
320 do {
321 if (comp(*middle, *first)){
322 break;
323 }
324 ++first;
325 if (first == middle)
326 return;
327 } while(1);
328
329 RandIt first_high(middle);
330 --first_high;
331 do {
332 --last;
333 if (comp(*last, *first_high)){
334 ++last;
335 break;
336 }
337 if (last == middle)
338 return;
339 } while(1);
340
341 ::boost::movelib::adaptive_xbuf<value_type, value_type*, size_type> xbuf(uninitialized, size_type(uninitialized_len));
342 ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf);
343 }
344
345 } //namespace movelib {
346 } //namespace boost {
347
348 #include <boost/move/detail/config_end.hpp>
349
350 #endif //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP
351