1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11 
12 #ifndef BOOST_MOVE_ADAPTIVE_SORT_HPP
13 #define BOOST_MOVE_ADAPTIVE_SORT_HPP
14 
15 #include <boost/move/detail/config_begin.hpp>
16 #include <boost/move/algo/detail/adaptive_sort_merge.hpp>
17 #include <boost/core/ignore_unused.hpp>
18 
19 namespace boost {
20 namespace movelib {
21 
22 ///@cond
23 namespace detail_adaptive {
24 
25 template<class RandIt>
move_data_backward(RandIt cur_pos,typename iterator_traits<RandIt>::size_type const l_data,RandIt new_pos,bool const xbuf_used)26 void move_data_backward( RandIt cur_pos
27               , typename iterator_traits<RandIt>::size_type const l_data
28               , RandIt new_pos
29               , bool const xbuf_used)
30 {
31    //Move buffer to the total combination right
32    if(xbuf_used){
33       boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
34    }
35    else{
36       boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
37       //Rotate does less moves but it seems slower due to cache issues
38       //rotate_gcd(first-l_block, first+len-l_block, first+len);
39    }
40 }
41 
42 template<class RandIt>
move_data_forward(RandIt cur_pos,typename iterator_traits<RandIt>::size_type const l_data,RandIt new_pos,bool const xbuf_used)43 void move_data_forward( RandIt cur_pos
44               , typename iterator_traits<RandIt>::size_type const l_data
45               , RandIt new_pos
46               , bool const xbuf_used)
47 {
48    //Move buffer to the total combination right
49    if(xbuf_used){
50       boost::move(cur_pos, cur_pos+l_data, new_pos);
51    }
52    else{
53       boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
54       //Rotate does less moves but it seems slower due to cache issues
55       //rotate_gcd(first-l_block, first+len-l_block, first+len);
56    }
57 }
58 
59 // build blocks of length 2*l_build_buf. l_build_buf is power of two
60 // input: [0, l_build_buf) elements are buffer, rest unsorted elements
61 // output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
62 //
63 // First elements are merged from right to left until elements start
64 // at first. All old elements [first, first + l_build_buf) are placed at the end
65 // [first+len-l_build_buf, first+len). To achieve this:
66 // - If we have external memory to merge, we save elements from the buffer
67 //   so that a non-swapping merge is used. Buffer elements are restored
68 //   at the end of the buffer from the external memory.
69 //
70 // - When the external memory is not available or it is insufficient
71 //   for a merge operation, left swap merging is used.
72 //
73 // Once elements are merged left to right in blocks of l_build_buf, then a single left
74 // to right merge step is performed to achieve merged blocks of size 2K.
75 // If external memory is available, usual merge is used, swap merging otherwise.
76 //
77 // As a last step, if auxiliary memory is available in-place merge is performed.
78 // until all is merged or auxiliary memory is not large enough.
79 template<class RandIt, class Compare, class XBuf>
80 typename iterator_traits<RandIt>::size_type
adaptive_sort_build_blocks(RandIt const first,typename iterator_traits<RandIt>::size_type const len,typename iterator_traits<RandIt>::size_type const l_base,typename iterator_traits<RandIt>::size_type const l_build_buf,XBuf & xbuf,Compare comp)81    adaptive_sort_build_blocks
82       ( RandIt const first
83       , typename iterator_traits<RandIt>::size_type const len
84       , typename iterator_traits<RandIt>::size_type const l_base
85       , typename iterator_traits<RandIt>::size_type const l_build_buf
86       , XBuf & xbuf
87       , Compare comp)
88 {
89    typedef typename iterator_traits<RandIt>::size_type  size_type;
90    BOOST_ASSERT(l_build_buf <= len);
91    BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));
92 
93    //Place the start pointer after the buffer
94    RandIt first_block = first + l_build_buf;
95    size_type const elements_in_blocks = len - l_build_buf;
96 
97    //////////////////////////////////
98    // Start of merge to left step
99    //////////////////////////////////
100    size_type l_merged = 0u;
101 
102    BOOST_ASSERT(l_build_buf);
103    //If there is no enough buffer for the insertion sort step, just avoid the external buffer
104    size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
105    kbuf = kbuf < l_base ? 0 : kbuf;
106 
107    if(kbuf){
108       //Backup internal buffer values in external buffer so they can be overwritten
109       xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
110       l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
111 
112       //Now combine them using the buffer. Elements from buffer can be
113       //overwritten since they've been saved to xbuf
114       l_merged = op_merge_left_step_multiple
115          ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());
116 
117       //Restore internal buffer from external buffer unless kbuf was l_build_buf,
118       //in that case restoration will happen later
119       if(kbuf != l_build_buf){
120          boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
121       }
122    }
123    else{
124       l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
125       rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
126    }
127 
128    //Now combine elements using the buffer. Elements from buffer can't be
129    //overwritten since xbuf was not big enough, so merge swapping elements.
130    l_merged = op_merge_left_step_multiple
131       (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());
132 
133    BOOST_ASSERT(l_merged == l_build_buf);
134 
135    //////////////////////////////////
136    // Start of merge to right step
137    //////////////////////////////////
138 
139    //If kbuf is l_build_buf then we can merge right without swapping
140    //Saved data is still in xbuf
141    if(kbuf && kbuf == l_build_buf){
142       op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
143       //Restore internal buffer from external buffer if kbuf was l_build_buf.
144       //as this operation was previously delayed.
145       boost::move(xbuf.data(), xbuf.data() + kbuf, first);
146    }
147    else{
148       op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
149    }
150    xbuf.clear();
151    //2*l_build_buf or total already merged
152    return min_value<size_type>(elements_in_blocks, 2*l_build_buf);
153 }
154 
155 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
adaptive_sort_combine_blocks(RandItKeys const keys,KeyCompare key_comp,RandIt const first,typename iterator_traits<RandIt>::size_type const len,typename iterator_traits<RandIt>::size_type const l_prev_merged,typename iterator_traits<RandIt>::size_type const l_block,bool const use_buf,bool const xbuf_used,XBuf & xbuf,Compare comp,bool merge_left)156 void adaptive_sort_combine_blocks
157    ( RandItKeys const keys
158    , KeyCompare key_comp
159    , RandIt const first
160    , typename iterator_traits<RandIt>::size_type const len
161    , typename iterator_traits<RandIt>::size_type const l_prev_merged
162    , typename iterator_traits<RandIt>::size_type const l_block
163    , bool const use_buf
164    , bool const xbuf_used
165    , XBuf & xbuf
166    , Compare comp
167    , bool merge_left)
168 {
169    boost::ignore_unused(xbuf);
170    typedef typename iterator_traits<RandIt>::size_type   size_type;
171 
172    size_type const l_reg_combined   = 2*l_prev_merged;
173    size_type l_irreg_combined = 0;
174    size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
175    size_type const n_reg_combined = len/l_reg_combined;
176    RandIt combined_first = first;
177 
178    boost::ignore_unused(l_total_combined);
179    BOOST_ASSERT(l_total_combined <= len);
180 
181    size_type const max_i = n_reg_combined + (l_irreg_combined != 0);
182 
183    if(merge_left || !use_buf) {
184       for( size_type combined_i = 0; combined_i != max_i; ) {
185          //Now merge blocks
186          bool const is_last = combined_i==n_reg_combined;
187          size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
188 
189          range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
190          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
191          combine_params( keys, key_comp, l_cur_combined
192                         , l_prev_merged, l_block, rbuf
193                         , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
194          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
195          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
196             BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
197          if(!use_buf){
198             merge_blocks_bufferless
199                (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
200          }
201          else{
202             merge_blocks_left
203                (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
204          }
205          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_L: ", len + l_block);
206          ++combined_i;
207          if(combined_i != max_i)
208             combined_first += l_reg_combined;
209       }
210    }
211    else{
212       combined_first += l_reg_combined*(max_i-1);
213       for( size_type combined_i = max_i; combined_i; ) {
214          --combined_i;
215          bool const is_last = combined_i==n_reg_combined;
216          size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
217 
218          RandIt const combined_last(combined_first+l_cur_combined);
219          range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
220          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
221          combine_params( keys, key_comp, l_cur_combined
222                         , l_prev_merged, l_block, rbuf
223                         , n_block_a, n_block_b, l_irreg1, l_irreg2);  //Outputs
224          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
225          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
226          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
227          merge_blocks_right
228             (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
229          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_R: ", len + l_block);
230          if(combined_i)
231             combined_first -= l_reg_combined;
232       }
233    }
234 }
235 
236 //Returns true if buffer is placed in
237 //[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
238 //[buffer,buffer+l_intbuf)
239 template<class RandIt, class Compare, class XBuf>
adaptive_sort_combine_all_blocks(RandIt keys,typename iterator_traits<RandIt>::size_type & n_keys,RandIt const buffer,typename iterator_traits<RandIt>::size_type const l_buf_plus_data,typename iterator_traits<RandIt>::size_type l_merged,typename iterator_traits<RandIt>::size_type & l_intbuf,XBuf & xbuf,Compare comp)240 bool adaptive_sort_combine_all_blocks
241    ( RandIt keys
242    , typename iterator_traits<RandIt>::size_type &n_keys
243    , RandIt const buffer
244    , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
245    , typename iterator_traits<RandIt>::size_type l_merged
246    , typename iterator_traits<RandIt>::size_type &l_intbuf
247    , XBuf & xbuf
248    , Compare comp)
249 {
250    typedef typename iterator_traits<RandIt>::size_type  size_type;
251    RandIt const first = buffer + l_intbuf;
252    size_type const l_data = l_buf_plus_data - l_intbuf;
253    size_type const l_unique = l_intbuf+n_keys;
254    //Backup data to external buffer once if possible
255    bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
256    if(common_xbuf){
257       xbuf.move_assign(buffer, l_intbuf);
258    }
259 
260    bool prev_merge_left = true;
261    size_type l_prev_total_combined = l_merged, l_prev_block = 0;
262    bool prev_use_internal_buf = true;
263 
264    for( size_type n = 0; l_data > l_merged
265       ; l_merged*=2
266       , ++n){
267       //If l_intbuf is non-zero, use that internal buffer.
268       //    Implies l_block == l_intbuf && use_internal_buf == true
269       //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
270       //    Implies l_block == n_keys/2 && use_internal_buf == true
271       //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false)
272       bool use_internal_buf = false;
273       size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf);
274       BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
275       BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
276       BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
277 
278       bool const is_merge_left = (n&1) == 0;
279       size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
280       if(n && prev_use_internal_buf && prev_merge_left){
281          if(is_merge_left || !use_internal_buf){
282             move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
283          }
284          else{
285             //Put the buffer just after l_total_combined
286             RandIt const buf_end = first+l_prev_total_combined;
287             RandIt const buf_beg = buf_end-l_block;
288             if(l_prev_total_combined > l_total_combined){
289                size_type const l_diff = l_prev_total_combined - l_total_combined;
290                move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
291             }
292             else if(l_prev_total_combined < l_total_combined){
293                size_type const l_diff = l_total_combined - l_prev_total_combined;
294                move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
295             }
296          }
297          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After move_data     : ", l_data + l_intbuf);
298       }
299 
300       //Combine to form l_merged*2 segments
301       if(n_keys){
302          size_type upper_n_keys_this_iter = 2*l_merged/l_block;
303          if(upper_n_keys_this_iter > 256){
304             adaptive_sort_combine_blocks
305                ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
306                , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
307          }
308          else{
309             unsigned char uint_keys[256];
310             adaptive_sort_combine_blocks
311                ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
312                , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
313             }
314       }
315       else{
316          size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
317          adaptive_sort_combine_blocks
318             ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
319             , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
320       }
321 
322       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? "   After comb blocks L:  " : "   After comb blocks R:  ", l_data + l_intbuf);
323       prev_merge_left = is_merge_left;
324       l_prev_total_combined = l_total_combined;
325       l_prev_block = l_block;
326       prev_use_internal_buf = use_internal_buf;
327    }
328    BOOST_ASSERT(l_prev_total_combined == l_data);
329    bool const buffer_right = prev_use_internal_buf && prev_merge_left;
330 
331    l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
332    n_keys = l_unique - l_intbuf;
333    //Restore data from to external common buffer if used
334    if(common_xbuf){
335       if(buffer_right){
336          boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
337       }
338       else{
339          boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
340       }
341    }
342    return buffer_right;
343 }
344 
345 
346 template<class RandIt, class Compare, class XBuf>
adaptive_sort_final_merge(bool buffer_right,RandIt const first,typename iterator_traits<RandIt>::size_type const l_intbuf,typename iterator_traits<RandIt>::size_type const n_keys,typename iterator_traits<RandIt>::size_type const len,XBuf & xbuf,Compare comp)347 void adaptive_sort_final_merge( bool buffer_right
348                               , RandIt const first
349                               , typename iterator_traits<RandIt>::size_type const l_intbuf
350                               , typename iterator_traits<RandIt>::size_type const n_keys
351                               , typename iterator_traits<RandIt>::size_type const len
352                               , XBuf & xbuf
353                               , Compare comp)
354 {
355    //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
356    xbuf.clear();
357 
358    typedef typename iterator_traits<RandIt>::size_type  size_type;
359    size_type const n_key_plus_buf = l_intbuf+n_keys;
360    if(buffer_right){
361       //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
362       stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
363       stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
364       unstable_sort(first, first+n_keys, comp, xbuf);
365       stable_merge(first, first+n_keys, first+len, comp, xbuf);
366    }
367    else{
368       //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
369       stable_sort(first, first+n_key_plus_buf, comp, xbuf);
370       if(xbuf.capacity() >= n_key_plus_buf){
371          buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
372       }
373       else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
374          stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
375          stable_merge(first, first+n_keys, first+len, comp, xbuf);
376       }
377       else{
378          stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
379       }
380    }
381    BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After final_merge   : ", len);
382 }
383 
384 template<class RandIt, class Compare, class Unsigned, class XBuf>
adaptive_sort_build_params(RandIt first,Unsigned const len,Compare comp,Unsigned & n_keys,Unsigned & l_intbuf,Unsigned & l_base,Unsigned & l_build_buf,XBuf & xbuf)385 bool adaptive_sort_build_params
386    (RandIt first, Unsigned const len, Compare comp
387    , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
388    , XBuf & xbuf
389    )
390 {
391    typedef Unsigned size_type;
392 
393    //Calculate ideal parameters and try to collect needed unique keys
394    l_base = 0u;
395 
396    //Try to find a value near sqrt(len) that is 2^N*l_base where
397    //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
398    //as build_blocks merges to the left iteratively duplicating the
399    //merged size and all the buffer must be used just before the final
400    //merge to right step. This guarantees "build_blocks" produces
401    //segments of size l_build_buf*2, maximizing the classic merge phase.
402    l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
403 
404    //The internal buffer can be expanded if there is enough external memory
405    while(xbuf.capacity() >= l_intbuf*2){
406       l_intbuf *= 2;
407    }
408 
409    //This is the minimum number of keys to implement the ideal algorithm
410    //
411    //l_intbuf is used as buffer plus the key count
412    size_type n_min_ideal_keys = l_intbuf-1;
413    while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
414       --n_min_ideal_keys;
415    }
416    n_min_ideal_keys += 1;
417    BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);
418 
419    if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
420       n_keys = 0u;
421       l_build_buf = l_intbuf;
422    }
423    else{
424       //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
425       //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
426       //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
427       //
428       //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
429       //(to be used for keys in combine_all_blocks) as the whole l_build_buf
430       //will be backuped in the buffer during build_blocks.
431       bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
432       size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
433       size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
434 
435       //If available memory is 2*sqrt(l), then for "build_params"
436       //the situation is the same as if 2*l_intbuf were collected.
437       if(non_unique_buf && collected == n_min_ideal_keys){
438          l_build_buf = l_intbuf;
439          n_keys = n_min_ideal_keys;
440       }
441       else if(collected == 2*l_intbuf){
442          //l_intbuf*2 elements found. Use all of them in the build phase
443          l_build_buf = l_intbuf*2;
444          n_keys = l_intbuf;
445       }
446       else if(collected == (n_min_ideal_keys+l_intbuf)){
447          l_build_buf = l_intbuf;
448          n_keys = n_min_ideal_keys;
449       }
450       //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
451       //is possible (due to very low unique keys), then go to a slow sort based on rotations.
452       else{
453          BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
454          if(collected < 4){  //No combination possible with less that 4 keys
455             return false;
456          }
457          n_keys = l_intbuf;
458          while(n_keys&(n_keys-1)){
459             n_keys &= n_keys-1;  // make it power or 2
460          }
461          while(n_keys > collected){
462             n_keys/=2;
463          }
464          //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
465          l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
466          l_intbuf = 0;
467          l_build_buf = n_keys;
468       }
469       BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
470    }
471 
472    return true;
473 }
474 
475 // Main explanation of the sort algorithm.
476 //
477 // csqrtlen = ceil(sqrt(len));
478 //
// * First, 2*csqrtlen unique elements are extracted from the elements to be
//   sorted and placed in the beginning of the range.
481 //
// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
//   will be used as auxiliary memory, so the trailing len-2*csqrtlen elements
//   are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
//   2*csqrtlen unique elements are again the leading elements of the whole range.
486 //
// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
//   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
//   "build_blocks" step and is repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
//   elements, etc.) until all trailing (len-2*csqrtlen) elements are merged.
491 //
//   In "combine_blocks" len/csqrtlen elements are used as "keys" (markers) to
//   know if elements belong to the first or second block to be merged and another
//   leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
495 //
496 //   Iteratively until all trailing (len-2*csqrtlen) elements are merged:
497 //      Iteratively for each pair of previously merged block:
//         * Blocks are divided into groups of csqrtlen elements and
//           2*merged_block/csqrtlen keys are sorted to be used as markers
500 //         * Groups are selection-sorted by first or last element (depending whether they are going
501 //           to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
502 //         * Elements of each block pair are merged using the csqrtlen buffer taking into account
503 //           if they belong to the first half or second half (marked by the key).
504 //
505 // * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
506 //   rotations with the rest of sorted elements in the "combine_blocks" step.
507 //
508 // Corner cases:
509 //
510 // * If no 2*csqrtlen elements can be extracted:
511 //
512 //    * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
513 //      as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
514 //      means that an additional "combine_blocks" step will be needed to merge all elements.
515 //
//    * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
//      then the number of elements used as buffer and keys in the "build_blocks"
//      and "combine_blocks" steps is reduced. If "combine_blocks" does not have enough keys
//      due to this reduction, then a rotation-based smart merge is used.
520 //
521 //    * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
522 //
523 // * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used.
524 //
525 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
526 //   then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
527 //   keys to combine blocks.
528 //
529 // * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
530 //   using classic merge and "combine_blocks" will use bigger blocks when merging.
531 template<class RandIt, class Compare, class XBuf>
adaptive_sort_impl(RandIt first,typename iterator_traits<RandIt>::size_type const len,Compare comp,XBuf & xbuf)532 void adaptive_sort_impl
533    ( RandIt first
534    , typename iterator_traits<RandIt>::size_type const len
535    , Compare comp
536    , XBuf & xbuf
537    )
538 {
539    typedef typename iterator_traits<RandIt>::size_type  size_type;
540 
541    //Small sorts go directly to insertion sort
542    if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
543       insertion_sort(first, first + len, comp);
544    }
545    else if((len-len/2) <= xbuf.capacity()){
546       merge_sort(first, first+len, comp, xbuf.data());
547    }
548    else{
549       //Make sure it is at least four
550       BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
551 
552       size_type l_base = 0;
553       size_type l_intbuf = 0;
554       size_type n_keys = 0;
555       size_type l_build_buf = 0;
556 
557       //Calculate and extract needed unique elements. If a minimum is not achieved
558       //fallback to a slow stable sort
559       if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
560          stable_sort(first, first+len, comp, xbuf);
561       }
562       else{
563          BOOST_ASSERT(l_build_buf);
564          //Otherwise, continue the adaptive_sort
565          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   After collect_unique: ", len);
566          size_type const n_key_plus_buf = l_intbuf+n_keys;
567          //l_build_buf is always power of two if l_intbuf is zero
568          BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
569 
570          //Classic merge sort until internal buffer and xbuf are exhausted
571          size_type const l_merged = adaptive_sort_build_blocks
572             (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
573          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After build_blocks:   ", len);
574 
575          //Non-trivial merge
576          bool const buffer_right = adaptive_sort_combine_all_blocks
577             (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);
578 
579          //Sort keys and buffer and merge the whole sequence
580          adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
581       }
582    }
583 }
584 
585 }  //namespace detail_adaptive {
586 
587 ///@endcond
588 
589 //! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
590 //!   to comparison functor "comp". The sort is stable (order of equal elements
591 //!   is guaranteed to be preserved). Performance is improved if additional raw storage is
592 //!   provided.
593 //!
594 //! <b>Requires</b>:
595 //!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
596 //!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
597 //!
598 //! <b>Parameters</b>:
599 //!   - first, last: the range of elements to sort
//!   - comp: comparison function object which returns true if the first argument is ordered before the second.
601 //!   - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
602 //!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
603 //!      is ceil(std::distance(first, last)/2).
604 //!
605 //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
606 //!   of dereferenced RandIt throws.
607 //!
//! <b>Complexity</b>: Always K x O(Nxlog(N)) comparisons and move assignments/constructors/swaps.
//!   Comparisons are close to minimum even with no additional memory. Constant factor for data movement is minimized
//!   when uninitialized_len is ceil(std::distance(first, last)/2). Reasonably good performance is achieved when
//!   uninitialized_len is ceil(sqrt(std::distance(first, last)))*2.
612 //!
613 //! <b>Caution</b>: Experimental implementation, not production-ready.
614 template<class RandIt, class RandRawIt, class Compare>
adaptive_sort(RandIt first,RandIt last,Compare comp,RandRawIt uninitialized,typename iterator_traits<RandIt>::size_type uninitialized_len)615 void adaptive_sort( RandIt first, RandIt last, Compare comp
616                , RandRawIt uninitialized
617                , typename iterator_traits<RandIt>::size_type uninitialized_len)
618 {
619    typedef typename iterator_traits<RandIt>::size_type  size_type;
620    typedef typename iterator_traits<RandIt>::value_type value_type;
621 
622    ::boost::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len);
623    ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf);
624 }
625 
626 template<class RandIt, class Compare>
adaptive_sort(RandIt first,RandIt last,Compare comp)627 void adaptive_sort( RandIt first, RandIt last, Compare comp)
628 {
629    typedef typename iterator_traits<RandIt>::value_type value_type;
630    adaptive_sort(first, last, comp, (value_type*)0, 0u);
631 }
632 
633 }  //namespace movelib {
634 }  //namespace boost {
635 
636 #include <boost/move/detail/config_end.hpp>
637 
638 #endif   //#define BOOST_MOVE_ADAPTIVE_SORT_HPP
639