1 /* BwtSort.c -- BWT block sorting
2 2023-04-02 : Igor Pavlov : Public domain */
3
4 #include "Precomp.h"
5
6 #include "BwtSort.h"
7 #include "Sort.h"
8
9 /* #define BLOCK_SORT_USE_HEAP_SORT */
10
11 /* Don't change it !!! */
12 #define kNumHashBytes 2
13 #define kNumHashValues (1 << (kNumHashBytes * 8))
14
15 /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
16 #define kNumRefBitsMax 12
17
18 #define BS_TEMP_SIZE kNumHashValues
19
20 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
21
22 /* 32 Flags in UInt32 word */
23 #define kNumFlagsBits 5
24 #define kNumFlagsInWord (1 << kNumFlagsBits)
25 #define kFlagsMask (kNumFlagsInWord - 1)
26 #define kAllFlags 0xFFFFFFFF
27
28 #else
29
30 #define kNumBitsMax 20
31 #define kIndexMask ((1 << kNumBitsMax) - 1)
32 #define kNumExtraBits (32 - kNumBitsMax)
33 #define kNumExtra0Bits (kNumExtraBits - 2)
34 #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
35
36 #define SetFinishedGroupSize(p, size) \
37 { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
38 if ((size) > (1 << kNumExtra0Bits)) { \
39 *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
40
SetGroupSize(UInt32 * p,UInt32 size)41 static void SetGroupSize(UInt32 *p, UInt32 size)
42 {
43 if (--size == 0)
44 return;
45 *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
46 if (size >= (1 << kNumExtra0Bits))
47 {
48 *p |= 0x40000000;
49 p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
50 }
51 }
52
53 #endif
54
55 /*
56 SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
57 "range" is not real range. It's only for optimization.
58 returns: 1 - if there are groups, 0 - no more groups
59 */
60
61 static
62 UInt32
63 Z7_FASTCALL
SortGroup(UInt32 BlockSize,UInt32 NumSortedBytes,UInt32 groupOffset,UInt32 groupSize,int NumRefBits,UInt32 * Indices,UInt32 left,UInt32 range)64 SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
65 #ifndef BLOCK_SORT_USE_HEAP_SORT
66 , UInt32 left, UInt32 range
67 #endif
68 )
69 {
70 UInt32 *ind2 = Indices + groupOffset;
71 UInt32 *Groups;
72 if (groupSize <= 1)
73 {
74 /*
75 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
76 SetFinishedGroupSize(ind2, 1)
77 #endif
78 */
79 return 0;
80 }
81 Groups = Indices + BlockSize + BS_TEMP_SIZE;
82 if (groupSize <= ((UInt32)1 << NumRefBits)
83 #ifndef BLOCK_SORT_USE_HEAP_SORT
84 && groupSize <= range
85 #endif
86 )
87 {
88 UInt32 *temp = Indices + BlockSize;
89 UInt32 j;
90 UInt32 mask, thereAreGroups, group, cg;
91 {
92 UInt32 gPrev;
93 UInt32 gRes = 0;
94 {
95 UInt32 sp = ind2[0] + NumSortedBytes;
96 if (sp >= BlockSize) sp -= BlockSize;
97 gPrev = Groups[sp];
98 temp[0] = (gPrev << NumRefBits);
99 }
100
101 for (j = 1; j < groupSize; j++)
102 {
103 UInt32 sp = ind2[j] + NumSortedBytes;
104 UInt32 g;
105 if (sp >= BlockSize) sp -= BlockSize;
106 g = Groups[sp];
107 temp[j] = (g << NumRefBits) | j;
108 gRes |= (gPrev ^ g);
109 }
110 if (gRes == 0)
111 {
112 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
113 SetGroupSize(ind2, groupSize);
114 #endif
115 return 1;
116 }
117 }
118
119 HeapSort(temp, groupSize);
120 mask = (((UInt32)1 << NumRefBits) - 1);
121 thereAreGroups = 0;
122
123 group = groupOffset;
124 cg = (temp[0] >> NumRefBits);
125 temp[0] = ind2[temp[0] & mask];
126
127 {
128 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
129 UInt32 *Flags = Groups + BlockSize;
130 #else
131 UInt32 prevGroupStart = 0;
132 #endif
133
134 for (j = 1; j < groupSize; j++)
135 {
136 UInt32 val = temp[j];
137 UInt32 cgCur = (val >> NumRefBits);
138
139 if (cgCur != cg)
140 {
141 cg = cgCur;
142 group = groupOffset + j;
143
144 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
145 {
146 UInt32 t = group - 1;
147 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
148 }
149 #else
150 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
151 prevGroupStart = j;
152 #endif
153 }
154 else
155 thereAreGroups = 1;
156 {
157 UInt32 ind = ind2[val & mask];
158 temp[j] = ind;
159 Groups[ind] = group;
160 }
161 }
162
163 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
164 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
165 #endif
166 }
167
168 for (j = 0; j < groupSize; j++)
169 ind2[j] = temp[j];
170 return thereAreGroups;
171 }
172
173 /* Check that all strings are in one group (cannot sort) */
174 {
175 UInt32 group, j;
176 UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
177 group = Groups[sp];
178 for (j = 1; j < groupSize; j++)
179 {
180 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
181 if (Groups[sp] != group)
182 break;
183 }
184 if (j == groupSize)
185 {
186 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
187 SetGroupSize(ind2, groupSize);
188 #endif
189 return 1;
190 }
191 }
192
193 #ifndef BLOCK_SORT_USE_HEAP_SORT
194 {
195 /* ---------- Range Sort ---------- */
196 UInt32 i;
197 UInt32 mid;
198 for (;;)
199 {
200 UInt32 j;
201 if (range <= 1)
202 {
203 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
204 SetGroupSize(ind2, groupSize);
205 #endif
206 return 1;
207 }
208 mid = left + ((range + 1) >> 1);
209 j = groupSize;
210 i = 0;
211 do
212 {
213 UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
214 if (Groups[sp] >= mid)
215 {
216 for (j--; j > i; j--)
217 {
218 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
219 if (Groups[sp] < mid)
220 {
221 UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
222 break;
223 }
224 }
225 if (i >= j)
226 break;
227 }
228 }
229 while (++i < j);
230 if (i == 0)
231 {
232 range = range - (mid - left);
233 left = mid;
234 }
235 else if (i == groupSize)
236 range = (mid - left);
237 else
238 break;
239 }
240
241 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
242 {
243 UInt32 t = (groupOffset + i - 1);
244 UInt32 *Flags = Groups + BlockSize;
245 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
246 }
247 #endif
248
249 {
250 UInt32 j;
251 for (j = i; j < groupSize; j++)
252 Groups[ind2[j]] = groupOffset + i;
253 }
254
255 {
256 UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
257 return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
258 }
259
260 }
261
262 #else
263
264 /* ---------- Heap Sort ---------- */
265
266 {
267 UInt32 j;
268 for (j = 0; j < groupSize; j++)
269 {
270 UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
271 ind2[j] = sp;
272 }
273
274 HeapSortRef(ind2, Groups, groupSize);
275
276 /* Write Flags */
277 {
278 UInt32 sp = ind2[0];
279 UInt32 group = Groups[sp];
280
281 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
282 UInt32 *Flags = Groups + BlockSize;
283 #else
284 UInt32 prevGroupStart = 0;
285 #endif
286
287 for (j = 1; j < groupSize; j++)
288 {
289 sp = ind2[j];
290 if (Groups[sp] != group)
291 {
292 group = Groups[sp];
293 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
294 {
295 UInt32 t = groupOffset + j - 1;
296 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
297 }
298 #else
299 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
300 prevGroupStart = j;
301 #endif
302 }
303 }
304
305 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
306 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
307 #endif
308 }
309 {
310 /* Write new Groups values and Check that there are groups */
311 UInt32 thereAreGroups = 0;
312 for (j = 0; j < groupSize; j++)
313 {
314 UInt32 group = groupOffset + j;
315 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
316 UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
317 if ((ind2[j] & 0x40000000) != 0)
318 subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
319 subGroupSize++;
320 for (;;)
321 {
322 UInt32 original = ind2[j];
323 UInt32 sp = original & kIndexMask;
324 if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
325 ind2[j] = sp | (original & ~kIndexMask);
326 Groups[sp] = group;
327 if (--subGroupSize == 0)
328 break;
329 j++;
330 thereAreGroups = 1;
331 }
332 #else
333 UInt32 *Flags = Groups + BlockSize;
334 for (;;)
335 {
336 UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
337 ind2[j] = sp;
338 Groups[sp] = group;
339 if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
340 break;
341 j++;
342 thereAreGroups = 1;
343 }
344 #endif
345 }
346 return thereAreGroups;
347 }
348 }
349 #endif
350 }
351
352 /* conditions: blockSize > 0 */
BlockSort(UInt32 * Indices,const Byte * data,UInt32 blockSize)353 UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
354 {
355 UInt32 *counters = Indices + blockSize;
356 UInt32 i;
357 UInt32 *Groups;
358 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
359 UInt32 *Flags;
360 #endif
361
362 /* Radix-Sort for 2 bytes */
363 for (i = 0; i < kNumHashValues; i++)
364 counters[i] = 0;
365 for (i = 0; i < blockSize - 1; i++)
366 counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++;
367 counters[((UInt32)data[i] << 8) | data[0]]++;
368
369 Groups = counters + BS_TEMP_SIZE;
370 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
371 Flags = Groups + blockSize;
372 {
373 UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
374 for (i = 0; i < numWords; i++)
375 Flags[i] = kAllFlags;
376 }
377 #endif
378
379 {
380 UInt32 sum = 0;
381 for (i = 0; i < kNumHashValues; i++)
382 {
383 UInt32 groupSize = counters[i];
384 if (groupSize > 0)
385 {
386 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
387 UInt32 t = sum + groupSize - 1;
388 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
389 #endif
390 sum += groupSize;
391 }
392 counters[i] = sum - groupSize;
393 }
394
395 for (i = 0; i < blockSize - 1; i++)
396 Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]];
397 Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
398
399 for (i = 0; i < blockSize - 1; i++)
400 Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i;
401 Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
402
403 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
404 {
405 UInt32 prev = 0;
406 for (i = 0; i < kNumHashValues; i++)
407 {
408 UInt32 prevGroupSize = counters[i] - prev;
409 if (prevGroupSize == 0)
410 continue;
411 SetGroupSize(Indices + prev, prevGroupSize);
412 prev = counters[i];
413 }
414 }
415 #endif
416 }
417
418 {
419 int NumRefBits;
420 UInt32 NumSortedBytes;
421 for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
422 NumRefBits = 32 - NumRefBits;
423 if (NumRefBits > kNumRefBitsMax)
424 NumRefBits = kNumRefBitsMax;
425
426 for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
427 {
428 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
429 UInt32 finishedGroupSize = 0;
430 #endif
431 UInt32 newLimit = 0;
432 for (i = 0; i < blockSize;)
433 {
434 UInt32 groupSize;
435 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
436
437 if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
438 {
439 i++;
440 continue;
441 }
442 for (groupSize = 1;
443 (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
444 groupSize++);
445
446 groupSize++;
447
448 #else
449
450 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
451 {
452 BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
453 if ((Indices[i] & 0x40000000) != 0)
454 {
455 groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
456 Indices[(size_t)i + 1] &= kIndexMask;
457 }
458 Indices[i] &= kIndexMask;
459 groupSize++;
460 if (finishedGroup || groupSize == 1)
461 {
462 Indices[i - finishedGroupSize] &= kIndexMask;
463 if (finishedGroupSize > 1)
464 Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
465 {
466 UInt32 newGroupSize = groupSize + finishedGroupSize;
467 SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize)
468 finishedGroupSize = newGroupSize;
469 }
470 i += groupSize;
471 continue;
472 }
473 finishedGroupSize = 0;
474 }
475
476 #endif
477
478 if (NumSortedBytes >= blockSize)
479 {
480 UInt32 j;
481 for (j = 0; j < groupSize; j++)
482 {
483 UInt32 t = (i + j);
484 /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
485 Groups[Indices[t]] = t;
486 }
487 }
488 else
489 if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
490 #ifndef BLOCK_SORT_USE_HEAP_SORT
491 , 0, blockSize
492 #endif
493 ) != 0)
494 newLimit = i + groupSize;
495 i += groupSize;
496 }
497 if (newLimit == 0)
498 break;
499 }
500 }
501 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
502 for (i = 0; i < blockSize;)
503 {
504 UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
505 if ((Indices[i] & 0x40000000) != 0)
506 {
507 groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
508 Indices[(size_t)i + 1] &= kIndexMask;
509 }
510 Indices[i] &= kIndexMask;
511 groupSize++;
512 i += groupSize;
513 }
514 #endif
515 return Groups[0];
516 }
517