xref: /aosp_15_r20/external/lzma/C/BwtSort.c (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 /* BwtSort.c -- BWT block sorting
2 2023-04-02 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 #include "BwtSort.h"
7 #include "Sort.h"
8 
9 /* #define BLOCK_SORT_USE_HEAP_SORT */
10 
11 /* Don't change it !!! */
12 #define kNumHashBytes 2
13 #define kNumHashValues (1 << (kNumHashBytes * 8))
14 
15 /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
16 #define kNumRefBitsMax 12
17 
18 #define BS_TEMP_SIZE kNumHashValues
19 
20 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
21 
22 /* 32 Flags in UInt32 word */
23 #define kNumFlagsBits 5
24 #define kNumFlagsInWord (1 << kNumFlagsBits)
25 #define kFlagsMask (kNumFlagsInWord - 1)
26 #define kAllFlags 0xFFFFFFFF
27 
28 #else
29 
30 #define kNumBitsMax 20
31 #define kIndexMask ((1 << kNumBitsMax) - 1)
32 #define kNumExtraBits (32 - kNumBitsMax)
33 #define kNumExtra0Bits (kNumExtraBits - 2)
34 #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
35 
36 #define SetFinishedGroupSize(p, size) \
37   {  *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
38     if ((size) > (1 << kNumExtra0Bits)) { \
39     *(p) |= 0x40000000;  *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
40 
SetGroupSize(UInt32 * p,UInt32 size)41 static void SetGroupSize(UInt32 *p, UInt32 size)
42 {
43   if (--size == 0)
44     return;
45   *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
46   if (size >= (1 << kNumExtra0Bits))
47   {
48     *p |= 0x40000000;
49     p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
50   }
51 }
52 
53 #endif
54 
55 /*
56 SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
57   "range" is not real range. It's only for optimization.
58 returns: 1 - if there are groups, 0 - no more groups
59 */
60 
61 static
62 UInt32
63 Z7_FASTCALL
SortGroup(UInt32 BlockSize,UInt32 NumSortedBytes,UInt32 groupOffset,UInt32 groupSize,int NumRefBits,UInt32 * Indices,UInt32 left,UInt32 range)64 SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
65   #ifndef BLOCK_SORT_USE_HEAP_SORT
66   , UInt32 left, UInt32 range
67   #endif
68   )
69 {
70   UInt32 *ind2 = Indices + groupOffset;
71   UInt32 *Groups;
72   if (groupSize <= 1)
73   {
74     /*
75     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
76     SetFinishedGroupSize(ind2, 1)
77     #endif
78     */
79     return 0;
80   }
81   Groups = Indices + BlockSize + BS_TEMP_SIZE;
82   if (groupSize <= ((UInt32)1 << NumRefBits)
83       #ifndef BLOCK_SORT_USE_HEAP_SORT
84       && groupSize <= range
85       #endif
86       )
87   {
88     UInt32 *temp = Indices + BlockSize;
89     UInt32 j;
90     UInt32 mask, thereAreGroups, group, cg;
91     {
92       UInt32 gPrev;
93       UInt32 gRes = 0;
94       {
95         UInt32 sp = ind2[0] + NumSortedBytes;
96         if (sp >= BlockSize) sp -= BlockSize;
97         gPrev = Groups[sp];
98         temp[0] = (gPrev << NumRefBits);
99       }
100 
101       for (j = 1; j < groupSize; j++)
102       {
103         UInt32 sp = ind2[j] + NumSortedBytes;
104         UInt32 g;
105         if (sp >= BlockSize) sp -= BlockSize;
106         g = Groups[sp];
107         temp[j] = (g << NumRefBits) | j;
108         gRes |= (gPrev ^ g);
109       }
110       if (gRes == 0)
111       {
112         #ifndef BLOCK_SORT_EXTERNAL_FLAGS
113         SetGroupSize(ind2, groupSize);
114         #endif
115         return 1;
116       }
117     }
118 
119     HeapSort(temp, groupSize);
120     mask = (((UInt32)1 << NumRefBits) - 1);
121     thereAreGroups = 0;
122 
123     group = groupOffset;
124     cg = (temp[0] >> NumRefBits);
125     temp[0] = ind2[temp[0] & mask];
126 
127     {
128     #ifdef BLOCK_SORT_EXTERNAL_FLAGS
129     UInt32 *Flags = Groups + BlockSize;
130     #else
131     UInt32 prevGroupStart = 0;
132     #endif
133 
134     for (j = 1; j < groupSize; j++)
135     {
136       UInt32 val = temp[j];
137       UInt32 cgCur = (val >> NumRefBits);
138 
139       if (cgCur != cg)
140       {
141         cg = cgCur;
142         group = groupOffset + j;
143 
144         #ifdef BLOCK_SORT_EXTERNAL_FLAGS
145         {
146         UInt32 t = group - 1;
147         Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
148         }
149         #else
150         SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
151         prevGroupStart = j;
152         #endif
153       }
154       else
155         thereAreGroups = 1;
156       {
157       UInt32 ind = ind2[val & mask];
158       temp[j] = ind;
159       Groups[ind] = group;
160       }
161     }
162 
163     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
164     SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
165     #endif
166     }
167 
168     for (j = 0; j < groupSize; j++)
169       ind2[j] = temp[j];
170     return thereAreGroups;
171   }
172 
173   /* Check that all strings are in one group (cannot sort) */
174   {
175     UInt32 group, j;
176     UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
177     group = Groups[sp];
178     for (j = 1; j < groupSize; j++)
179     {
180       sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
181       if (Groups[sp] != group)
182         break;
183     }
184     if (j == groupSize)
185     {
186       #ifndef BLOCK_SORT_EXTERNAL_FLAGS
187       SetGroupSize(ind2, groupSize);
188       #endif
189       return 1;
190     }
191   }
192 
193   #ifndef BLOCK_SORT_USE_HEAP_SORT
194   {
195   /* ---------- Range Sort ---------- */
196   UInt32 i;
197   UInt32 mid;
198   for (;;)
199   {
200     UInt32 j;
201     if (range <= 1)
202     {
203       #ifndef BLOCK_SORT_EXTERNAL_FLAGS
204       SetGroupSize(ind2, groupSize);
205       #endif
206       return 1;
207     }
208     mid = left + ((range + 1) >> 1);
209     j = groupSize;
210     i = 0;
211     do
212     {
213       UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
214       if (Groups[sp] >= mid)
215       {
216         for (j--; j > i; j--)
217         {
218           sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
219           if (Groups[sp] < mid)
220           {
221             UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
222             break;
223           }
224         }
225         if (i >= j)
226           break;
227       }
228     }
229     while (++i < j);
230     if (i == 0)
231     {
232       range = range - (mid - left);
233       left = mid;
234     }
235     else if (i == groupSize)
236       range = (mid - left);
237     else
238       break;
239   }
240 
241   #ifdef BLOCK_SORT_EXTERNAL_FLAGS
242   {
243     UInt32 t = (groupOffset + i - 1);
244     UInt32 *Flags = Groups + BlockSize;
245     Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
246   }
247   #endif
248 
249   {
250     UInt32 j;
251     for (j = i; j < groupSize; j++)
252       Groups[ind2[j]] = groupOffset + i;
253   }
254 
255   {
256   UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
257   return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
258   }
259 
260   }
261 
262   #else
263 
264   /* ---------- Heap Sort ---------- */
265 
266   {
267     UInt32 j;
268     for (j = 0; j < groupSize; j++)
269     {
270       UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
271       ind2[j] = sp;
272     }
273 
274     HeapSortRef(ind2, Groups, groupSize);
275 
276     /* Write Flags */
277     {
278     UInt32 sp = ind2[0];
279     UInt32 group = Groups[sp];
280 
281     #ifdef BLOCK_SORT_EXTERNAL_FLAGS
282     UInt32 *Flags = Groups + BlockSize;
283     #else
284     UInt32 prevGroupStart = 0;
285     #endif
286 
287     for (j = 1; j < groupSize; j++)
288     {
289       sp = ind2[j];
290       if (Groups[sp] != group)
291       {
292         group = Groups[sp];
293         #ifdef BLOCK_SORT_EXTERNAL_FLAGS
294         {
295         UInt32 t = groupOffset + j - 1;
296         Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
297         }
298         #else
299         SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
300         prevGroupStart = j;
301         #endif
302       }
303     }
304 
305     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
306     SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
307     #endif
308     }
309     {
310     /* Write new Groups values and Check that there are groups */
311     UInt32 thereAreGroups = 0;
312     for (j = 0; j < groupSize; j++)
313     {
314       UInt32 group = groupOffset + j;
315       #ifndef BLOCK_SORT_EXTERNAL_FLAGS
316       UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
317       if ((ind2[j] & 0x40000000) != 0)
318         subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
319       subGroupSize++;
320       for (;;)
321       {
322         UInt32 original = ind2[j];
323         UInt32 sp = original & kIndexMask;
324         if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
325         ind2[j] = sp | (original & ~kIndexMask);
326         Groups[sp] = group;
327         if (--subGroupSize == 0)
328           break;
329         j++;
330         thereAreGroups = 1;
331       }
332       #else
333       UInt32 *Flags = Groups + BlockSize;
334       for (;;)
335       {
336         UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
337         ind2[j] = sp;
338         Groups[sp] = group;
339         if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
340           break;
341         j++;
342         thereAreGroups = 1;
343       }
344       #endif
345     }
346     return thereAreGroups;
347     }
348   }
349   #endif
350 }
351 
352 /* conditions: blockSize > 0 */
BlockSort(UInt32 * Indices,const Byte * data,UInt32 blockSize)353 UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
354 {
355   UInt32 *counters = Indices + blockSize;
356   UInt32 i;
357   UInt32 *Groups;
358   #ifdef BLOCK_SORT_EXTERNAL_FLAGS
359   UInt32 *Flags;
360   #endif
361 
362   /* Radix-Sort for 2 bytes */
363   for (i = 0; i < kNumHashValues; i++)
364     counters[i] = 0;
365   for (i = 0; i < blockSize - 1; i++)
366     counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++;
367   counters[((UInt32)data[i] << 8) | data[0]]++;
368 
369   Groups = counters + BS_TEMP_SIZE;
370   #ifdef BLOCK_SORT_EXTERNAL_FLAGS
371   Flags = Groups + blockSize;
372     {
373       UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
374       for (i = 0; i < numWords; i++)
375         Flags[i] = kAllFlags;
376     }
377   #endif
378 
379   {
380     UInt32 sum = 0;
381     for (i = 0; i < kNumHashValues; i++)
382     {
383       UInt32 groupSize = counters[i];
384       if (groupSize > 0)
385       {
386         #ifdef BLOCK_SORT_EXTERNAL_FLAGS
387         UInt32 t = sum + groupSize - 1;
388         Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
389         #endif
390         sum += groupSize;
391       }
392       counters[i] = sum - groupSize;
393     }
394 
395     for (i = 0; i < blockSize - 1; i++)
396       Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]];
397     Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
398 
399     for (i = 0; i < blockSize - 1; i++)
400       Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i;
401     Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
402 
403     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
404     {
405     UInt32 prev = 0;
406     for (i = 0; i < kNumHashValues; i++)
407     {
408       UInt32 prevGroupSize = counters[i] - prev;
409       if (prevGroupSize == 0)
410         continue;
411       SetGroupSize(Indices + prev, prevGroupSize);
412       prev = counters[i];
413     }
414     }
415     #endif
416   }
417 
418   {
419   int NumRefBits;
420   UInt32 NumSortedBytes;
421   for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
422   NumRefBits = 32 - NumRefBits;
423   if (NumRefBits > kNumRefBitsMax)
424     NumRefBits = kNumRefBitsMax;
425 
426   for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
427   {
428     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
429     UInt32 finishedGroupSize = 0;
430     #endif
431     UInt32 newLimit = 0;
432     for (i = 0; i < blockSize;)
433     {
434       UInt32 groupSize;
435       #ifdef BLOCK_SORT_EXTERNAL_FLAGS
436 
437       if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
438       {
439         i++;
440         continue;
441       }
442       for (groupSize = 1;
443         (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
444         groupSize++);
445 
446       groupSize++;
447 
448       #else
449 
450       groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
451       {
452       BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
453       if ((Indices[i] & 0x40000000) != 0)
454       {
455         groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
456         Indices[(size_t)i + 1] &= kIndexMask;
457       }
458       Indices[i] &= kIndexMask;
459       groupSize++;
460       if (finishedGroup || groupSize == 1)
461       {
462         Indices[i - finishedGroupSize] &= kIndexMask;
463         if (finishedGroupSize > 1)
464           Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
465         {
466         UInt32 newGroupSize = groupSize + finishedGroupSize;
467         SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize)
468         finishedGroupSize = newGroupSize;
469         }
470         i += groupSize;
471         continue;
472       }
473       finishedGroupSize = 0;
474       }
475 
476       #endif
477 
478       if (NumSortedBytes >= blockSize)
479       {
480         UInt32 j;
481         for (j = 0; j < groupSize; j++)
482         {
483           UInt32 t = (i + j);
484           /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
485           Groups[Indices[t]] = t;
486         }
487       }
488       else
489         if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
490           #ifndef BLOCK_SORT_USE_HEAP_SORT
491           , 0, blockSize
492           #endif
493           ) != 0)
494           newLimit = i + groupSize;
495       i += groupSize;
496     }
497     if (newLimit == 0)
498       break;
499   }
500   }
501   #ifndef BLOCK_SORT_EXTERNAL_FLAGS
502   for (i = 0; i < blockSize;)
503   {
504     UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
505     if ((Indices[i] & 0x40000000) != 0)
506     {
507       groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
508       Indices[(size_t)i + 1] &= kIndexMask;
509     }
510     Indices[i] &= kIndexMask;
511     groupSize++;
512     i += groupSize;
513   }
514   #endif
515   return Groups[0];
516 }
517