1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Code generated by go generate; DO NOT EDIT.
6
7package suffixarray
8
9func text_64(text []byte, sa []int64) {
10	if int(int64(len(text))) != len(text) || len(text) != len(sa) {
11		panic("suffixarray: misuse of text_64")
12	}
13	sais_8_64(text, 256, sa, make([]int64, 2*256))
14}
15
16func sais_8_64(text []byte, textMax int, sa, tmp []int64) {
17	if len(sa) != len(text) || len(tmp) < textMax {
18		panic("suffixarray: misuse of sais_8_64")
19	}
20
21	// Trivial base cases. Sorting 0 or 1 things is easy.
22	if len(text) == 0 {
23		return
24	}
25	if len(text) == 1 {
26		sa[0] = 0
27		return
28	}
29
30	// Establish slices indexed by text character
31	// holding character frequency and bucket-sort offsets.
32	// If there's only enough tmp for one slice,
33	// we make it the bucket offsets and recompute
34	// the character frequency each time we need it.
35	var freq, bucket []int64
36	if len(tmp) >= 2*textMax {
37		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
38		freq[0] = -1 // mark as uninitialized
39	} else {
40		freq, bucket = nil, tmp[:textMax]
41	}
42
43	// The SAIS algorithm.
44	// Each of these calls makes one scan through sa.
45	// See the individual functions for documentation
46	// about each's role in the algorithm.
47	numLMS := placeLMS_8_64(text, sa, freq, bucket)
48	if numLMS <= 1 {
49		// 0 or 1 items are already sorted. Do nothing.
50	} else {
51		induceSubL_8_64(text, sa, freq, bucket)
52		induceSubS_8_64(text, sa, freq, bucket)
53		length_8_64(text, sa, numLMS)
54		maxID := assignID_8_64(text, sa, numLMS)
55		if maxID < numLMS {
56			map_64(sa, numLMS)
57			recurse_64(sa, tmp, numLMS, maxID)
58			unmap_8_64(text, sa, numLMS)
59		} else {
60			// If maxID == numLMS, then each LMS-substring
61			// is unique, so the relative ordering of two LMS-suffixes
62			// is determined by just the leading LMS-substring.
63			// That is, the LMS-suffix sort order matches the
64			// (simpler) LMS-substring sort order.
65			// Copy the original LMS-substring order into the
66			// suffix array destination.
67			copy(sa, sa[len(sa)-numLMS:])
68		}
69		expand_8_64(text, freq, bucket, sa, numLMS)
70	}
71	induceL_8_64(text, sa, freq, bucket)
72	induceS_8_64(text, sa, freq, bucket)
73
74	// Mark for caller that we overwrote tmp.
75	tmp[0] = -1
76}
77
78func sais_32(text []int32, textMax int, sa, tmp []int32) {
79	if len(sa) != len(text) || len(tmp) < textMax {
80		panic("suffixarray: misuse of sais_32")
81	}
82
83	// Trivial base cases. Sorting 0 or 1 things is easy.
84	if len(text) == 0 {
85		return
86	}
87	if len(text) == 1 {
88		sa[0] = 0
89		return
90	}
91
92	// Establish slices indexed by text character
93	// holding character frequency and bucket-sort offsets.
94	// If there's only enough tmp for one slice,
95	// we make it the bucket offsets and recompute
96	// the character frequency each time we need it.
97	var freq, bucket []int32
98	if len(tmp) >= 2*textMax {
99		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
100		freq[0] = -1 // mark as uninitialized
101	} else {
102		freq, bucket = nil, tmp[:textMax]
103	}
104
105	// The SAIS algorithm.
106	// Each of these calls makes one scan through sa.
107	// See the individual functions for documentation
108	// about each's role in the algorithm.
109	numLMS := placeLMS_32(text, sa, freq, bucket)
110	if numLMS <= 1 {
111		// 0 or 1 items are already sorted. Do nothing.
112	} else {
113		induceSubL_32(text, sa, freq, bucket)
114		induceSubS_32(text, sa, freq, bucket)
115		length_32(text, sa, numLMS)
116		maxID := assignID_32(text, sa, numLMS)
117		if maxID < numLMS {
118			map_32(sa, numLMS)
119			recurse_32(sa, tmp, numLMS, maxID)
120			unmap_32(text, sa, numLMS)
121		} else {
122			// If maxID == numLMS, then each LMS-substring
123			// is unique, so the relative ordering of two LMS-suffixes
124			// is determined by just the leading LMS-substring.
125			// That is, the LMS-suffix sort order matches the
126			// (simpler) LMS-substring sort order.
127			// Copy the original LMS-substring order into the
128			// suffix array destination.
129			copy(sa, sa[len(sa)-numLMS:])
130		}
131		expand_32(text, freq, bucket, sa, numLMS)
132	}
133	induceL_32(text, sa, freq, bucket)
134	induceS_32(text, sa, freq, bucket)
135
136	// Mark for caller that we overwrote tmp.
137	tmp[0] = -1
138}
139
140func sais_64(text []int64, textMax int, sa, tmp []int64) {
141	if len(sa) != len(text) || len(tmp) < textMax {
142		panic("suffixarray: misuse of sais_64")
143	}
144
145	// Trivial base cases. Sorting 0 or 1 things is easy.
146	if len(text) == 0 {
147		return
148	}
149	if len(text) == 1 {
150		sa[0] = 0
151		return
152	}
153
154	// Establish slices indexed by text character
155	// holding character frequency and bucket-sort offsets.
156	// If there's only enough tmp for one slice,
157	// we make it the bucket offsets and recompute
158	// the character frequency each time we need it.
159	var freq, bucket []int64
160	if len(tmp) >= 2*textMax {
161		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
162		freq[0] = -1 // mark as uninitialized
163	} else {
164		freq, bucket = nil, tmp[:textMax]
165	}
166
167	// The SAIS algorithm.
168	// Each of these calls makes one scan through sa.
169	// See the individual functions for documentation
170	// about each's role in the algorithm.
171	numLMS := placeLMS_64(text, sa, freq, bucket)
172	if numLMS <= 1 {
173		// 0 or 1 items are already sorted. Do nothing.
174	} else {
175		induceSubL_64(text, sa, freq, bucket)
176		induceSubS_64(text, sa, freq, bucket)
177		length_64(text, sa, numLMS)
178		maxID := assignID_64(text, sa, numLMS)
179		if maxID < numLMS {
180			map_64(sa, numLMS)
181			recurse_64(sa, tmp, numLMS, maxID)
182			unmap_64(text, sa, numLMS)
183		} else {
184			// If maxID == numLMS, then each LMS-substring
185			// is unique, so the relative ordering of two LMS-suffixes
186			// is determined by just the leading LMS-substring.
187			// That is, the LMS-suffix sort order matches the
188			// (simpler) LMS-substring sort order.
189			// Copy the original LMS-substring order into the
190			// suffix array destination.
191			copy(sa, sa[len(sa)-numLMS:])
192		}
193		expand_64(text, freq, bucket, sa, numLMS)
194	}
195	induceL_64(text, sa, freq, bucket)
196	induceS_64(text, sa, freq, bucket)
197
198	// Mark for caller that we overwrote tmp.
199	tmp[0] = -1
200}
201
202func freq_8_64(text []byte, freq, bucket []int64) []int64 {
203	if freq != nil && freq[0] >= 0 {
204		return freq // already computed
205	}
206	if freq == nil {
207		freq = bucket
208	}
209
210	freq = freq[:256] // eliminate bounds check for freq[c] below
211	clear(freq)
212	for _, c := range text {
213		freq[c]++
214	}
215	return freq
216}
217
218func freq_32(text []int32, freq, bucket []int32) []int32 {
219	if freq != nil && freq[0] >= 0 {
220		return freq // already computed
221	}
222	if freq == nil {
223		freq = bucket
224	}
225
226	clear(freq)
227	for _, c := range text {
228		freq[c]++
229	}
230	return freq
231}
232
233func freq_64(text []int64, freq, bucket []int64) []int64 {
234	if freq != nil && freq[0] >= 0 {
235		return freq // already computed
236	}
237	if freq == nil {
238		freq = bucket
239	}
240
241	clear(freq)
242	for _, c := range text {
243		freq[c]++
244	}
245	return freq
246}
247
248func bucketMin_8_64(text []byte, freq, bucket []int64) {
249	freq = freq_8_64(text, freq, bucket)
250	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
251	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
252	total := int64(0)
253	for i, n := range freq {
254		bucket[i] = total
255		total += n
256	}
257}
258
259func bucketMin_32(text []int32, freq, bucket []int32) {
260	freq = freq_32(text, freq, bucket)
261	total := int32(0)
262	for i, n := range freq {
263		bucket[i] = total
264		total += n
265	}
266}
267
268func bucketMin_64(text []int64, freq, bucket []int64) {
269	freq = freq_64(text, freq, bucket)
270	total := int64(0)
271	for i, n := range freq {
272		bucket[i] = total
273		total += n
274	}
275}
276
277func bucketMax_8_64(text []byte, freq, bucket []int64) {
278	freq = freq_8_64(text, freq, bucket)
279	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
280	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
281	total := int64(0)
282	for i, n := range freq {
283		total += n
284		bucket[i] = total
285	}
286}
287
288func bucketMax_32(text []int32, freq, bucket []int32) {
289	freq = freq_32(text, freq, bucket)
290	total := int32(0)
291	for i, n := range freq {
292		total += n
293		bucket[i] = total
294	}
295}
296
297func bucketMax_64(text []int64, freq, bucket []int64) {
298	freq = freq_64(text, freq, bucket)
299	total := int64(0)
300	for i, n := range freq {
301		total += n
302		bucket[i] = total
303	}
304}
305
306func placeLMS_8_64(text []byte, sa, freq, bucket []int64) int {
307	bucketMax_8_64(text, freq, bucket)
308
309	numLMS := 0
310	lastB := int64(-1)
311	bucket = bucket[:256] // eliminate bounds check for bucket[c1] below
312
313	// The next stanza of code (until the blank line) loop backward
314	// over text, stopping to execute a code body at each position i
315	// such that text[i] is an L-character and text[i+1] is an S-character.
316	// That is, i+1 is the position of the start of an LMS-substring.
317	// These could be hoisted out into a function with a callback,
318	// but at a significant speed cost. Instead, we just write these
319	// seven lines a few times in this source file. The copies below
320	// refer back to the pattern established by this original as the
321	// "LMS-substring iterator".
322	//
323	// In every scan through the text, c0, c1 are successive characters of text.
324	// In this backward scan, c0 == text[i] and c1 == text[i+1].
325	// By scanning backward, we can keep track of whether the current
326	// position is type-S or type-L according to the usual definition:
327	//
328	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
329	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
330	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
331	//
332	// The backward scan lets us maintain the current type,
333	// update it when we see c0 != c1, and otherwise leave it alone.
334	// We want to identify all S positions with a preceding L.
335	// Position len(text) is one such position by definition, but we have
336	// nowhere to write it down, so we eliminate it by untruthfully
337	// setting isTypeS = false at the start of the loop.
338	c0, c1, isTypeS := byte(0), byte(0), false
339	for i := len(text) - 1; i >= 0; i-- {
340		c0, c1 = text[i], c0
341		if c0 < c1 {
342			isTypeS = true
343		} else if c0 > c1 && isTypeS {
344			isTypeS = false
345
346			// Bucket the index i+1 for the start of an LMS-substring.
347			b := bucket[c1] - 1
348			bucket[c1] = b
349			sa[b] = int64(i + 1)
350			lastB = b
351			numLMS++
352		}
353	}
354
355	// We recorded the LMS-substring starts but really want the ends.
356	// Luckily, with two differences, the start indexes and the end indexes are the same.
357	// The first difference is that the rightmost LMS-substring's end index is len(text),
358	// so the caller must pretend that sa[-1] == len(text), as noted above.
359	// The second difference is that the first leftmost LMS-substring start index
360	// does not end an earlier LMS-substring, so as an optimization we can omit
361	// that leftmost LMS-substring start index (the last one we wrote).
362	//
363	// Exception: if numLMS <= 1, the caller is not going to bother with
364	// the recursion at all and will treat the result as containing LMS-substring starts.
365	// In that case, we don't remove the final entry.
366	if numLMS > 1 {
367		sa[lastB] = 0
368	}
369	return numLMS
370}
371
372func placeLMS_32(text []int32, sa, freq, bucket []int32) int {
373	bucketMax_32(text, freq, bucket)
374
375	numLMS := 0
376	lastB := int32(-1)
377
378	// The next stanza of code (until the blank line) loop backward
379	// over text, stopping to execute a code body at each position i
380	// such that text[i] is an L-character and text[i+1] is an S-character.
381	// That is, i+1 is the position of the start of an LMS-substring.
382	// These could be hoisted out into a function with a callback,
383	// but at a significant speed cost. Instead, we just write these
384	// seven lines a few times in this source file. The copies below
385	// refer back to the pattern established by this original as the
386	// "LMS-substring iterator".
387	//
388	// In every scan through the text, c0, c1 are successive characters of text.
389	// In this backward scan, c0 == text[i] and c1 == text[i+1].
390	// By scanning backward, we can keep track of whether the current
391	// position is type-S or type-L according to the usual definition:
392	//
393	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
394	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
395	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
396	//
397	// The backward scan lets us maintain the current type,
398	// update it when we see c0 != c1, and otherwise leave it alone.
399	// We want to identify all S positions with a preceding L.
400	// Position len(text) is one such position by definition, but we have
401	// nowhere to write it down, so we eliminate it by untruthfully
402	// setting isTypeS = false at the start of the loop.
403	c0, c1, isTypeS := int32(0), int32(0), false
404	for i := len(text) - 1; i >= 0; i-- {
405		c0, c1 = text[i], c0
406		if c0 < c1 {
407			isTypeS = true
408		} else if c0 > c1 && isTypeS {
409			isTypeS = false
410
411			// Bucket the index i+1 for the start of an LMS-substring.
412			b := bucket[c1] - 1
413			bucket[c1] = b
414			sa[b] = int32(i + 1)
415			lastB = b
416			numLMS++
417		}
418	}
419
420	// We recorded the LMS-substring starts but really want the ends.
421	// Luckily, with two differences, the start indexes and the end indexes are the same.
422	// The first difference is that the rightmost LMS-substring's end index is len(text),
423	// so the caller must pretend that sa[-1] == len(text), as noted above.
424	// The second difference is that the first leftmost LMS-substring start index
425	// does not end an earlier LMS-substring, so as an optimization we can omit
426	// that leftmost LMS-substring start index (the last one we wrote).
427	//
428	// Exception: if numLMS <= 1, the caller is not going to bother with
429	// the recursion at all and will treat the result as containing LMS-substring starts.
430	// In that case, we don't remove the final entry.
431	if numLMS > 1 {
432		sa[lastB] = 0
433	}
434	return numLMS
435}
436
437func placeLMS_64(text []int64, sa, freq, bucket []int64) int {
438	bucketMax_64(text, freq, bucket)
439
440	numLMS := 0
441	lastB := int64(-1)
442
443	// The next stanza of code (until the blank line) loop backward
444	// over text, stopping to execute a code body at each position i
445	// such that text[i] is an L-character and text[i+1] is an S-character.
446	// That is, i+1 is the position of the start of an LMS-substring.
447	// These could be hoisted out into a function with a callback,
448	// but at a significant speed cost. Instead, we just write these
449	// seven lines a few times in this source file. The copies below
450	// refer back to the pattern established by this original as the
451	// "LMS-substring iterator".
452	//
453	// In every scan through the text, c0, c1 are successive characters of text.
454	// In this backward scan, c0 == text[i] and c1 == text[i+1].
455	// By scanning backward, we can keep track of whether the current
456	// position is type-S or type-L according to the usual definition:
457	//
458	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
459	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
460	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
461	//
462	// The backward scan lets us maintain the current type,
463	// update it when we see c0 != c1, and otherwise leave it alone.
464	// We want to identify all S positions with a preceding L.
465	// Position len(text) is one such position by definition, but we have
466	// nowhere to write it down, so we eliminate it by untruthfully
467	// setting isTypeS = false at the start of the loop.
468	c0, c1, isTypeS := int64(0), int64(0), false
469	for i := len(text) - 1; i >= 0; i-- {
470		c0, c1 = text[i], c0
471		if c0 < c1 {
472			isTypeS = true
473		} else if c0 > c1 && isTypeS {
474			isTypeS = false
475
476			// Bucket the index i+1 for the start of an LMS-substring.
477			b := bucket[c1] - 1
478			bucket[c1] = b
479			sa[b] = int64(i + 1)
480			lastB = b
481			numLMS++
482		}
483	}
484
485	// We recorded the LMS-substring starts but really want the ends.
486	// Luckily, with two differences, the start indexes and the end indexes are the same.
487	// The first difference is that the rightmost LMS-substring's end index is len(text),
488	// so the caller must pretend that sa[-1] == len(text), as noted above.
489	// The second difference is that the first leftmost LMS-substring start index
490	// does not end an earlier LMS-substring, so as an optimization we can omit
491	// that leftmost LMS-substring start index (the last one we wrote).
492	//
493	// Exception: if numLMS <= 1, the caller is not going to bother with
494	// the recursion at all and will treat the result as containing LMS-substring starts.
495	// In that case, we don't remove the final entry.
496	if numLMS > 1 {
497		sa[lastB] = 0
498	}
499	return numLMS
500}
501
502func induceSubL_8_64(text []byte, sa, freq, bucket []int64) {
503	// Initialize positions for left side of character buckets.
504	bucketMin_8_64(text, freq, bucket)
505	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
506
507	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
508	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
509	// Because j-1 is type L, inserting it into sa now will sort it correctly.
510	// But we want to distinguish a j-1 with j-2 of type L from type S.
511	// We can process the former but want to leave the latter for the caller.
512	// We record the difference by negating j-1 if it is preceded by type S.
513	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
514	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
515	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
516	// and so on, in sorted but not necessarily adjacent order, until it finds
517	// one preceded by an index of type S, at which point it must stop.
518	//
519	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
520	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
521	// only the indexes of the leftmost L-type indexes for each LMS-substring.
522	//
523	// The suffix array sa therefore serves simultaneously as input, output,
524	// and a miraculously well-tailored work queue.
525
526	// placeLMS_8_64 left out the implicit entry sa[-1] == len(text),
527	// corresponding to the identified type-L index len(text)-1.
528	// Process it before the left-to-right scan of sa proper.
529	// See body in loop for commentary.
530	k := len(text) - 1
531	c0, c1 := text[k-1], text[k]
532	if c0 < c1 {
533		k = -k
534	}
535
536	// Cache recently used bucket index:
537	// we're processing suffixes in sorted order
538	// and accessing buckets indexed by the
539	// byte before the sorted order, which still
540	// has very good locality.
541	// Invariant: b is cached, possibly dirty copy of bucket[cB].
542	cB := c1
543	b := bucket[cB]
544	sa[b] = int64(k)
545	b++
546
547	for i := 0; i < len(sa); i++ {
548		j := int(sa[i])
549		if j == 0 {
550			// Skip empty entry.
551			continue
552		}
553		if j < 0 {
554			// Leave discovered type-S index for caller.
555			sa[i] = int64(-j)
556			continue
557		}
558		sa[i] = 0
559
560		// Index j was on work queue, meaning k := j-1 is L-type,
561		// so we can now place k correctly into sa.
562		// If k-1 is L-type, queue k for processing later in this loop.
563		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
564		k := j - 1
565		c0, c1 := text[k-1], text[k]
566		if c0 < c1 {
567			k = -k
568		}
569
570		if cB != c1 {
571			bucket[cB] = b
572			cB = c1
573			b = bucket[cB]
574		}
575		sa[b] = int64(k)
576		b++
577	}
578}
579
580func induceSubL_32(text []int32, sa, freq, bucket []int32) {
581	// Initialize positions for left side of character buckets.
582	bucketMin_32(text, freq, bucket)
583
584	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
585	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
586	// Because j-1 is type L, inserting it into sa now will sort it correctly.
587	// But we want to distinguish a j-1 with j-2 of type L from type S.
588	// We can process the former but want to leave the latter for the caller.
589	// We record the difference by negating j-1 if it is preceded by type S.
590	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
591	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
592	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
593	// and so on, in sorted but not necessarily adjacent order, until it finds
594	// one preceded by an index of type S, at which point it must stop.
595	//
596	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
597	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
598	// only the indexes of the leftmost L-type indexes for each LMS-substring.
599	//
600	// The suffix array sa therefore serves simultaneously as input, output,
601	// and a miraculously well-tailored work queue.
602
603	// placeLMS_32 left out the implicit entry sa[-1] == len(text),
604	// corresponding to the identified type-L index len(text)-1.
605	// Process it before the left-to-right scan of sa proper.
606	// See body in loop for commentary.
607	k := len(text) - 1
608	c0, c1 := text[k-1], text[k]
609	if c0 < c1 {
610		k = -k
611	}
612
613	// Cache recently used bucket index:
614	// we're processing suffixes in sorted order
615	// and accessing buckets indexed by the
616	// int32 before the sorted order, which still
617	// has very good locality.
618	// Invariant: b is cached, possibly dirty copy of bucket[cB].
619	cB := c1
620	b := bucket[cB]
621	sa[b] = int32(k)
622	b++
623
624	for i := 0; i < len(sa); i++ {
625		j := int(sa[i])
626		if j == 0 {
627			// Skip empty entry.
628			continue
629		}
630		if j < 0 {
631			// Leave discovered type-S index for caller.
632			sa[i] = int32(-j)
633			continue
634		}
635		sa[i] = 0
636
637		// Index j was on work queue, meaning k := j-1 is L-type,
638		// so we can now place k correctly into sa.
639		// If k-1 is L-type, queue k for processing later in this loop.
640		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
641		k := j - 1
642		c0, c1 := text[k-1], text[k]
643		if c0 < c1 {
644			k = -k
645		}
646
647		if cB != c1 {
648			bucket[cB] = b
649			cB = c1
650			b = bucket[cB]
651		}
652		sa[b] = int32(k)
653		b++
654	}
655}
656
657func induceSubL_64(text []int64, sa, freq, bucket []int64) {
658	// Initialize positions for left side of character buckets.
659	bucketMin_64(text, freq, bucket)
660
661	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
662	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
663	// Because j-1 is type L, inserting it into sa now will sort it correctly.
664	// But we want to distinguish a j-1 with j-2 of type L from type S.
665	// We can process the former but want to leave the latter for the caller.
666	// We record the difference by negating j-1 if it is preceded by type S.
667	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
668	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
669	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
670	// and so on, in sorted but not necessarily adjacent order, until it finds
671	// one preceded by an index of type S, at which point it must stop.
672	//
673	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
674	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
675	// only the indexes of the leftmost L-type indexes for each LMS-substring.
676	//
677	// The suffix array sa therefore serves simultaneously as input, output,
678	// and a miraculously well-tailored work queue.
679
680	// placeLMS_64 left out the implicit entry sa[-1] == len(text),
681	// corresponding to the identified type-L index len(text)-1.
682	// Process it before the left-to-right scan of sa proper.
683	// See body in loop for commentary.
684	k := len(text) - 1
685	c0, c1 := text[k-1], text[k]
686	if c0 < c1 {
687		k = -k
688	}
689
690	// Cache recently used bucket index:
691	// we're processing suffixes in sorted order
692	// and accessing buckets indexed by the
693	// int64 before the sorted order, which still
694	// has very good locality.
695	// Invariant: b is cached, possibly dirty copy of bucket[cB].
696	cB := c1
697	b := bucket[cB]
698	sa[b] = int64(k)
699	b++
700
701	for i := 0; i < len(sa); i++ {
702		j := int(sa[i])
703		if j == 0 {
704			// Skip empty entry.
705			continue
706		}
707		if j < 0 {
708			// Leave discovered type-S index for caller.
709			sa[i] = int64(-j)
710			continue
711		}
712		sa[i] = 0
713
714		// Index j was on work queue, meaning k := j-1 is L-type,
715		// so we can now place k correctly into sa.
716		// If k-1 is L-type, queue k for processing later in this loop.
717		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
718		k := j - 1
719		c0, c1 := text[k-1], text[k]
720		if c0 < c1 {
721			k = -k
722		}
723
724		if cB != c1 {
725			bucket[cB] = b
726			cB = c1
727			b = bucket[cB]
728		}
729		sa[b] = int64(k)
730		b++
731	}
732}
733
734func induceSubS_8_64(text []byte, sa, freq, bucket []int64) {
735	// Initialize positions for right side of character buckets.
736	bucketMax_8_64(text, freq, bucket)
737	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
738
739	// Analogous to induceSubL_8_64 above,
740	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
741	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
742	// Because j-1 is type S, inserting it into sa now will sort it correctly.
743	// But we want to distinguish a j-1 with j-2 of type S from type L.
744	// We can process the former but want to leave the latter for the caller.
745	// We record the difference by negating j-1 if it is preceded by type L.
746	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
747	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
748	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
749	// and so on, in sorted but not necessarily adjacent order, until it finds
750	// one preceded by an index of type L, at which point it must stop.
751	// That index (preceded by one of type L) is an LMS-substring start.
752	//
753	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
754	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
755	// so that the loop finishes with the top of sa containing exactly
756	// the LMS-substring start indexes, sorted by LMS-substring.
757
758	// Cache recently used bucket index:
759	cB := byte(0)
760	b := bucket[cB]
761
762	top := len(sa)
763	for i := len(sa) - 1; i >= 0; i-- {
764		j := int(sa[i])
765		if j == 0 {
766			// Skip empty entry.
767			continue
768		}
769		sa[i] = 0
770		if j < 0 {
771			// Leave discovered LMS-substring start index for caller.
772			top--
773			sa[top] = int64(-j)
774			continue
775		}
776
777		// Index j was on work queue, meaning k := j-1 is S-type,
778		// so we can now place k correctly into sa.
779		// If k-1 is S-type, queue k for processing later in this loop.
780		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
781		k := j - 1
782		c1 := text[k]
783		c0 := text[k-1]
784		if c0 > c1 {
785			k = -k
786		}
787
788		if cB != c1 {
789			bucket[cB] = b
790			cB = c1
791			b = bucket[cB]
792		}
793		b--
794		sa[b] = int64(k)
795	}
796}
797
798func induceSubS_32(text []int32, sa, freq, bucket []int32) {
799	// Initialize positions for right side of character buckets.
800	bucketMax_32(text, freq, bucket)
801
802	// Analogous to induceSubL_32 above,
803	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
804	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
805	// Because j-1 is type S, inserting it into sa now will sort it correctly.
806	// But we want to distinguish a j-1 with j-2 of type S from type L.
807	// We can process the former but want to leave the latter for the caller.
808	// We record the difference by negating j-1 if it is preceded by type L.
809	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
810	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
811	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
812	// and so on, in sorted but not necessarily adjacent order, until it finds
813	// one preceded by an index of type L, at which point it must stop.
814	// That index (preceded by one of type L) is an LMS-substring start.
815	//
816	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
817	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
818	// so that the loop finishes with the top of sa containing exactly
819	// the LMS-substring start indexes, sorted by LMS-substring.
820
821	// Cache recently used bucket index:
822	cB := int32(0)
823	b := bucket[cB]
824
825	top := len(sa)
826	for i := len(sa) - 1; i >= 0; i-- {
827		j := int(sa[i])
828		if j == 0 {
829			// Skip empty entry.
830			continue
831		}
832		sa[i] = 0
833		if j < 0 {
834			// Leave discovered LMS-substring start index for caller.
835			top--
836			sa[top] = int32(-j)
837			continue
838		}
839
840		// Index j was on work queue, meaning k := j-1 is S-type,
841		// so we can now place k correctly into sa.
842		// If k-1 is S-type, queue k for processing later in this loop.
843		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
844		k := j - 1
845		c1 := text[k]
846		c0 := text[k-1]
847		if c0 > c1 {
848			k = -k
849		}
850
851		if cB != c1 {
852			bucket[cB] = b
853			cB = c1
854			b = bucket[cB]
855		}
856		b--
857		sa[b] = int32(k)
858	}
859}
860
861func induceSubS_64(text []int64, sa, freq, bucket []int64) {
862	// Initialize positions for right side of character buckets.
863	bucketMax_64(text, freq, bucket)
864
865	// Analogous to induceSubL_64 above,
866	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
867	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
868	// Because j-1 is type S, inserting it into sa now will sort it correctly.
869	// But we want to distinguish a j-1 with j-2 of type S from type L.
870	// We can process the former but want to leave the latter for the caller.
871	// We record the difference by negating j-1 if it is preceded by type L.
872	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
873	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
874	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
875	// and so on, in sorted but not necessarily adjacent order, until it finds
876	// one preceded by an index of type L, at which point it must stop.
877	// That index (preceded by one of type L) is an LMS-substring start.
878	//
879	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
880	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
881	// so that the loop finishes with the top of sa containing exactly
882	// the LMS-substring start indexes, sorted by LMS-substring.
883
884	// Cache recently used bucket index:
885	cB := int64(0)
886	b := bucket[cB]
887
888	top := len(sa)
889	for i := len(sa) - 1; i >= 0; i-- {
890		j := int(sa[i])
891		if j == 0 {
892			// Skip empty entry.
893			continue
894		}
895		sa[i] = 0
896		if j < 0 {
897			// Leave discovered LMS-substring start index for caller.
898			top--
899			sa[top] = int64(-j)
900			continue
901		}
902
903		// Index j was on work queue, meaning k := j-1 is S-type,
904		// so we can now place k correctly into sa.
905		// If k-1 is S-type, queue k for processing later in this loop.
906		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
907		k := j - 1
908		c1 := text[k]
909		c0 := text[k-1]
910		if c0 > c1 {
911			k = -k
912		}
913
914		if cB != c1 {
915			bucket[cB] = b
916			cB = c1
917			b = bucket[cB]
918		}
919		b--
920		sa[b] = int64(k)
921	}
922}
923
924func length_8_64(text []byte, sa []int64, numLMS int) {
925	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
926
927	// The encoding of N text bytes into a “length” word
928	// adds 1 to each byte, packs them into the bottom
929	// N*8 bits of a word, and then bitwise inverts the result.
930	// That is, the text sequence A B C (hex 41 42 43)
931	// encodes as ^uint64(0x42_43_44).
932	// LMS-substrings can never start or end with 0xFF.
933	// Adding 1 ensures the encoded byte sequence never
934	// starts or ends with 0x00, so that present bytes can be
935	// distinguished from zero-padding in the top bits,
936	// so the length need not be separately encoded.
937	// Inverting the bytes increases the chance that a
938	// 4-byte encoding will still be ≥ len(text).
939	// In particular, if the first byte is ASCII (<= 0x7E, so +1 <= 0x7F)
940	// then the high bit of the inversion will be set,
941	// making it clearly not a valid length (it would be a negative one).
942	//
943	// cx holds the pre-inverted encoding (the packed incremented bytes).
944	cx := uint64(0) // byte-only
945
946	// This stanza (until the blank line) is the "LMS-substring iterator",
947	// described in placeLMS_8_64 above, with one line added to maintain cx.
948	c0, c1, isTypeS := byte(0), byte(0), false
949	for i := len(text) - 1; i >= 0; i-- {
950		c0, c1 = text[i], c0
951		cx = cx<<8 | uint64(c1+1) // byte-only
952		if c0 < c1 {
953			isTypeS = true
954		} else if c0 > c1 && isTypeS {
955			isTypeS = false
956
957			// Index j = i+1 is the start of an LMS-substring.
958			// Compute length or encoded text to store in sa[j/2].
959			j := i + 1
960			var code int64
961			if end == 0 {
962				code = 0
963			} else {
964				code = int64(end - j)
965				if code <= 64/8 && ^cx >= uint64(len(text)) { // byte-only
966					code = int64(^cx) // byte-only
967				} // byte-only
968			}
969			sa[j>>1] = code
970			end = j + 1
971			cx = uint64(c1 + 1) // byte-only
972		}
973	}
974}
975
976func length_32(text []int32, sa []int32, numLMS int) {
977	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
978
979	// The encoding of N text int32s into a “length” word
980	// adds 1 to each int32, packs them into the bottom
981	// N*8 bits of a word, and then bitwise inverts the result.
982	// That is, the text sequence A B C (hex 41 42 43)
983	// encodes as ^uint32(0x42_43_44).
984	// LMS-substrings can never start or end with 0xFF.
985	// Adding 1 ensures the encoded int32 sequence never
986	// starts or ends with 0x00, so that present int32s can be
987	// distinguished from zero-padding in the top bits,
988	// so the length need not be separately encoded.
989	// Inverting the int32s increases the chance that a
990	// 4-int32 encoding will still be ≥ len(text).
991	// In particular, if the first int32 is ASCII (<= 0x7E, so +1 <= 0x7F)
992	// then the high bit of the inversion will be set,
993	// making it clearly not a valid length (it would be a negative one).
994	//
995	// cx holds the pre-inverted encoding (the packed incremented int32s).
996
997	// This stanza (until the blank line) is the "LMS-substring iterator",
998	// described in placeLMS_32 above, with one line added to maintain cx.
999	c0, c1, isTypeS := int32(0), int32(0), false
1000	for i := len(text) - 1; i >= 0; i-- {
1001		c0, c1 = text[i], c0
1002		if c0 < c1 {
1003			isTypeS = true
1004		} else if c0 > c1 && isTypeS {
1005			isTypeS = false
1006
1007			// Index j = i+1 is the start of an LMS-substring.
1008			// Compute length or encoded text to store in sa[j/2].
1009			j := i + 1
1010			var code int32
1011			if end == 0 {
1012				code = 0
1013			} else {
1014				code = int32(end - j)
1015			}
1016			sa[j>>1] = code
1017			end = j + 1
1018		}
1019	}
1020}
1021
1022func length_64(text []int64, sa []int64, numLMS int) {
1023	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
1024
1025	// The encoding of N text int64s into a “length” word
1026	// adds 1 to each int64, packs them into the bottom
1027	// N*8 bits of a word, and then bitwise inverts the result.
1028	// That is, the text sequence A B C (hex 41 42 43)
1029	// encodes as ^uint64(0x42_43_44).
1030	// LMS-substrings can never start or end with 0xFF.
1031	// Adding 1 ensures the encoded int64 sequence never
1032	// starts or ends with 0x00, so that present int64s can be
1033	// distinguished from zero-padding in the top bits,
1034	// so the length need not be separately encoded.
1035	// Inverting the int64s increases the chance that a
1036	// 4-int64 encoding will still be ≥ len(text).
1037	// In particular, if the first int64 is ASCII (<= 0x7E, so +1 <= 0x7F)
1038	// then the high bit of the inversion will be set,
1039	// making it clearly not a valid length (it would be a negative one).
1040	//
1041	// cx holds the pre-inverted encoding (the packed incremented int64s).
1042
1043	// This stanza (until the blank line) is the "LMS-substring iterator",
1044	// described in placeLMS_64 above, with one line added to maintain cx.
1045	c0, c1, isTypeS := int64(0), int64(0), false
1046	for i := len(text) - 1; i >= 0; i-- {
1047		c0, c1 = text[i], c0
1048		if c0 < c1 {
1049			isTypeS = true
1050		} else if c0 > c1 && isTypeS {
1051			isTypeS = false
1052
1053			// Index j = i+1 is the start of an LMS-substring.
1054			// Compute length or encoded text to store in sa[j/2].
1055			j := i + 1
1056			var code int64
1057			if end == 0 {
1058				code = 0
1059			} else {
1060				code = int64(end - j)
1061			}
1062			sa[j>>1] = code
1063			end = j + 1
1064		}
1065	}
1066}
1067
1068func assignID_8_64(text []byte, sa []int64, numLMS int) int {
1069	id := 0
1070	lastLen := int64(-1) // impossible
1071	lastPos := int64(0)
1072	for _, j := range sa[len(sa)-numLMS:] {
1073		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
1074		n := sa[j/2]
1075		if n != lastLen {
1076			goto New
1077		}
1078		if uint64(n) >= uint64(len(text)) {
1079			// “Length” is really encoded full text, and they match.
1080			goto Same
1081		}
1082		{
1083			// Compare actual texts.
1084			n := int(n)
1085			this := text[j:][:n]
1086			last := text[lastPos:][:n]
1087			for i := 0; i < n; i++ {
1088				if this[i] != last[i] {
1089					goto New
1090				}
1091			}
1092			goto Same
1093		}
1094	New:
1095		id++
1096		lastPos = j
1097		lastLen = n
1098	Same:
1099		sa[j/2] = int64(id)
1100	}
1101	return id
1102}
1103
1104func assignID_32(text []int32, sa []int32, numLMS int) int {
1105	id := 0
1106	lastLen := int32(-1) // impossible
1107	lastPos := int32(0)
1108	for _, j := range sa[len(sa)-numLMS:] {
1109		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
1110		n := sa[j/2]
1111		if n != lastLen {
1112			goto New
1113		}
1114		if uint32(n) >= uint32(len(text)) {
1115			// “Length” is really encoded full text, and they match.
1116			goto Same
1117		}
1118		{
1119			// Compare actual texts.
1120			n := int(n)
1121			this := text[j:][:n]
1122			last := text[lastPos:][:n]
1123			for i := 0; i < n; i++ {
1124				if this[i] != last[i] {
1125					goto New
1126				}
1127			}
1128			goto Same
1129		}
1130	New:
1131		id++
1132		lastPos = j
1133		lastLen = n
1134	Same:
1135		sa[j/2] = int32(id)
1136	}
1137	return id
1138}
1139
1140func assignID_64(text []int64, sa []int64, numLMS int) int {
1141	id := 0
1142	lastLen := int64(-1) // impossible
1143	lastPos := int64(0)
1144	for _, j := range sa[len(sa)-numLMS:] {
1145		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
1146		n := sa[j/2]
1147		if n != lastLen {
1148			goto New
1149		}
1150		if uint64(n) >= uint64(len(text)) {
1151			// “Length” is really encoded full text, and they match.
1152			goto Same
1153		}
1154		{
1155			// Compare actual texts.
1156			n := int(n)
1157			this := text[j:][:n]
1158			last := text[lastPos:][:n]
1159			for i := 0; i < n; i++ {
1160				if this[i] != last[i] {
1161					goto New
1162				}
1163			}
1164			goto Same
1165		}
1166	New:
1167		id++
1168		lastPos = j
1169		lastLen = n
1170	Same:
1171		sa[j/2] = int64(id)
1172	}
1173	return id
1174}
1175
1176func map_64(sa []int64, numLMS int) {
1177	w := len(sa)
1178	for i := len(sa) / 2; i >= 0; i-- {
1179		j := sa[i]
1180		if j > 0 {
1181			w--
1182			sa[w] = j - 1
1183		}
1184	}
1185}
1186
1187func recurse_64(sa, oldTmp []int64, numLMS, maxID int) {
1188	dst, saTmp, text := sa[:numLMS], sa[numLMS:len(sa)-numLMS], sa[len(sa)-numLMS:]
1189
1190	// Set up temporary space for recursive call.
1191	// We must pass sais_64 a tmp buffer with at least maxID entries.
1192	//
1193	// The subproblem is guaranteed to have length at most len(sa)/2,
1194	// so that sa can hold both the subproblem and its suffix array.
1195	// Nearly all the time, however, the subproblem has length < len(sa)/3,
1196	// in which case there is a subproblem-sized middle of sa that
1197	// we can reuse for temporary space (saTmp).
1198	// When recurse_64 is called from sais_8_64, oldTmp is length 512
1199	// (from text_64), and saTmp will typically be much larger, so we'll use saTmp.
1200	// When deeper recursions come back to recurse_64, now oldTmp is
1201	// the saTmp from the top-most recursion, it is typically larger than
1202	// the current saTmp (because the current sa gets smaller and smaller
1203	// as the recursion gets deeper), and we keep reusing that top-most
1204	// large saTmp instead of the offered smaller ones.
1205	//
1206	// Why is the subproblem length so often just under len(sa)/3?
1207	// See Nong, Zhang, and Chen, section 3.6 for a plausible explanation.
1208	// In brief, the len(sa)/2 case would correspond to an SLSLSLSLSLSL pattern
1209	// in the input, perfect alternation of larger and smaller input bytes.
1210	// Real text doesn't do that. If each L-type index is randomly followed
1211	// by either an L-type or S-type index, then half the substrings will
1212	// be of the form SLS, but the other half will be longer. Of that half,
1213	// half (a quarter overall) will be SLLS; an eighth will be SLLLS, and so on.
1214	// Not counting the final S in each (which overlaps the first S in the next),
1215	// This works out to an average length 2×½ + 3×¼ + 4×⅛ + ... = 3.
1216	// The space we need is further reduced by the fact that many of the
1217	// short patterns like SLS will often be the same character sequences
1218	// repeated throughout the text, reducing maxID relative to numLMS.
1219	//
1220	// For short inputs, the averages may not run in our favor, but then we
1221	// can often fall back to using the length-512 tmp available in the
1222	// top-most call. (Also a short allocation would not be a big deal.)
1223	//
1224	// For pathological inputs, we fall back to allocating a new tmp of length
1225	// max(maxID, numLMS/2). This level of the recursion needs maxID,
1226	// and all deeper levels of the recursion will need no more than numLMS/2,
1227	// so this one allocation is guaranteed to suffice for the entire stack
1228	// of recursive calls.
1229	tmp := oldTmp
1230	if len(tmp) < len(saTmp) {
1231		tmp = saTmp
1232	}
1233	if len(tmp) < numLMS {
1234		// TestSAIS/forcealloc reaches this code.
1235		n := maxID
1236		if n < numLMS/2 {
1237			n = numLMS / 2
1238		}
1239		tmp = make([]int64, n)
1240	}
1241
1242	// sais_64 requires that the caller arrange to clear dst,
1243	// because in general the caller may know dst is
1244	// freshly-allocated and already cleared. But this one is not.
1245	clear(dst)
1246	sais_64(text, maxID, dst, tmp)
1247}
1248
1249func unmap_8_64(text []byte, sa []int64, numLMS int) {
1250	unmap := sa[len(sa)-numLMS:]
1251	j := len(unmap)
1252
1253	// "LMS-substring iterator" (see placeLMS_8_64 above).
1254	c0, c1, isTypeS := byte(0), byte(0), false
1255	for i := len(text) - 1; i >= 0; i-- {
1256		c0, c1 = text[i], c0
1257		if c0 < c1 {
1258			isTypeS = true
1259		} else if c0 > c1 && isTypeS {
1260			isTypeS = false
1261
1262			// Populate inverse map.
1263			j--
1264			unmap[j] = int64(i + 1)
1265		}
1266	}
1267
1268	// Apply inverse map to subproblem suffix array.
1269	sa = sa[:numLMS]
1270	for i := 0; i < len(sa); i++ {
1271		sa[i] = unmap[sa[i]]
1272	}
1273}
1274
1275func unmap_32(text []int32, sa []int32, numLMS int) {
1276	unmap := sa[len(sa)-numLMS:]
1277	j := len(unmap)
1278
1279	// "LMS-substring iterator" (see placeLMS_32 above).
1280	c0, c1, isTypeS := int32(0), int32(0), false
1281	for i := len(text) - 1; i >= 0; i-- {
1282		c0, c1 = text[i], c0
1283		if c0 < c1 {
1284			isTypeS = true
1285		} else if c0 > c1 && isTypeS {
1286			isTypeS = false
1287
1288			// Populate inverse map.
1289			j--
1290			unmap[j] = int32(i + 1)
1291		}
1292	}
1293
1294	// Apply inverse map to subproblem suffix array.
1295	sa = sa[:numLMS]
1296	for i := 0; i < len(sa); i++ {
1297		sa[i] = unmap[sa[i]]
1298	}
1299}
1300
1301func unmap_64(text []int64, sa []int64, numLMS int) {
1302	unmap := sa[len(sa)-numLMS:]
1303	j := len(unmap)
1304
1305	// "LMS-substring iterator" (see placeLMS_64 above).
1306	c0, c1, isTypeS := int64(0), int64(0), false
1307	for i := len(text) - 1; i >= 0; i-- {
1308		c0, c1 = text[i], c0
1309		if c0 < c1 {
1310			isTypeS = true
1311		} else if c0 > c1 && isTypeS {
1312			isTypeS = false
1313
1314			// Populate inverse map.
1315			j--
1316			unmap[j] = int64(i + 1)
1317		}
1318	}
1319
1320	// Apply inverse map to subproblem suffix array.
1321	sa = sa[:numLMS]
1322	for i := 0; i < len(sa); i++ {
1323		sa[i] = unmap[sa[i]]
1324	}
1325}
1326
1327func expand_8_64(text []byte, freq, bucket, sa []int64, numLMS int) {
1328	bucketMax_8_64(text, freq, bucket)
1329	bucket = bucket[:256] // eliminate bound check for bucket[c] below
1330
1331	// Loop backward through sa, always tracking
1332	// the next index to populate from sa[:numLMS].
1333	// When we get to one, populate it.
1334	// Zero the rest of the slots; they have dead values in them.
1335	x := numLMS - 1
1336	saX := sa[x]
1337	c := text[saX]
1338	b := bucket[c] - 1
1339	bucket[c] = b
1340
1341	for i := len(sa) - 1; i >= 0; i-- {
1342		if i != int(b) {
1343			sa[i] = 0
1344			continue
1345		}
1346		sa[i] = saX
1347
1348		// Load next entry to put down (if any).
1349		if x > 0 {
1350			x--
1351			saX = sa[x] // TODO bounds check
1352			c = text[saX]
1353			b = bucket[c] - 1
1354			bucket[c] = b
1355		}
1356	}
1357}
1358
1359func expand_32(text []int32, freq, bucket, sa []int32, numLMS int) {
1360	bucketMax_32(text, freq, bucket)
1361
1362	// Loop backward through sa, always tracking
1363	// the next index to populate from sa[:numLMS].
1364	// When we get to one, populate it.
1365	// Zero the rest of the slots; they have dead values in them.
1366	x := numLMS - 1
1367	saX := sa[x]
1368	c := text[saX]
1369	b := bucket[c] - 1
1370	bucket[c] = b
1371
1372	for i := len(sa) - 1; i >= 0; i-- {
1373		if i != int(b) {
1374			sa[i] = 0
1375			continue
1376		}
1377		sa[i] = saX
1378
1379		// Load next entry to put down (if any).
1380		if x > 0 {
1381			x--
1382			saX = sa[x] // TODO bounds check
1383			c = text[saX]
1384			b = bucket[c] - 1
1385			bucket[c] = b
1386		}
1387	}
1388}
1389
1390func expand_64(text []int64, freq, bucket, sa []int64, numLMS int) {
1391	bucketMax_64(text, freq, bucket)
1392
1393	// Loop backward through sa, always tracking
1394	// the next index to populate from sa[:numLMS].
1395	// When we get to one, populate it.
1396	// Zero the rest of the slots; they have dead values in them.
1397	x := numLMS - 1
1398	saX := sa[x]
1399	c := text[saX]
1400	b := bucket[c] - 1
1401	bucket[c] = b
1402
1403	for i := len(sa) - 1; i >= 0; i-- {
1404		if i != int(b) {
1405			sa[i] = 0
1406			continue
1407		}
1408		sa[i] = saX
1409
1410		// Load next entry to put down (if any).
1411		if x > 0 {
1412			x--
1413			saX = sa[x] // TODO bounds check
1414			c = text[saX]
1415			b = bucket[c] - 1
1416			bucket[c] = b
1417		}
1418	}
1419}
1420
1421func induceL_8_64(text []byte, sa, freq, bucket []int64) {
1422	// Initialize positions for left side of character buckets.
1423	bucketMin_8_64(text, freq, bucket)
1424	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
1425
1426	// This scan is similar to the one in induceSubL_8_64 above.
1427	// That one arranges to clear all but the leftmost L-type indexes.
1428	// This scan leaves all the L-type indexes and the original S-type
1429	// indexes, but it negates the positive leftmost L-type indexes
1430	// (the ones that induceS_8_64 needs to process).
1431
1432	// expand_8_64 left out the implicit entry sa[-1] == len(text),
1433	// corresponding to the identified type-L index len(text)-1.
1434	// Process it before the left-to-right scan of sa proper.
1435	// See body in loop for commentary.
1436	k := len(text) - 1
1437	c0, c1 := text[k-1], text[k]
1438	if c0 < c1 {
1439		k = -k
1440	}
1441
1442	// Cache recently used bucket index.
1443	cB := c1
1444	b := bucket[cB]
1445	sa[b] = int64(k)
1446	b++
1447
1448	for i := 0; i < len(sa); i++ {
1449		j := int(sa[i])
1450		if j <= 0 {
1451			// Skip empty or negated entry (including negated zero).
1452			continue
1453		}
1454
1455		// Index j was on work queue, meaning k := j-1 is L-type,
1456		// so we can now place k correctly into sa.
1457		// If k-1 is L-type, queue k for processing later in this loop.
1458		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
1459		// If k is zero, k-1 doesn't exist, so we only need to leave it
1460		// for the caller. The caller can't tell the difference between
1461		// an empty slot and a non-empty zero, but there's no need
1462		// to distinguish them anyway: the final suffix array will end up
1463		// with one zero somewhere, and that will be a real zero.
1464		k := j - 1
1465		c1 := text[k]
1466		if k > 0 {
1467			if c0 := text[k-1]; c0 < c1 {
1468				k = -k
1469			}
1470		}
1471
1472		if cB != c1 {
1473			bucket[cB] = b
1474			cB = c1
1475			b = bucket[cB]
1476		}
1477		sa[b] = int64(k)
1478		b++
1479	}
1480}
1481
1482func induceL_32(text []int32, sa, freq, bucket []int32) {
1483	// Initialize positions for left side of character buckets.
1484	bucketMin_32(text, freq, bucket)
1485
1486	// This scan is similar to the one in induceSubL_32 above.
1487	// That one arranges to clear all but the leftmost L-type indexes.
1488	// This scan leaves all the L-type indexes and the original S-type
1489	// indexes, but it negates the positive leftmost L-type indexes
1490	// (the ones that induceS_32 needs to process).
1491
1492	// expand_32 left out the implicit entry sa[-1] == len(text),
1493	// corresponding to the identified type-L index len(text)-1.
1494	// Process it before the left-to-right scan of sa proper.
1495	// See body in loop for commentary.
1496	k := len(text) - 1
1497	c0, c1 := text[k-1], text[k]
1498	if c0 < c1 {
1499		k = -k
1500	}
1501
1502	// Cache recently used bucket index.
1503	cB := c1
1504	b := bucket[cB]
1505	sa[b] = int32(k)
1506	b++
1507
1508	for i := 0; i < len(sa); i++ {
1509		j := int(sa[i])
1510		if j <= 0 {
1511			// Skip empty or negated entry (including negated zero).
1512			continue
1513		}
1514
1515		// Index j was on work queue, meaning k := j-1 is L-type,
1516		// so we can now place k correctly into sa.
1517		// If k-1 is L-type, queue k for processing later in this loop.
1518		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
1519		// If k is zero, k-1 doesn't exist, so we only need to leave it
1520		// for the caller. The caller can't tell the difference between
1521		// an empty slot and a non-empty zero, but there's no need
1522		// to distinguish them anyway: the final suffix array will end up
1523		// with one zero somewhere, and that will be a real zero.
1524		k := j - 1
1525		c1 := text[k]
1526		if k > 0 {
1527			if c0 := text[k-1]; c0 < c1 {
1528				k = -k
1529			}
1530		}
1531
1532		if cB != c1 {
1533			bucket[cB] = b
1534			cB = c1
1535			b = bucket[cB]
1536		}
1537		sa[b] = int32(k)
1538		b++
1539	}
1540}
1541
1542func induceL_64(text []int64, sa, freq, bucket []int64) {
1543	// Initialize positions for left side of character buckets.
1544	bucketMin_64(text, freq, bucket)
1545
1546	// This scan is similar to the one in induceSubL_64 above.
1547	// That one arranges to clear all but the leftmost L-type indexes.
1548	// This scan leaves all the L-type indexes and the original S-type
1549	// indexes, but it negates the positive leftmost L-type indexes
1550	// (the ones that induceS_64 needs to process).
1551
1552	// expand_64 left out the implicit entry sa[-1] == len(text),
1553	// corresponding to the identified type-L index len(text)-1.
1554	// Process it before the left-to-right scan of sa proper.
1555	// See body in loop for commentary.
1556	k := len(text) - 1
1557	c0, c1 := text[k-1], text[k]
1558	if c0 < c1 {
1559		k = -k
1560	}
1561
1562	// Cache recently used bucket index.
1563	cB := c1
1564	b := bucket[cB]
1565	sa[b] = int64(k)
1566	b++
1567
1568	for i := 0; i < len(sa); i++ {
1569		j := int(sa[i])
1570		if j <= 0 {
1571			// Skip empty or negated entry (including negated zero).
1572			continue
1573		}
1574
1575		// Index j was on work queue, meaning k := j-1 is L-type,
1576		// so we can now place k correctly into sa.
1577		// If k-1 is L-type, queue k for processing later in this loop.
1578		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
1579		// If k is zero, k-1 doesn't exist, so we only need to leave it
1580		// for the caller. The caller can't tell the difference between
1581		// an empty slot and a non-empty zero, but there's no need
1582		// to distinguish them anyway: the final suffix array will end up
1583		// with one zero somewhere, and that will be a real zero.
1584		k := j - 1
1585		c1 := text[k]
1586		if k > 0 {
1587			if c0 := text[k-1]; c0 < c1 {
1588				k = -k
1589			}
1590		}
1591
1592		if cB != c1 {
1593			bucket[cB] = b
1594			cB = c1
1595			b = bucket[cB]
1596		}
1597		sa[b] = int64(k)
1598		b++
1599	}
1600}
1601
1602func induceS_8_64(text []byte, sa, freq, bucket []int64) {
1603	// Initialize positions for right side of character buckets.
1604	bucketMax_8_64(text, freq, bucket)
1605	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
1606
1607	cB := byte(0)
1608	b := bucket[cB]
1609
1610	for i := len(sa) - 1; i >= 0; i-- {
1611		j := int(sa[i])
1612		if j >= 0 {
1613			// Skip non-flagged entry.
1614			// (This loop can't see an empty entry; 0 means the real zero index.)
1615			continue
1616		}
1617
1618		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
1619		j = -j
1620		sa[i] = int64(j)
1621
1622		// Index j was on work queue (encoded as -j but now decoded),
1623		// meaning k := j-1 is L-type,
1624		// so we can now place k correctly into sa.
1625		// If k-1 is S-type, queue -k for processing later in this loop.
1626		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
1627		// If k is zero, k-1 doesn't exist, so we only need to leave it
1628		// for the caller.
1629		k := j - 1
1630		c1 := text[k]
1631		if k > 0 {
1632			if c0 := text[k-1]; c0 <= c1 {
1633				k = -k
1634			}
1635		}
1636
1637		if cB != c1 {
1638			bucket[cB] = b
1639			cB = c1
1640			b = bucket[cB]
1641		}
1642		b--
1643		sa[b] = int64(k)
1644	}
1645}
1646
1647func induceS_32(text []int32, sa, freq, bucket []int32) {
1648	// Initialize positions for right side of character buckets.
1649	bucketMax_32(text, freq, bucket)
1650
1651	cB := int32(0)
1652	b := bucket[cB]
1653
1654	for i := len(sa) - 1; i >= 0; i-- {
1655		j := int(sa[i])
1656		if j >= 0 {
1657			// Skip non-flagged entry.
1658			// (This loop can't see an empty entry; 0 means the real zero index.)
1659			continue
1660		}
1661
1662		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
1663		j = -j
1664		sa[i] = int32(j)
1665
1666		// Index j was on work queue (encoded as -j but now decoded),
1667		// meaning k := j-1 is L-type,
1668		// so we can now place k correctly into sa.
1669		// If k-1 is S-type, queue -k for processing later in this loop.
1670		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
1671		// If k is zero, k-1 doesn't exist, so we only need to leave it
1672		// for the caller.
1673		k := j - 1
1674		c1 := text[k]
1675		if k > 0 {
1676			if c0 := text[k-1]; c0 <= c1 {
1677				k = -k
1678			}
1679		}
1680
1681		if cB != c1 {
1682			bucket[cB] = b
1683			cB = c1
1684			b = bucket[cB]
1685		}
1686		b--
1687		sa[b] = int32(k)
1688	}
1689}
1690
1691func induceS_64(text []int64, sa, freq, bucket []int64) {
1692	// Initialize positions for right side of character buckets.
1693	bucketMax_64(text, freq, bucket)
1694
1695	cB := int64(0)
1696	b := bucket[cB]
1697
1698	for i := len(sa) - 1; i >= 0; i-- {
1699		j := int(sa[i])
1700		if j >= 0 {
1701			// Skip non-flagged entry.
1702			// (This loop can't see an empty entry; 0 means the real zero index.)
1703			continue
1704		}
1705
1706		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
1707		j = -j
1708		sa[i] = int64(j)
1709
1710		// Index j was on work queue (encoded as -j but now decoded),
1711		// meaning k := j-1 is L-type,
1712		// so we can now place k correctly into sa.
1713		// If k-1 is S-type, queue -k for processing later in this loop.
1714		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
1715		// If k is zero, k-1 doesn't exist, so we only need to leave it
1716		// for the caller.
1717		k := j - 1
1718		c1 := text[k]
1719		if k > 0 {
1720			if c0 := text[k-1]; c0 <= c1 {
1721				k = -k
1722			}
1723		}
1724
1725		if cB != c1 {
1726			bucket[cB] = b
1727			cB = c1
1728			b = bucket[cB]
1729		}
1730		b--
1731		sa[b] = int64(k)
1732	}
1733}
1734