1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run make_tables.go
6
7// Package bits implements bit counting and manipulation
8// functions for the predeclared unsigned integer types.
9//
10// Functions in this package may be implemented directly by
11// the compiler, for better performance. For those functions
12// the code in this package will not be used. Which
13// functions are implemented by the compiler depends on the
14// architecture and the Go release.
15package bits
16
17const uintSize = 32 << (^uint(0) >> 63) // 32 or 64
18
19// UintSize is the size of a uint in bits.
20const UintSize = uintSize
21
22// --- LeadingZeros ---
23
24// LeadingZeros returns the number of leading zero bits in x; the result is [UintSize] for x == 0.
25func LeadingZeros(x uint) int { return UintSize - Len(x) }
26
27// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
28func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
29
30// LeadingZeros16 returns the number of leading zero bits in x; the result is 16 for x == 0.
31func LeadingZeros16(x uint16) int { return 16 - Len16(x) }
32
33// LeadingZeros32 returns the number of leading zero bits in x; the result is 32 for x == 0.
34func LeadingZeros32(x uint32) int { return 32 - Len32(x) }
35
36// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
37func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
38
39// --- TrailingZeros ---
40
41// See http://supertech.csail.mit.edu/papers/debruijn.pdf
42const deBruijn32 = 0x077CB531
43
44var deBruijn32tab = [32]byte{
45	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
46	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
47}
48
49const deBruijn64 = 0x03f79d71b4ca8b09
50
51var deBruijn64tab = [64]byte{
52	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
53	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
54	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
55	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
56}
57
58// TrailingZeros returns the number of trailing zero bits in x; the result is [UintSize] for x == 0.
59func TrailingZeros(x uint) int {
60	if UintSize == 32 {
61		return TrailingZeros32(uint32(x))
62	}
63	return TrailingZeros64(uint64(x))
64}
65
66// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
67func TrailingZeros8(x uint8) int {
68	return int(ntz8tab[x])
69}
70
71// TrailingZeros16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
72func TrailingZeros16(x uint16) int {
73	if x == 0 {
74		return 16
75	}
76	// see comment in TrailingZeros64
77	return int(deBruijn32tab[uint32(x&-x)*deBruijn32>>(32-5)])
78}
79
80// TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
81func TrailingZeros32(x uint32) int {
82	if x == 0 {
83		return 32
84	}
85	// see comment in TrailingZeros64
86	return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
87}
88
89// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
90func TrailingZeros64(x uint64) int {
91	if x == 0 {
92		return 64
93	}
94	// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
95	//
96	// x & -x leaves only the right-most bit set in the word. Let k be the
97	// index of that bit. Since only a single bit is set, the value is two
98	// to the power of k. Multiplying by a power of two is equivalent to
99	// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
100	// is such that all six bit, consecutive substrings are distinct.
101	// Therefore, if we have a left shifted version of this constant we can
102	// find by how many bits it was shifted by looking at which six bit
103	// substring ended up at the top of the word.
104	// (Knuth, volume 4, section 7.3.1)
105	return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
106}
107
108// --- OnesCount ---
109
110const m0 = 0x5555555555555555 // 01010101 ...
111const m1 = 0x3333333333333333 // 00110011 ...
112const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
113const m3 = 0x00ff00ff00ff00ff // etc.
114const m4 = 0x0000ffff0000ffff
115
116// OnesCount returns the number of one bits ("population count") in x.
117func OnesCount(x uint) int {
118	if UintSize == 32 {
119		return OnesCount32(uint32(x))
120	}
121	return OnesCount64(uint64(x))
122}
123
124// OnesCount8 returns the number of one bits ("population count") in x.
125func OnesCount8(x uint8) int {
126	return int(pop8tab[x])
127}
128
129// OnesCount16 returns the number of one bits ("population count") in x.
130func OnesCount16(x uint16) int {
131	return int(pop8tab[x>>8] + pop8tab[x&0xff])
132}
133
134// OnesCount32 returns the number of one bits ("population count") in x.
135func OnesCount32(x uint32) int {
136	return int(pop8tab[x>>24] + pop8tab[x>>16&0xff] + pop8tab[x>>8&0xff] + pop8tab[x&0xff])
137}
138
139// OnesCount64 returns the number of one bits ("population count") in x.
140func OnesCount64(x uint64) int {
141	// Implementation: Parallel summing of adjacent bits.
142	// See "Hacker's Delight", Chap. 5: Counting Bits.
143	// The following pattern shows the general approach:
144	//
145	//   x = x>>1&(m0&m) + x&(m0&m)
146	//   x = x>>2&(m1&m) + x&(m1&m)
147	//   x = x>>4&(m2&m) + x&(m2&m)
148	//   x = x>>8&(m3&m) + x&(m3&m)
149	//   x = x>>16&(m4&m) + x&(m4&m)
150	//   x = x>>32&(m5&m) + x&(m5&m)
151	//   return int(x)
152	//
153	// Masking (& operations) can be left away when there's no
154	// danger that a field's sum will carry over into the next
155	// field: Since the result cannot be > 64, 8 bits is enough
156	// and we can ignore the masks for the shifts by 8 and up.
157	// Per "Hacker's Delight", the first line can be simplified
158	// more, but it saves at best one instruction, so we leave
159	// it alone for clarity.
160	const m = 1<<64 - 1
161	x = x>>1&(m0&m) + x&(m0&m)
162	x = x>>2&(m1&m) + x&(m1&m)
163	x = (x>>4 + x) & (m2 & m)
164	x += x >> 8
165	x += x >> 16
166	x += x >> 32
167	return int(x) & (1<<7 - 1)
168}
169
170// --- RotateLeft ---
171
172// RotateLeft returns the value of x rotated left by (k mod [UintSize]) bits.
173// To rotate x right by k bits, call RotateLeft(x, -k).
174//
175// This function's execution time does not depend on the inputs.
176func RotateLeft(x uint, k int) uint {
177	if UintSize == 32 {
178		return uint(RotateLeft32(uint32(x), k))
179	}
180	return uint(RotateLeft64(uint64(x), k))
181}
182
183// RotateLeft8 returns the value of x rotated left by (k mod 8) bits.
184// To rotate x right by k bits, call RotateLeft8(x, -k).
185//
186// This function's execution time does not depend on the inputs.
187func RotateLeft8(x uint8, k int) uint8 {
188	const n = 8
189	s := uint(k) & (n - 1)
190	return x<<s | x>>(n-s)
191}
192
193// RotateLeft16 returns the value of x rotated left by (k mod 16) bits.
194// To rotate x right by k bits, call RotateLeft16(x, -k).
195//
196// This function's execution time does not depend on the inputs.
197func RotateLeft16(x uint16, k int) uint16 {
198	const n = 16
199	s := uint(k) & (n - 1)
200	return x<<s | x>>(n-s)
201}
202
203// RotateLeft32 returns the value of x rotated left by (k mod 32) bits.
204// To rotate x right by k bits, call RotateLeft32(x, -k).
205//
206// This function's execution time does not depend on the inputs.
207func RotateLeft32(x uint32, k int) uint32 {
208	const n = 32
209	s := uint(k) & (n - 1)
210	return x<<s | x>>(n-s)
211}
212
213// RotateLeft64 returns the value of x rotated left by (k mod 64) bits.
214// To rotate x right by k bits, call RotateLeft64(x, -k).
215//
216// This function's execution time does not depend on the inputs.
217func RotateLeft64(x uint64, k int) uint64 {
218	const n = 64
219	s := uint(k) & (n - 1)
220	return x<<s | x>>(n-s)
221}
222
223// --- Reverse ---
224
225// Reverse returns the value of x with its bits in reversed order.
226func Reverse(x uint) uint {
227	if UintSize == 32 {
228		return uint(Reverse32(uint32(x)))
229	}
230	return uint(Reverse64(uint64(x)))
231}
232
233// Reverse8 returns the value of x with its bits in reversed order.
234func Reverse8(x uint8) uint8 {
235	return rev8tab[x]
236}
237
238// Reverse16 returns the value of x with its bits in reversed order.
239func Reverse16(x uint16) uint16 {
240	return uint16(rev8tab[x>>8]) | uint16(rev8tab[x&0xff])<<8
241}
242
243// Reverse32 returns the value of x with its bits in reversed order.
244func Reverse32(x uint32) uint32 {
245	const m = 1<<32 - 1
246	x = x>>1&(m0&m) | x&(m0&m)<<1
247	x = x>>2&(m1&m) | x&(m1&m)<<2
248	x = x>>4&(m2&m) | x&(m2&m)<<4
249	return ReverseBytes32(x)
250}
251
252// Reverse64 returns the value of x with its bits in reversed order.
253func Reverse64(x uint64) uint64 {
254	const m = 1<<64 - 1
255	x = x>>1&(m0&m) | x&(m0&m)<<1
256	x = x>>2&(m1&m) | x&(m1&m)<<2
257	x = x>>4&(m2&m) | x&(m2&m)<<4
258	return ReverseBytes64(x)
259}
260
261// --- ReverseBytes ---
262
263// ReverseBytes returns the value of x with its bytes in reversed order.
264//
265// This function's execution time does not depend on the inputs.
266func ReverseBytes(x uint) uint {
267	if UintSize == 32 {
268		return uint(ReverseBytes32(uint32(x)))
269	}
270	return uint(ReverseBytes64(uint64(x)))
271}
272
273// ReverseBytes16 returns the value of x with its bytes in reversed order.
274//
275// This function's execution time does not depend on the inputs.
276func ReverseBytes16(x uint16) uint16 {
277	return x>>8 | x<<8
278}
279
280// ReverseBytes32 returns the value of x with its bytes in reversed order.
281//
282// This function's execution time does not depend on the inputs.
283func ReverseBytes32(x uint32) uint32 {
284	const m = 1<<32 - 1
285	x = x>>8&(m3&m) | x&(m3&m)<<8
286	return x>>16 | x<<16
287}
288
289// ReverseBytes64 returns the value of x with its bytes in reversed order.
290//
291// This function's execution time does not depend on the inputs.
292func ReverseBytes64(x uint64) uint64 {
293	const m = 1<<64 - 1
294	x = x>>8&(m3&m) | x&(m3&m)<<8
295	x = x>>16&(m4&m) | x&(m4&m)<<16
296	return x>>32 | x<<32
297}
298
299// --- Len ---
300
301// Len returns the minimum number of bits required to represent x; the result is 0 for x == 0.
302func Len(x uint) int {
303	if UintSize == 32 {
304		return Len32(uint32(x))
305	}
306	return Len64(uint64(x))
307}
308
309// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
310func Len8(x uint8) int {
311	return int(len8tab[x])
312}
313
314// Len16 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
315func Len16(x uint16) (n int) {
316	if x >= 1<<8 {
317		x >>= 8
318		n = 8
319	}
320	return n + int(len8tab[x])
321}
322
323// Len32 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
324func Len32(x uint32) (n int) {
325	if x >= 1<<16 {
326		x >>= 16
327		n = 16
328	}
329	if x >= 1<<8 {
330		x >>= 8
331		n += 8
332	}
333	return n + int(len8tab[x])
334}
335
336// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
337func Len64(x uint64) (n int) {
338	if x >= 1<<32 {
339		x >>= 32
340		n = 32
341	}
342	if x >= 1<<16 {
343		x >>= 16
344		n += 16
345	}
346	if x >= 1<<8 {
347		x >>= 8
348		n += 8
349	}
350	return n + int(len8tab[x])
351}
352
353// --- Add with carry ---
354
355// Add returns the sum with carry of x, y and carry: sum = x + y + carry.
356// The carry input must be 0 or 1; otherwise the behavior is undefined.
357// The carryOut output is guaranteed to be 0 or 1.
358//
359// This function's execution time does not depend on the inputs.
360func Add(x, y, carry uint) (sum, carryOut uint) {
361	if UintSize == 32 {
362		s32, c32 := Add32(uint32(x), uint32(y), uint32(carry))
363		return uint(s32), uint(c32)
364	}
365	s64, c64 := Add64(uint64(x), uint64(y), uint64(carry))
366	return uint(s64), uint(c64)
367}
368
369// Add32 returns the sum with carry of x, y and carry: sum = x + y + carry.
370// The carry input must be 0 or 1; otherwise the behavior is undefined.
371// The carryOut output is guaranteed to be 0 or 1.
372//
373// This function's execution time does not depend on the inputs.
374func Add32(x, y, carry uint32) (sum, carryOut uint32) {
375	sum64 := uint64(x) + uint64(y) + uint64(carry)
376	sum = uint32(sum64)
377	carryOut = uint32(sum64 >> 32)
378	return
379}
380
381// Add64 returns the sum with carry of x, y and carry: sum = x + y + carry.
382// The carry input must be 0 or 1; otherwise the behavior is undefined.
383// The carryOut output is guaranteed to be 0 or 1.
384//
385// This function's execution time does not depend on the inputs.
386func Add64(x, y, carry uint64) (sum, carryOut uint64) {
387	sum = x + y + carry
388	// The sum will overflow if both top bits are set (x & y) or if one of them
389	// is (x | y), and a carry from the lower place happened. If such a carry
390	// happens, the top bit will be 1 + 0 + 1 = 0 (&^ sum).
391	carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
392	return
393}
394
395// --- Subtract with borrow ---
396
397// Sub returns the difference of x, y and borrow: diff = x - y - borrow.
398// The borrow input must be 0 or 1; otherwise the behavior is undefined.
399// The borrowOut output is guaranteed to be 0 or 1.
400//
401// This function's execution time does not depend on the inputs.
402func Sub(x, y, borrow uint) (diff, borrowOut uint) {
403	if UintSize == 32 {
404		d32, b32 := Sub32(uint32(x), uint32(y), uint32(borrow))
405		return uint(d32), uint(b32)
406	}
407	d64, b64 := Sub64(uint64(x), uint64(y), uint64(borrow))
408	return uint(d64), uint(b64)
409}
410
411// Sub32 returns the difference of x, y and borrow, diff = x - y - borrow.
412// The borrow input must be 0 or 1; otherwise the behavior is undefined.
413// The borrowOut output is guaranteed to be 0 or 1.
414//
415// This function's execution time does not depend on the inputs.
416func Sub32(x, y, borrow uint32) (diff, borrowOut uint32) {
417	diff = x - y - borrow
418	// The difference will underflow if the top bit of x is not set and the top
419	// bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a borrow
420	// from the lower place happens. If that borrow happens, the result will be
421	// 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff).
422	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 31
423	return
424}
425
426// Sub64 returns the difference of x, y and borrow: diff = x - y - borrow.
427// The borrow input must be 0 or 1; otherwise the behavior is undefined.
428// The borrowOut output is guaranteed to be 0 or 1.
429//
430// This function's execution time does not depend on the inputs.
431func Sub64(x, y, borrow uint64) (diff, borrowOut uint64) {
432	diff = x - y - borrow
433	// See Sub32 for the bit logic.
434	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
435	return
436}
437
438// --- Full-width multiply ---
439
440// Mul returns the full-width product of x and y: (hi, lo) = x * y
441// with the product bits' upper half returned in hi and the lower
442// half returned in lo.
443//
444// This function's execution time does not depend on the inputs.
445func Mul(x, y uint) (hi, lo uint) {
446	if UintSize == 32 {
447		h, l := Mul32(uint32(x), uint32(y))
448		return uint(h), uint(l)
449	}
450	h, l := Mul64(uint64(x), uint64(y))
451	return uint(h), uint(l)
452}
453
454// Mul32 returns the 64-bit product of x and y: (hi, lo) = x * y
455// with the product bits' upper half returned in hi and the lower
456// half returned in lo.
457//
458// This function's execution time does not depend on the inputs.
459func Mul32(x, y uint32) (hi, lo uint32) {
460	tmp := uint64(x) * uint64(y)
461	hi, lo = uint32(tmp>>32), uint32(tmp)
462	return
463}
464
465// Mul64 returns the 128-bit product of x and y: (hi, lo) = x * y
466// with the product bits' upper half returned in hi and the lower
467// half returned in lo.
468//
469// This function's execution time does not depend on the inputs.
470func Mul64(x, y uint64) (hi, lo uint64) {
471	const mask32 = 1<<32 - 1
472	x0 := x & mask32
473	x1 := x >> 32
474	y0 := y & mask32
475	y1 := y >> 32
476	w0 := x0 * y0
477	t := x1*y0 + w0>>32
478	w1 := t & mask32
479	w2 := t >> 32
480	w1 += x0 * y1
481	hi = x1*y1 + w2 + w1>>32
482	lo = x * y
483	return
484}
485
486// --- Full-width divide ---
487
488// Div returns the quotient and remainder of (hi, lo) divided by y:
489// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
490// half in parameter hi and the lower half in parameter lo.
491// Div panics for y == 0 (division by zero) or y <= hi (quotient overflow).
492func Div(hi, lo, y uint) (quo, rem uint) {
493	if UintSize == 32 {
494		q, r := Div32(uint32(hi), uint32(lo), uint32(y))
495		return uint(q), uint(r)
496	}
497	q, r := Div64(uint64(hi), uint64(lo), uint64(y))
498	return uint(q), uint(r)
499}
500
501// Div32 returns the quotient and remainder of (hi, lo) divided by y:
502// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
503// half in parameter hi and the lower half in parameter lo.
504// Div32 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
505func Div32(hi, lo, y uint32) (quo, rem uint32) {
506	if y != 0 && y <= hi {
507		panic(overflowError)
508	}
509	z := uint64(hi)<<32 | uint64(lo)
510	quo, rem = uint32(z/uint64(y)), uint32(z%uint64(y))
511	return
512}
513
514// Div64 returns the quotient and remainder of (hi, lo) divided by y:
515// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
516// half in parameter hi and the lower half in parameter lo.
517// Div64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
518func Div64(hi, lo, y uint64) (quo, rem uint64) {
519	if y == 0 {
520		panic(divideError)
521	}
522	if y <= hi {
523		panic(overflowError)
524	}
525
526	// If high part is zero, we can directly return the results.
527	if hi == 0 {
528		return lo / y, lo % y
529	}
530
531	s := uint(LeadingZeros64(y))
532	y <<= s
533
534	const (
535		two32  = 1 << 32
536		mask32 = two32 - 1
537	)
538	yn1 := y >> 32
539	yn0 := y & mask32
540	un32 := hi<<s | lo>>(64-s)
541	un10 := lo << s
542	un1 := un10 >> 32
543	un0 := un10 & mask32
544	q1 := un32 / yn1
545	rhat := un32 - q1*yn1
546
547	for q1 >= two32 || q1*yn0 > two32*rhat+un1 {
548		q1--
549		rhat += yn1
550		if rhat >= two32 {
551			break
552		}
553	}
554
555	un21 := un32*two32 + un1 - q1*y
556	q0 := un21 / yn1
557	rhat = un21 - q0*yn1
558
559	for q0 >= two32 || q0*yn0 > two32*rhat+un0 {
560		q0--
561		rhat += yn1
562		if rhat >= two32 {
563			break
564		}
565	}
566
567	return q1*two32 + q0, (un21*two32 + un0 - q0*y) >> s
568}
569
570// Rem returns the remainder of (hi, lo) divided by y. Rem panics for
571// y == 0 (division by zero) but, unlike Div, it doesn't panic on a
572// quotient overflow.
573func Rem(hi, lo, y uint) uint {
574	if UintSize == 32 {
575		return uint(Rem32(uint32(hi), uint32(lo), uint32(y)))
576	}
577	return uint(Rem64(uint64(hi), uint64(lo), uint64(y)))
578}
579
580// Rem32 returns the remainder of (hi, lo) divided by y. Rem32 panics
581// for y == 0 (division by zero) but, unlike [Div32], it doesn't panic
582// on a quotient overflow.
583func Rem32(hi, lo, y uint32) uint32 {
584	return uint32((uint64(hi)<<32 | uint64(lo)) % uint64(y))
585}
586
587// Rem64 returns the remainder of (hi, lo) divided by y. Rem64 panics
588// for y == 0 (division by zero) but, unlike [Div64], it doesn't panic
589// on a quotient overflow.
590func Rem64(hi, lo, y uint64) uint64 {
591	// We scale down hi so that hi < y, then use Div64 to compute the
592	// rem with the guarantee that it won't panic on quotient overflow.
593	// Given that
594	//   hi ≡ hi%y    (mod y)
595	// we have
596	//   hi<<64 + lo ≡ (hi%y)<<64 + lo    (mod y)
597	_, rem := Div64(hi%y, lo, y)
598	return rem
599}
600