// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import "math"

// This encoding algorithm, which prioritizes speed over output size, is
// based on Snappy's LZ77-style encoder: github.com/golang/snappy

const (
	tableBits  = 14             // Bits used in the table.
	tableSize  = 1 << tableBits // Size of the table.
	tableMask  = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
	// Reset the buffer offset when reaching this.
	// Offsets are stored between blocks as int32 values.
	// Since the offset we are checking against is at the beginning
	// of the buffer, we need to leave room for the current and input
	// buffers so the int32 cannot overflow.
	bufferReset = math.MaxInt32 - maxStoreBlockSize*2
)

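// load32 returns the 4 bytes at b[i:i+4] as a little-endian uint32.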
func load32(b []byte, i int32) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

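// load64 returns the 8 bytes at b[i:i+8] as a little-endian uint64.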
func load64(b []byte, i int32) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

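// hash maps 4 input bytes to a table index using a multiplicative hash,
// keeping only the top tableBits bits of the product (see tableShift).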
func hash(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> tableShift
}

// These constants are defined by the Snappy implementation so that its
// assembly implementation can fast-path some 16-bytes-at-a-time copies. They
// aren't necessary in the pure Go implementation, as we don't use those same
// optimizations, but using the same thresholds doesn't really hurt.
const (
	inputMargin            = 16 - 1
	minNonLiteralBlockSize = 1 + 1 + inputMargin
)

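// tableEntry is a hash table entry: val holds the 4 hashed bytes and offset
// holds the position they were read from, biased by e.cur at insertion time.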
type tableEntry struct {
	val    uint32 // Value at destination
	offset int32
}

// deflateFast maintains the table for matches,
// and the previous byte block for cross-block matching.
type deflateFast struct {
	table [tableSize]tableEntry
	prev  []byte // Previous block, zero length if unknown.
	cur   int32  // Current match offset.
}

func newDeflateFast() *deflateFast {
	return &deflateFast{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}
}

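// A sketch of how this type is typically driven (the actual caller is the
// compressor in this package): encode is called once per input block, reusing
// the token slice, and reset is called when match history must be discarded.
// block1 and block2 below are placeholder []byte inputs.
//
//	e := newDeflateFast()
//	tokens := e.encode(nil, block1)
//	tokens = e.encode(tokens[:0], block2) // may find matches back into block1
//	e.reset() // forget history, e.g. when the Writer is reset
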
// encode encodes a block given in src and appends tokens
// to dst and returns the result.
func (e *deflateFast) encode(dst []token, src []byte) []token {
	// Ensure that e.cur doesn't wrap.
	if e.cur >= bufferReset {
		e.shiftOffsets()
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return emitLiteral(dst, src)
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load32(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = e.table[nextHash&tableMask]
			now := load32(src, nextS)
			e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
			nextHash = hash(now)

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || cv != candidate.val {
				// Out of range or not matched.
				cv = now
				continue
			}
			break
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		dst = emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			//
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchLen(s, t, src)

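			// The table lookup already matched 4 bytes and matchLen extended the
			// match by l more, so the emitted length is l+4. baseMatchLength and
			// baseMatchOffset rebase the length and distance into flate's token encoding.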
			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
			dst = append(dst, matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)))
			s += l
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load64(src, s-1)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
			x >>= 8
			currHash := hash(uint32(x))
			candidate = e.table[currHash&tableMask]
			e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || uint32(x) != candidate.val {
				cv = uint32(x >> 8)
				nextHash = hash(cv)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		dst = emitLiteral(dst, src[nextEmit:])
	}
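	// Remember this block as e.prev so the next encode call can find
	// matches that reach back across the block boundary.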
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
	return dst
}

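// emitLiteral appends one literal token per byte of lit to dst and returns dst.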
func emitLiteral(dst []token, lit []byte) []token {
	for _, v := range lit {
		dst = append(dst, literalToken(uint32(v)))
	}
	return dst
}

// matchLen returns the match length between src[s:] and src[t:].
// t can be negative to indicate the match is starting in e.prev.
// We assume that src[s-4:s] and src[t-4:t] already match.
func (e *deflateFast) matchLen(s, t int32, src []byte) int32 {
	s1 := int(s) + maxMatchLength - 4
	if s1 > len(src) {
		s1 = len(src)
	}

	// If we are inside the current block
	if t >= 0 {
		b := src[t:]
		a := src[s:s1]
		b = b[:len(a)]
		// Extend the match to be as long as possible.
		for i := range a {
			if a[i] != b[i] {
				return int32(i)
			}
		}
		return int32(len(a))
	}

	// We found a match in the previous block.
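	// t is negative here: it addresses a position in the previous block,
	// measured relative to the start of src, so len(e.prev)+t maps it into e.prev.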
	tp := int32(len(e.prev)) + t
	if tp < 0 {
		return 0
	}

	// Extend the match to be as long as possible.
	a := src[s:s1]
	b := e.prev[tp:]
	if len(b) > len(a) {
		b = b[:len(a)]
	}
	a = a[:len(b)]
	for i := range b {
		if a[i] != b[i] {
			return int32(i)
		}
	}

	// If we reached our limit, we matched everything we are
	// allowed to in the previous block and we return.
	n := int32(len(b))
	if int(s+n) == s1 {
		return n
	}

	// Continue looking for more matches in the current block.
	a = src[s+n : s1]
	b = src[:len(a)]
	for i := range a {
		if a[i] != b[i] {
			return int32(i) + n
		}
	}
	return int32(len(a)) + n
}

// reset resets the encoding history.
// This ensures that no matches are made to the previous block.
func (e *deflateFast) reset() {
	e.prev = e.prev[:0]
	// Bump the offset, so all matches will fail distance check.
	// Nothing should be >= e.cur in the table.
	e.cur += maxMatchOffset

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		e.shiftOffsets()
	}
}

// shiftOffsets will shift down all match offsets.
// This is only called in rare situations to prevent integer overflow.
//
// See https://golang.org/issue/18636 and https://github.com/golang/go/issues/34121.
func (e *deflateFast) shiftOffsets() {
	if len(e.prev) == 0 {
		// We have no history; just clear the table.
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = maxMatchOffset + 1
		return
	}

	// Shift down everything in the table that isn't already too far away.
	for i := range e.table[:] {
		v := e.table[i].offset - e.cur + maxMatchOffset + 1
		if v < 0 {
			// We want to reset e.cur to maxMatchOffset + 1, so we need to shift
			// all table entries down by (e.cur - (maxMatchOffset + 1)).
			// Because we ignore matches > maxMatchOffset, we can cap
			// any negative offsets at 0.
			v = 0
		}
		e.table[i].offset = v
	}
	e.cur = maxMatchOffset + 1
}