1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package flate
6
7import (
8	"io"
9)
10
11const (
12	// The largest offset code.
13	offsetCodeCount = 30
14
15	// The special code used to mark the end of a block.
16	endBlockMarker = 256
17
18	// The first length code.
19	lengthCodesStart = 257
20
21	// The number of codegen codes.
22	codegenCodeCount = 19
23	badCode          = 255
24
25	// bufferFlushSize indicates the buffer size
26	// after which bytes are flushed to the writer.
27	// Should preferably be a multiple of 6, since
28	// we accumulate 6 bytes between writes to the buffer.
29	bufferFlushSize = 240
30
31	// bufferSize is the actual output byte buffer size.
32	// It must have additional headroom for a flush
33	// which can contain up to 8 bytes.
34	bufferSize = bufferFlushSize + 8
35)
36
37// The number of extra bits needed by length code X - LENGTH_CODES_START.
38var lengthExtraBits = []int8{
39	/* 257 */ 0, 0, 0,
40	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
41	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
42	/* 280 */ 4, 5, 5, 5, 5, 0,
43}
44
45// The length indicated by length code X - LENGTH_CODES_START.
46var lengthBase = []uint32{
47	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
48	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
49	64, 80, 96, 112, 128, 160, 192, 224, 255,
50}
51
52// offset code word extra bits.
53var offsetExtraBits = []int8{
54	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
55	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
56	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
57}
58
59var offsetBase = []uint32{
60	0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
61	0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
62	0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
63	0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
64	0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
65	0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
66}
67
68// The odd order in which the codegen code sizes are written.
69var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
70
71type huffmanBitWriter struct {
72	// writer is the underlying writer.
73	// Do not use it directly; use the write method, which ensures
74	// that Write errors are sticky.
75	writer io.Writer
76
77	// Data waiting to be written is bytes[0:nbytes]
78	// and then the low nbits of bits.  Data is always written
79	// sequentially into the bytes array.
80	bits            uint64
81	nbits           uint
82	bytes           [bufferSize]byte
83	codegenFreq     [codegenCodeCount]int32
84	nbytes          int
85	literalFreq     []int32
86	offsetFreq      []int32
87	codegen         []uint8
88	literalEncoding *huffmanEncoder
89	offsetEncoding  *huffmanEncoder
90	codegenEncoding *huffmanEncoder
91	err             error
92}
93
94func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
95	return &huffmanBitWriter{
96		writer:          w,
97		literalFreq:     make([]int32, maxNumLit),
98		offsetFreq:      make([]int32, offsetCodeCount),
99		codegen:         make([]uint8, maxNumLit+offsetCodeCount+1),
100		literalEncoding: newHuffmanEncoder(maxNumLit),
101		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
102		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
103	}
104}
105
106func (w *huffmanBitWriter) reset(writer io.Writer) {
107	w.writer = writer
108	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
109}
110
111func (w *huffmanBitWriter) flush() {
112	if w.err != nil {
113		w.nbits = 0
114		return
115	}
116	n := w.nbytes
117	for w.nbits != 0 {
118		w.bytes[n] = byte(w.bits)
119		w.bits >>= 8
120		if w.nbits > 8 { // Avoid underflow
121			w.nbits -= 8
122		} else {
123			w.nbits = 0
124		}
125		n++
126	}
127	w.bits = 0
128	w.write(w.bytes[:n])
129	w.nbytes = 0
130}
131
132func (w *huffmanBitWriter) write(b []byte) {
133	if w.err != nil {
134		return
135	}
136	_, w.err = w.writer.Write(b)
137}
138
139func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
140	if w.err != nil {
141		return
142	}
143	w.bits |= uint64(b) << w.nbits
144	w.nbits += nb
145	if w.nbits >= 48 {
146		bits := w.bits
147		w.bits >>= 48
148		w.nbits -= 48
149		n := w.nbytes
150		bytes := w.bytes[n : n+6]
151		bytes[0] = byte(bits)
152		bytes[1] = byte(bits >> 8)
153		bytes[2] = byte(bits >> 16)
154		bytes[3] = byte(bits >> 24)
155		bytes[4] = byte(bits >> 32)
156		bytes[5] = byte(bits >> 40)
157		n += 6
158		if n >= bufferFlushSize {
159			w.write(w.bytes[:n])
160			n = 0
161		}
162		w.nbytes = n
163	}
164}
165
166func (w *huffmanBitWriter) writeBytes(bytes []byte) {
167	if w.err != nil {
168		return
169	}
170	n := w.nbytes
171	if w.nbits&7 != 0 {
172		w.err = InternalError("writeBytes with unfinished bits")
173		return
174	}
175	for w.nbits != 0 {
176		w.bytes[n] = byte(w.bits)
177		w.bits >>= 8
178		w.nbits -= 8
179		n++
180	}
181	if n != 0 {
182		w.write(w.bytes[:n])
183	}
184	w.nbytes = 0
185	w.write(bytes)
186}
187
188// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
189// the literal and offset lengths arrays (which are concatenated into a single
190// array).  This method generates that run-length encoding.
191//
192// The result is written into the codegen array, and the frequencies
193// of each code is written into the codegenFreq array.
194// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
195// information. Code badCode is an end marker
196//
197//	numLiterals      The number of literals in literalEncoding
198//	numOffsets       The number of offsets in offsetEncoding
199//	litenc, offenc   The literal and offset encoder to use
200func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
201	for i := range w.codegenFreq {
202		w.codegenFreq[i] = 0
203	}
204	// Note that we are using codegen both as a temporary variable for holding
205	// a copy of the frequencies, and as the place where we put the result.
206	// This is fine because the output is always shorter than the input used
207	// so far.
208	codegen := w.codegen // cache
209	// Copy the concatenated code sizes to codegen. Put a marker at the end.
210	cgnl := codegen[:numLiterals]
211	for i := range cgnl {
212		cgnl[i] = uint8(litEnc.codes[i].len)
213	}
214
215	cgnl = codegen[numLiterals : numLiterals+numOffsets]
216	for i := range cgnl {
217		cgnl[i] = uint8(offEnc.codes[i].len)
218	}
219	codegen[numLiterals+numOffsets] = badCode
220
221	size := codegen[0]
222	count := 1
223	outIndex := 0
224	for inIndex := 1; size != badCode; inIndex++ {
225		// INVARIANT: We have seen "count" copies of size that have not yet
226		// had output generated for them.
227		nextSize := codegen[inIndex]
228		if nextSize == size {
229			count++
230			continue
231		}
232		// We need to generate codegen indicating "count" of size.
233		if size != 0 {
234			codegen[outIndex] = size
235			outIndex++
236			w.codegenFreq[size]++
237			count--
238			for count >= 3 {
239				n := 6
240				if n > count {
241					n = count
242				}
243				codegen[outIndex] = 16
244				outIndex++
245				codegen[outIndex] = uint8(n - 3)
246				outIndex++
247				w.codegenFreq[16]++
248				count -= n
249			}
250		} else {
251			for count >= 11 {
252				n := 138
253				if n > count {
254					n = count
255				}
256				codegen[outIndex] = 18
257				outIndex++
258				codegen[outIndex] = uint8(n - 11)
259				outIndex++
260				w.codegenFreq[18]++
261				count -= n
262			}
263			if count >= 3 {
264				// count >= 3 && count <= 10
265				codegen[outIndex] = 17
266				outIndex++
267				codegen[outIndex] = uint8(count - 3)
268				outIndex++
269				w.codegenFreq[17]++
270				count = 0
271			}
272		}
273		count--
274		for ; count >= 0; count-- {
275			codegen[outIndex] = size
276			outIndex++
277			w.codegenFreq[size]++
278		}
279		// Set up invariant for next time through the loop.
280		size = nextSize
281		count = 1
282	}
283	// Marker indicating the end of the codegen.
284	codegen[outIndex] = badCode
285}
286
287// dynamicSize returns the size of dynamically encoded data in bits.
288func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
289	numCodegens = len(w.codegenFreq)
290	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
291		numCodegens--
292	}
293	header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
294		w.codegenEncoding.bitLength(w.codegenFreq[:]) +
295		int(w.codegenFreq[16])*2 +
296		int(w.codegenFreq[17])*3 +
297		int(w.codegenFreq[18])*7
298	size = header +
299		litEnc.bitLength(w.literalFreq) +
300		offEnc.bitLength(w.offsetFreq) +
301		extraBits
302
303	return size, numCodegens
304}
305
306// fixedSize returns the size of dynamically encoded data in bits.
307func (w *huffmanBitWriter) fixedSize(extraBits int) int {
308	return 3 +
309		fixedLiteralEncoding.bitLength(w.literalFreq) +
310		fixedOffsetEncoding.bitLength(w.offsetFreq) +
311		extraBits
312}
313
314// storedSize calculates the stored size, including header.
315// The function returns the size in bits and whether the block
316// fits inside a single block.
317func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
318	if in == nil {
319		return 0, false
320	}
321	if len(in) <= maxStoreBlockSize {
322		return (len(in) + 5) * 8, true
323	}
324	return 0, false
325}
326
327func (w *huffmanBitWriter) writeCode(c hcode) {
328	if w.err != nil {
329		return
330	}
331	w.bits |= uint64(c.code) << w.nbits
332	w.nbits += uint(c.len)
333	if w.nbits >= 48 {
334		bits := w.bits
335		w.bits >>= 48
336		w.nbits -= 48
337		n := w.nbytes
338		bytes := w.bytes[n : n+6]
339		bytes[0] = byte(bits)
340		bytes[1] = byte(bits >> 8)
341		bytes[2] = byte(bits >> 16)
342		bytes[3] = byte(bits >> 24)
343		bytes[4] = byte(bits >> 32)
344		bytes[5] = byte(bits >> 40)
345		n += 6
346		if n >= bufferFlushSize {
347			w.write(w.bytes[:n])
348			n = 0
349		}
350		w.nbytes = n
351	}
352}
353
354// Write the header of a dynamic Huffman block to the output stream.
355//
356//	numLiterals  The number of literals specified in codegen
357//	numOffsets   The number of offsets specified in codegen
358//	numCodegens  The number of codegens used in codegen
359func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
360	if w.err != nil {
361		return
362	}
363	var firstBits int32 = 4
364	if isEof {
365		firstBits = 5
366	}
367	w.writeBits(firstBits, 3)
368	w.writeBits(int32(numLiterals-257), 5)
369	w.writeBits(int32(numOffsets-1), 5)
370	w.writeBits(int32(numCodegens-4), 4)
371
372	for i := 0; i < numCodegens; i++ {
373		value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
374		w.writeBits(int32(value), 3)
375	}
376
377	i := 0
378	for {
379		var codeWord int = int(w.codegen[i])
380		i++
381		if codeWord == badCode {
382			break
383		}
384		w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
385
386		switch codeWord {
387		case 16:
388			w.writeBits(int32(w.codegen[i]), 2)
389			i++
390		case 17:
391			w.writeBits(int32(w.codegen[i]), 3)
392			i++
393		case 18:
394			w.writeBits(int32(w.codegen[i]), 7)
395			i++
396		}
397	}
398}
399
400func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
401	if w.err != nil {
402		return
403	}
404	var flag int32
405	if isEof {
406		flag = 1
407	}
408	w.writeBits(flag, 3)
409	w.flush()
410	w.writeBits(int32(length), 16)
411	w.writeBits(int32(^uint16(length)), 16)
412}
413
414func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
415	if w.err != nil {
416		return
417	}
418	// Indicate that we are a fixed Huffman block
419	var value int32 = 2
420	if isEof {
421		value = 3
422	}
423	w.writeBits(value, 3)
424}
425
426// writeBlock will write a block of tokens with the smallest encoding.
427// The original input can be supplied, and if the huffman encoded data
428// is larger than the original bytes, the data will be written as a
429// stored block.
430// If the input is nil, the tokens will always be Huffman encoded.
431func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
432	if w.err != nil {
433		return
434	}
435
436	tokens = append(tokens, endBlockMarker)
437	numLiterals, numOffsets := w.indexTokens(tokens)
438
439	var extraBits int
440	storedSize, storable := w.storedSize(input)
441	if storable {
442		// We only bother calculating the costs of the extra bits required by
443		// the length of offset fields (which will be the same for both fixed
444		// and dynamic encoding), if we need to compare those two encodings
445		// against stored encoding.
446		for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
447			// First eight length codes have extra size = 0.
448			extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
449		}
450		for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
451			// First four offset codes have extra size = 0.
452			extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
453		}
454	}
455
456	// Figure out smallest code.
457	// Fixed Huffman baseline.
458	var literalEncoding = fixedLiteralEncoding
459	var offsetEncoding = fixedOffsetEncoding
460	var size = w.fixedSize(extraBits)
461
462	// Dynamic Huffman?
463	var numCodegens int
464
465	// Generate codegen and codegenFrequencies, which indicates how to encode
466	// the literalEncoding and the offsetEncoding.
467	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
468	w.codegenEncoding.generate(w.codegenFreq[:], 7)
469	dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
470
471	if dynamicSize < size {
472		size = dynamicSize
473		literalEncoding = w.literalEncoding
474		offsetEncoding = w.offsetEncoding
475	}
476
477	// Stored bytes?
478	if storable && storedSize < size {
479		w.writeStoredHeader(len(input), eof)
480		w.writeBytes(input)
481		return
482	}
483
484	// Huffman.
485	if literalEncoding == fixedLiteralEncoding {
486		w.writeFixedHeader(eof)
487	} else {
488		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
489	}
490
491	// Write the tokens.
492	w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
493}
494
495// writeBlockDynamic encodes a block using a dynamic Huffman table.
496// This should be used if the symbols used have a disproportionate
497// histogram distribution.
498// If input is supplied and the compression savings are below 1/16th of the
499// input size the block is stored.
500func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
501	if w.err != nil {
502		return
503	}
504
505	tokens = append(tokens, endBlockMarker)
506	numLiterals, numOffsets := w.indexTokens(tokens)
507
508	// Generate codegen and codegenFrequencies, which indicates how to encode
509	// the literalEncoding and the offsetEncoding.
510	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
511	w.codegenEncoding.generate(w.codegenFreq[:], 7)
512	size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
513
514	// Store bytes, if we don't get a reasonable improvement.
515	if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
516		w.writeStoredHeader(len(input), eof)
517		w.writeBytes(input)
518		return
519	}
520
521	// Write Huffman table.
522	w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
523
524	// Write the tokens.
525	w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
526}
527
528// indexTokens indexes a slice of tokens, and updates
529// literalFreq and offsetFreq, and generates literalEncoding
530// and offsetEncoding.
531// The number of literal and offset tokens is returned.
532func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
533	for i := range w.literalFreq {
534		w.literalFreq[i] = 0
535	}
536	for i := range w.offsetFreq {
537		w.offsetFreq[i] = 0
538	}
539
540	for _, t := range tokens {
541		if t < matchType {
542			w.literalFreq[t.literal()]++
543			continue
544		}
545		length := t.length()
546		offset := t.offset()
547		w.literalFreq[lengthCodesStart+lengthCode(length)]++
548		w.offsetFreq[offsetCode(offset)]++
549	}
550
551	// get the number of literals
552	numLiterals = len(w.literalFreq)
553	for w.literalFreq[numLiterals-1] == 0 {
554		numLiterals--
555	}
556	// get the number of offsets
557	numOffsets = len(w.offsetFreq)
558	for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
559		numOffsets--
560	}
561	if numOffsets == 0 {
562		// We haven't found a single match. If we want to go with the dynamic encoding,
563		// we should count at least one offset to be sure that the offset huffman tree could be encoded.
564		w.offsetFreq[0] = 1
565		numOffsets = 1
566	}
567	w.literalEncoding.generate(w.literalFreq, 15)
568	w.offsetEncoding.generate(w.offsetFreq, 15)
569	return
570}
571
572// writeTokens writes a slice of tokens to the output.
573// codes for literal and offset encoding must be supplied.
574func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
575	if w.err != nil {
576		return
577	}
578	for _, t := range tokens {
579		if t < matchType {
580			w.writeCode(leCodes[t.literal()])
581			continue
582		}
583		// Write the length
584		length := t.length()
585		lengthCode := lengthCode(length)
586		w.writeCode(leCodes[lengthCode+lengthCodesStart])
587		extraLengthBits := uint(lengthExtraBits[lengthCode])
588		if extraLengthBits > 0 {
589			extraLength := int32(length - lengthBase[lengthCode])
590			w.writeBits(extraLength, extraLengthBits)
591		}
592		// Write the offset
593		offset := t.offset()
594		offsetCode := offsetCode(offset)
595		w.writeCode(oeCodes[offsetCode])
596		extraOffsetBits := uint(offsetExtraBits[offsetCode])
597		if extraOffsetBits > 0 {
598			extraOffset := int32(offset - offsetBase[offsetCode])
599			w.writeBits(extraOffset, extraOffsetBits)
600		}
601	}
602}
603
604// huffOffset is a static offset encoder used for huffman only encoding.
605// It can be reused since we will not be encoding offset values.
606var huffOffset *huffmanEncoder
607
608func init() {
609	offsetFreq := make([]int32, offsetCodeCount)
610	offsetFreq[0] = 1
611	huffOffset = newHuffmanEncoder(offsetCodeCount)
612	huffOffset.generate(offsetFreq, 15)
613}
614
615// writeBlockHuff encodes a block of bytes as either
616// Huffman encoded literals or uncompressed bytes if the
617// results only gains very little from compression.
618func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
619	if w.err != nil {
620		return
621	}
622
623	// Clear histogram
624	for i := range w.literalFreq {
625		w.literalFreq[i] = 0
626	}
627
628	// Add everything as literals
629	histogram(input, w.literalFreq)
630
631	w.literalFreq[endBlockMarker] = 1
632
633	const numLiterals = endBlockMarker + 1
634	w.offsetFreq[0] = 1
635	const numOffsets = 1
636
637	w.literalEncoding.generate(w.literalFreq, 15)
638
639	// Figure out smallest code.
640	// Always use dynamic Huffman or Store
641	var numCodegens int
642
643	// Generate codegen and codegenFrequencies, which indicates how to encode
644	// the literalEncoding and the offsetEncoding.
645	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
646	w.codegenEncoding.generate(w.codegenFreq[:], 7)
647	size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
648
649	// Store bytes, if we don't get a reasonable improvement.
650	if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
651		w.writeStoredHeader(len(input), eof)
652		w.writeBytes(input)
653		return
654	}
655
656	// Huffman.
657	w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
658	encoding := w.literalEncoding.codes[:257]
659	n := w.nbytes
660	for _, t := range input {
661		// Bitwriting inlined, ~30% speedup
662		c := encoding[t]
663		w.bits |= uint64(c.code) << w.nbits
664		w.nbits += uint(c.len)
665		if w.nbits < 48 {
666			continue
667		}
668		// Store 6 bytes
669		bits := w.bits
670		w.bits >>= 48
671		w.nbits -= 48
672		bytes := w.bytes[n : n+6]
673		bytes[0] = byte(bits)
674		bytes[1] = byte(bits >> 8)
675		bytes[2] = byte(bits >> 16)
676		bytes[3] = byte(bits >> 24)
677		bytes[4] = byte(bits >> 32)
678		bytes[5] = byte(bits >> 40)
679		n += 6
680		if n < bufferFlushSize {
681			continue
682		}
683		w.write(w.bytes[:n])
684		if w.err != nil {
685			return // Return early in the event of write failures
686		}
687		n = 0
688	}
689	w.nbytes = n
690	w.writeCode(encoding[endBlockMarker])
691}
692
693// histogram accumulates a histogram of b in h.
694//
695// len(h) must be >= 256, and h's elements must be all zeroes.
696func histogram(b []byte, h []int32) {
697	h = h[:256]
698	for _, t := range b {
699		h[t]++
700	}
701}
702