1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package hpack implements HPACK, a compression format for
6// efficiently representing HTTP header fields in the context of HTTP/2.
7//
8// See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
9package hpack
10
11import (
12	"bytes"
13	"errors"
14	"fmt"
15)
16
17// A DecodingError is something the spec defines as a decoding error.
18type DecodingError struct {
19	Err error
20}
21
22func (de DecodingError) Error() string {
23	return fmt.Sprintf("decoding error: %v", de.Err)
24}
25
26// An InvalidIndexError is returned when an encoder references a table
27// entry before the static table or after the end of the dynamic table.
28type InvalidIndexError int
29
30func (e InvalidIndexError) Error() string {
31	return fmt.Sprintf("invalid indexed representation index %d", int(e))
32}
33
34// A HeaderField is a name-value pair. Both the name and value are
35// treated as opaque sequences of octets.
36type HeaderField struct {
37	Name, Value string
38
39	// Sensitive means that this header field should never be
40	// indexed.
41	Sensitive bool
42}
43
44// IsPseudo reports whether the header field is an http2 pseudo header.
45// That is, it reports whether it starts with a colon.
46// It is not otherwise guaranteed to be a valid pseudo header field,
47// though.
48func (hf HeaderField) IsPseudo() bool {
49	return len(hf.Name) != 0 && hf.Name[0] == ':'
50}
51
52func (hf HeaderField) String() string {
53	var suffix string
54	if hf.Sensitive {
55		suffix = " (sensitive)"
56	}
57	return fmt.Sprintf("header field %q = %q%s", hf.Name, hf.Value, suffix)
58}
59
60// Size returns the size of an entry per RFC 7541 section 4.1.
61func (hf HeaderField) Size() uint32 {
62	// https://httpwg.org/specs/rfc7541.html#rfc.section.4.1
63	// "The size of the dynamic table is the sum of the size of
64	// its entries. The size of an entry is the sum of its name's
65	// length in octets (as defined in Section 5.2), its value's
66	// length in octets (see Section 5.2), plus 32.  The size of
67	// an entry is calculated using the length of the name and
68	// value without any Huffman encoding applied."
69
70	// This can overflow if somebody makes a large HeaderField
71	// Name and/or Value by hand, but we don't care, because that
72	// won't happen on the wire because the encoding doesn't allow
73	// it.
74	return uint32(len(hf.Name) + len(hf.Value) + 32)
75}
76
77// A Decoder is the decoding context for incremental processing of
78// header blocks.
79type Decoder struct {
80	dynTab dynamicTable
81	emit   func(f HeaderField)
82
83	emitEnabled bool // whether calls to emit are enabled
84	maxStrLen   int  // 0 means unlimited
85
86	// buf is the unparsed buffer. It's only written to
87	// saveBuf if it was truncated in the middle of a header
88	// block. Because it's usually not owned, we can only
89	// process it under Write.
90	buf []byte // not owned; only valid during Write
91
92	// saveBuf is previous data passed to Write which we weren't able
93	// to fully parse before. Unlike buf, we own this data.
94	saveBuf bytes.Buffer
95
96	firstField bool // processing the first field of the header block
97}
98
99// NewDecoder returns a new decoder with the provided maximum dynamic
100// table size. The emitFunc will be called for each valid field
101// parsed, in the same goroutine as calls to Write, before Write returns.
102func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder {
103	d := &Decoder{
104		emit:        emitFunc,
105		emitEnabled: true,
106		firstField:  true,
107	}
108	d.dynTab.table.init()
109	d.dynTab.allowedMaxSize = maxDynamicTableSize
110	d.dynTab.setMaxSize(maxDynamicTableSize)
111	return d
112}
113
114// ErrStringLength is returned by Decoder.Write when the max string length
115// (as configured by Decoder.SetMaxStringLength) would be violated.
116var ErrStringLength = errors.New("hpack: string too long")
117
118// SetMaxStringLength sets the maximum size of a HeaderField name or
119// value string. If a string exceeds this length (even after any
120// decompression), Write will return ErrStringLength.
121// A value of 0 means unlimited and is the default from NewDecoder.
122func (d *Decoder) SetMaxStringLength(n int) {
123	d.maxStrLen = n
124}
125
126// SetEmitFunc changes the callback used when new header fields
127// are decoded.
128// It must be non-nil. It does not affect EmitEnabled.
129func (d *Decoder) SetEmitFunc(emitFunc func(f HeaderField)) {
130	d.emit = emitFunc
131}
132
133// SetEmitEnabled controls whether the emitFunc provided to NewDecoder
134// should be called. The default is true.
135//
136// This facility exists to let servers enforce MAX_HEADER_LIST_SIZE
137// while still decoding and keeping in-sync with decoder state, but
138// without doing unnecessary decompression or generating unnecessary
139// garbage for header fields past the limit.
140func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v }
141
142// EmitEnabled reports whether calls to the emitFunc provided to NewDecoder
143// are currently enabled. The default is true.
144func (d *Decoder) EmitEnabled() bool { return d.emitEnabled }
145
146// TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
147// underlying buffers for garbage reasons.
148
149func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
150	d.dynTab.setMaxSize(v)
151}
152
153// SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
154// stream (via dynamic table size updates) may set the maximum size
155// to.
156func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
157	d.dynTab.allowedMaxSize = v
158}
159
160type dynamicTable struct {
161	// https://httpwg.org/specs/rfc7541.html#rfc.section.2.3.2
162	table          headerFieldTable
163	size           uint32 // in bytes
164	maxSize        uint32 // current maxSize
165	allowedMaxSize uint32 // maxSize may go up to this, inclusive
166}
167
168func (dt *dynamicTable) setMaxSize(v uint32) {
169	dt.maxSize = v
170	dt.evict()
171}
172
173func (dt *dynamicTable) add(f HeaderField) {
174	dt.table.addEntry(f)
175	dt.size += f.Size()
176	dt.evict()
177}
178
179// If we're too big, evict old stuff.
180func (dt *dynamicTable) evict() {
181	var n int
182	for dt.size > dt.maxSize && n < dt.table.len() {
183		dt.size -= dt.table.ents[n].Size()
184		n++
185	}
186	dt.table.evictOldest(n)
187}
188
189func (d *Decoder) maxTableIndex() int {
190	// This should never overflow. RFC 7540 Section 6.5.2 limits the size of
191	// the dynamic table to 2^32 bytes, where each entry will occupy more than
192	// one byte. Further, the staticTable has a fixed, small length.
193	return d.dynTab.table.len() + staticTable.len()
194}
195
196func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
197	// See Section 2.3.3.
198	if i == 0 {
199		return
200	}
201	if i <= uint64(staticTable.len()) {
202		return staticTable.ents[i-1], true
203	}
204	if i > uint64(d.maxTableIndex()) {
205		return
206	}
207	// In the dynamic table, newer entries have lower indices.
208	// However, dt.ents[0] is the oldest entry. Hence, dt.ents is
209	// the reversed dynamic table.
210	dt := d.dynTab.table
211	return dt.ents[dt.len()-(int(i)-staticTable.len())], true
212}
213
214// DecodeFull decodes an entire block.
215//
216// TODO: remove this method and make it incremental later? This is
217// easier for debugging now.
218func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
219	var hf []HeaderField
220	saveFunc := d.emit
221	defer func() { d.emit = saveFunc }()
222	d.emit = func(f HeaderField) { hf = append(hf, f) }
223	if _, err := d.Write(p); err != nil {
224		return nil, err
225	}
226	if err := d.Close(); err != nil {
227		return nil, err
228	}
229	return hf, nil
230}
231
232// Close declares that the decoding is complete and resets the Decoder
233// to be reused again for a new header block. If there is any remaining
234// data in the decoder's buffer, Close returns an error.
235func (d *Decoder) Close() error {
236	if d.saveBuf.Len() > 0 {
237		d.saveBuf.Reset()
238		return DecodingError{errors.New("truncated headers")}
239	}
240	d.firstField = true
241	return nil
242}
243
244func (d *Decoder) Write(p []byte) (n int, err error) {
245	if len(p) == 0 {
246		// Prevent state machine CPU attacks (making us redo
247		// work up to the point of finding out we don't have
248		// enough data)
249		return
250	}
251	// Only copy the data if we have to. Optimistically assume
252	// that p will contain a complete header block.
253	if d.saveBuf.Len() == 0 {
254		d.buf = p
255	} else {
256		d.saveBuf.Write(p)
257		d.buf = d.saveBuf.Bytes()
258		d.saveBuf.Reset()
259	}
260
261	for len(d.buf) > 0 {
262		err = d.parseHeaderFieldRepr()
263		if err == errNeedMore {
264			// Extra paranoia, making sure saveBuf won't
265			// get too large. All the varint and string
266			// reading code earlier should already catch
267			// overlong things and return ErrStringLength,
268			// but keep this as a last resort.
269			const varIntOverhead = 8 // conservative
270			if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) {
271				return 0, ErrStringLength
272			}
273			d.saveBuf.Write(d.buf)
274			return len(p), nil
275		}
276		d.firstField = false
277		if err != nil {
278			break
279		}
280	}
281	return len(p), err
282}
283
284// errNeedMore is an internal sentinel error value that means the
285// buffer is truncated and we need to read more data before we can
286// continue parsing.
287var errNeedMore = errors.New("need more data")
288
289type indexType int
290
291const (
292	indexedTrue indexType = iota
293	indexedFalse
294	indexedNever
295)
296
297func (v indexType) indexed() bool   { return v == indexedTrue }
298func (v indexType) sensitive() bool { return v == indexedNever }
299
300// returns errNeedMore if there isn't enough data available.
301// any other error is fatal.
302// consumes d.buf iff it returns nil.
303// precondition: must be called with len(d.buf) > 0
304func (d *Decoder) parseHeaderFieldRepr() error {
305	b := d.buf[0]
306	switch {
307	case b&128 != 0:
308		// Indexed representation.
309		// High bit set?
310		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.1
311		return d.parseFieldIndexed()
312	case b&192 == 64:
313		// 6.2.1 Literal Header Field with Incremental Indexing
314		// 0b10xxxxxx: top two bits are 10
315		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.1
316		return d.parseFieldLiteral(6, indexedTrue)
317	case b&240 == 0:
318		// 6.2.2 Literal Header Field without Indexing
319		// 0b0000xxxx: top four bits are 0000
320		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.2
321		return d.parseFieldLiteral(4, indexedFalse)
322	case b&240 == 16:
323		// 6.2.3 Literal Header Field never Indexed
324		// 0b0001xxxx: top four bits are 0001
325		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.3
326		return d.parseFieldLiteral(4, indexedNever)
327	case b&224 == 32:
328		// 6.3 Dynamic Table Size Update
329		// Top three bits are '001'.
330		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.3
331		return d.parseDynamicTableSizeUpdate()
332	}
333
334	return DecodingError{errors.New("invalid encoding")}
335}
336
337// (same invariants and behavior as parseHeaderFieldRepr)
338func (d *Decoder) parseFieldIndexed() error {
339	buf := d.buf
340	idx, buf, err := readVarInt(7, buf)
341	if err != nil {
342		return err
343	}
344	hf, ok := d.at(idx)
345	if !ok {
346		return DecodingError{InvalidIndexError(idx)}
347	}
348	d.buf = buf
349	return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value})
350}
351
352// (same invariants and behavior as parseHeaderFieldRepr)
353func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
354	buf := d.buf
355	nameIdx, buf, err := readVarInt(n, buf)
356	if err != nil {
357		return err
358	}
359
360	var hf HeaderField
361	wantStr := d.emitEnabled || it.indexed()
362	var undecodedName undecodedString
363	if nameIdx > 0 {
364		ihf, ok := d.at(nameIdx)
365		if !ok {
366			return DecodingError{InvalidIndexError(nameIdx)}
367		}
368		hf.Name = ihf.Name
369	} else {
370		undecodedName, buf, err = d.readString(buf)
371		if err != nil {
372			return err
373		}
374	}
375	undecodedValue, buf, err := d.readString(buf)
376	if err != nil {
377		return err
378	}
379	if wantStr {
380		if nameIdx <= 0 {
381			hf.Name, err = d.decodeString(undecodedName)
382			if err != nil {
383				return err
384			}
385		}
386		hf.Value, err = d.decodeString(undecodedValue)
387		if err != nil {
388			return err
389		}
390	}
391	d.buf = buf
392	if it.indexed() {
393		d.dynTab.add(hf)
394	}
395	hf.Sensitive = it.sensitive()
396	return d.callEmit(hf)
397}
398
399func (d *Decoder) callEmit(hf HeaderField) error {
400	if d.maxStrLen != 0 {
401		if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen {
402			return ErrStringLength
403		}
404	}
405	if d.emitEnabled {
406		d.emit(hf)
407	}
408	return nil
409}
410
411// (same invariants and behavior as parseHeaderFieldRepr)
412func (d *Decoder) parseDynamicTableSizeUpdate() error {
413	// RFC 7541, sec 4.2: This dynamic table size update MUST occur at the
414	// beginning of the first header block following the change to the dynamic table size.
415	if !d.firstField && d.dynTab.size > 0 {
416		return DecodingError{errors.New("dynamic table size update MUST occur at the beginning of a header block")}
417	}
418
419	buf := d.buf
420	size, buf, err := readVarInt(5, buf)
421	if err != nil {
422		return err
423	}
424	if size > uint64(d.dynTab.allowedMaxSize) {
425		return DecodingError{errors.New("dynamic table size update too large")}
426	}
427	d.dynTab.setMaxSize(uint32(size))
428	d.buf = buf
429	return nil
430}
431
432var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}
433
434// readVarInt reads an unsigned variable length integer off the
435// beginning of p. n is the parameter as described in
436// https://httpwg.org/specs/rfc7541.html#rfc.section.5.1.
437//
438// n must always be between 1 and 8.
439//
440// The returned remain buffer is either a smaller suffix of p, or err != nil.
441// The error is errNeedMore if p doesn't contain a complete integer.
442func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
443	if n < 1 || n > 8 {
444		panic("bad n")
445	}
446	if len(p) == 0 {
447		return 0, p, errNeedMore
448	}
449	i = uint64(p[0])
450	if n < 8 {
451		i &= (1 << uint64(n)) - 1
452	}
453	if i < (1<<uint64(n))-1 {
454		return i, p[1:], nil
455	}
456
457	origP := p
458	p = p[1:]
459	var m uint64
460	for len(p) > 0 {
461		b := p[0]
462		p = p[1:]
463		i += uint64(b&127) << m
464		if b&128 == 0 {
465			return i, p, nil
466		}
467		m += 7
468		if m >= 63 { // TODO: proper overflow check. making this up.
469			return 0, origP, errVarintOverflow
470		}
471	}
472	return 0, origP, errNeedMore
473}
474
475// readString reads an hpack string from p.
476//
477// It returns a reference to the encoded string data to permit deferring decode costs
478// until after the caller verifies all data is present.
479func (d *Decoder) readString(p []byte) (u undecodedString, remain []byte, err error) {
480	if len(p) == 0 {
481		return u, p, errNeedMore
482	}
483	isHuff := p[0]&128 != 0
484	strLen, p, err := readVarInt(7, p)
485	if err != nil {
486		return u, p, err
487	}
488	if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) {
489		// Returning an error here means Huffman decoding errors
490		// for non-indexed strings past the maximum string length
491		// are ignored, but the server is returning an error anyway
492		// and because the string is not indexed the error will not
493		// affect the decoding state.
494		return u, nil, ErrStringLength
495	}
496	if uint64(len(p)) < strLen {
497		return u, p, errNeedMore
498	}
499	u.isHuff = isHuff
500	u.b = p[:strLen]
501	return u, p[strLen:], nil
502}
503
504type undecodedString struct {
505	isHuff bool
506	b      []byte
507}
508
509func (d *Decoder) decodeString(u undecodedString) (string, error) {
510	if !u.isHuff {
511		return string(u.b), nil
512	}
513	buf := bufPool.Get().(*bytes.Buffer)
514	buf.Reset() // don't trust others
515	var s string
516	err := huffmanDecode(buf, d.maxStrLen, u.b)
517	if err == nil {
518		s = buf.String()
519	}
520	buf.Reset() // be nice to GC
521	bufPool.Put(buf)
522	return s, err
523}
524