xref: /aosp_15_r20/external/pigweed/pw_grpc/gen/hpack_gen.go (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1// Copyright 2024 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15// Program hpack_gen generates a C++ file to help parse and emit HPACK.
16package main
17
18import (
19	"fmt"
20)
21
22func main() {
23	var statsBefore, statsAfter Stats
24
25	buildTree()
26	validate(&rootNode)
27	computeStats(&statsBefore, &rootNode)
28
29	optimize(&rootNode)
30	validate(&rootNode)
31	computeStats(&statsAfter, &rootNode)
32
33	assignTableIndices(&rootNode, 0)
34
35	fmt.Println("// Copyright 2024 The Pigweed Authors")
36	fmt.Println("//")
37	fmt.Println("// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not")
38	fmt.Println("// use this file except in compliance with the License. You may obtain a copy of")
39	fmt.Println("// the License at")
40	fmt.Println("//")
41	fmt.Println("//     https://www.apache.org/licenses/LICENSE-2.0")
42	fmt.Println("//")
43	fmt.Println("// Unless required by applicable law or agreed to in writing, software")
44	fmt.Println("// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT")
45	fmt.Println("// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the")
46	fmt.Println("// License for the specific language governing permissions and limitations under")
47	fmt.Println("// the License.")
48	fmt.Println("//")
49	fmt.Println("// AUTOGENERATED FILE, DO NOT EDIT")
50	fmt.Println("//")
51	fmt.Println("// To regenerate, run `go run gen/hpack_gen.go > hpack.autogen.inc`.")
52	fmt.Println("// This file should be included in exactly one *.cc file.")
53	fmt.Println("//")
54	fmt.Println()
55	fmt.Println("// clang-format off")
56	printDecoderTable(&statsAfter)
57	fmt.Println()
58	fmt.Printf("// Decoder table stats:\n")
59	fmt.Printf("//   before optimization = %+v\n", statsBefore)
60	fmt.Printf("//   after  optimization = %+v\n", statsAfter)
61	fmt.Println()
62	printStaticResponses()
63}
64
65type NodeType int
66
67const (
68	BranchNode NodeType = iota
69	OutputNode
70	UnprintableNode
71	EOSNode
72)
73
74type Node struct {
75	t          NodeType
76	output     int // if t == OutputNode, byte to output
77	tableIndex int // if t == BranchNode, index in the decoder table
78	child      [2]*Node
79}
80
81var rootNode = Node{t: BranchNode}
82
83type Stats struct {
84	numBranchNodes      int
85	numOutputNodes      int
86	numUnprintableNodes int
87	numInvalidNodes     int
88}
89
90func buildTree() {
91	for out, code := range huffmanTable {
92		updateTree(out, code)
93	}
94}
95
96func updateTree(out int, code string) {
97	if len(code) == 0 {
98		panic(fmt.Sprintf("code for byte %v is empty", out))
99	}
100
101	curr := &rootNode
102
103	for i := 0; i < len(code); i++ {
104		if code[i] != '0' && code[i] != '1' {
105			panic(fmt.Sprintf("code for byte %v has invalid bit '%v'", out, code[i]))
106		}
107
108		bit := code[i] - '0'
109
110		// Middle of the code: create a branch node if needed.
111		if i < len(code)-1 {
112			next := curr.child[bit]
113			if next == nil {
114				next = &Node{t: BranchNode}
115				curr.child[bit] = next
116			} else if next.t != BranchNode {
117				panic(fmt.Sprintf("code for byte %v found non-branch node at bit number %v", out, i))
118			}
119			curr = next
120			continue
121		}
122
123		// End of the code: create a leaf node.
124		// Each code should lead to a unique leaf node.
125		if curr.child[bit] != nil {
126			panic(fmt.Sprintf("code for byte %v reached a duplicate leaf node", out))
127		}
128
129		switch {
130		case out == EOS:
131			curr.child[bit] = &Node{t: EOSNode}
132		case out < 32 || 127 < out:
133			// Unprintable characters are allowed by the Huffman code but not in gRPC.
134			// See: https://github.com/grpc/grpc/blob/v1.60.x/doc/PROTOCOL-HTTP2.md#requests.
135			curr.child[bit] = &Node{t: UnprintableNode}
136		default:
137			curr.child[bit] = &Node{t: OutputNode, output: out}
138		}
139	}
140}
141
142func validate(node *Node) {
143	// The binary tree should be complete: there should not be missing leaf nodes.
144	if node == nil {
145		panic("unexpected nil in binary tree")
146	}
147	if node.t != BranchNode {
148		if node.child[0] != nil || node.child[1] != nil {
149			panic(fmt.Sprintf("unexpected child under node %+v", node))
150		}
151		return
152	}
153	validate(node.child[0])
154	validate(node.child[1])
155}
156
157func optimize(node *Node) {
158	if node.t != BranchNode {
159		return
160	}
161	optimize(node.child[0])
162	optimize(node.child[1])
163	// Collapse each unprintable subtree into a single unprintable node.
164	if node.child[0].t == UnprintableNode && node.child[1].t == UnprintableNode {
165		node.t = UnprintableNode
166		node.child[0] = nil
167		node.child[1] = nil
168	}
169}
170
171func computeStats(stats *Stats, node *Node) {
172	if node == nil {
173		stats.numInvalidNodes++
174		return
175	}
176	switch node.t {
177	case BranchNode:
178		stats.numBranchNodes++
179		computeStats(stats, node.child[0])
180		computeStats(stats, node.child[1])
181	case OutputNode:
182		stats.numOutputNodes++
183	case UnprintableNode:
184		stats.numUnprintableNodes++
185	case EOSNode:
186		stats.numInvalidNodes++
187	}
188}
189
190func assignTableIndices(node *Node, idx int) int {
191	if node.t != BranchNode {
192		return idx
193	}
194	node.tableIndex = idx
195	idx++
196	// We use a preorder traversal to ensure the root node has index 0.
197	idx = assignTableIndices(node.child[0], idx)
198	idx = assignTableIndices(node.child[1], idx)
199	return idx
200}
201
202const decoderTablePrefix = `
203// Huffman decoder table. Decoding starts at index=0. For each bit, we inspect
204// kHuffmanDecoderTable[index][bit] and take an action based on that value:
205//
206//   * If the value matches 0b0..._...., set index=value
207//   * If the value matches 0b1xxx_xxxx, output byte 32 + xx_xxxx, set index=0
208//   * If the value matches 0b1111_1110, fail: unprintable character
209//   * If the value matches 0b1111_1111, fail: decoder entered an invalid state
210//
211static constexpr std::array<uint8_t[2], %d> kHuffmanDecoderTable = {{
212`
213const decoderTableSuffix = `
214}};
215`
216
217func printDecoderTable(stats *Stats) {
218	fmt.Printf(decoderTablePrefix, stats.numBranchNodes)
219	printDecoderTableEntries(&rootNode)
220	fmt.Print(decoderTableSuffix)
221}
222
223func printDecoderTableEntries(node *Node) {
224	if node.t != BranchNode {
225		return
226	}
227
228	// Print nodes in preorder, which is the same order the indices were created.
229	fmt.Printf("  /*%v=*/ {%v, %v},\n",
230		node.tableIndex,
231		toDecoderTableEntry(node.child[0]),
232		toDecoderTableEntry(node.child[1]))
233
234	printDecoderTableEntries(node.child[0])
235	printDecoderTableEntries(node.child[1])
236}
237
238func toDecoderTableEntry(node *Node) uint8 {
239	switch node.t {
240	case BranchNode:
241		if node.tableIndex > 127 {
242			panic(fmt.Sprintf("BranchNode index %d > 127", node.tableIndex))
243		}
244		return uint8(node.tableIndex)
245	case OutputNode:
246		return uint8(node.output-32) | 0b1000_0000
247	case UnprintableNode:
248		return 0b1111_1110
249	case EOSNode:
250		// RFC 7541 §5.2: "A Huffman-encoded string literal containing the EOS
251		// symbol MUST be treated as a decoding error."
252		return 0b1111_1111
253	}
254	panic(fmt.Sprintf("unexpected node type %v", node.t))
255}
256
257const responseHeaderFieldsPrefix = `
258// HPACK-encoded header fields which form grpc Response-Headers.
259// These are the same for every response.
260static constexpr std::array<uint8_t, %d> kResponseHeaderFields = {
261`
262
263const responseHeaderFieldsSuffix = `
264};
265`
266
267const maxStatusCode = 16
268const responseTrailerFieldsPrefix = `
269// HPACK-encoded header fields which form grpc Trailers. All response Trailers
270// are identical except for the status code.
271//
272// This is indexed by pw::Status::Code, which happens to be identical to grpc's
273// status code.
274struct ResponseTrailerPayload {
275  uint32_t size;
276  std::array<uint8_t, %d> bytes;
277};
278static constexpr std::array<ResponseTrailerPayload, %d> kResponseTrailerFields = {{
279`
280
281const responseTrailerFieldsSuffix = `
282}};
283`
284
285func printStaticResponses() {
286	respHeaderFields := buildResponseHeaderFields()
287	fmt.Printf(responseHeaderFieldsPrefix, len(respHeaderFields))
288	fmt.Printf("  %s", fmtBytes(respHeaderFields))
289	fmt.Printf("%s", responseHeaderFieldsSuffix)
290
291	maxLen := len(buildResponseTrailerFields(maxStatusCode))
292	fmt.Printf(responseTrailerFieldsPrefix, maxLen, maxStatusCode+1)
293	for code := 0; code <= maxStatusCode; code++ {
294		payload := buildResponseTrailerFields(code)
295		fmt.Printf("  {.size=%d, .bytes={%s}},\n", len(payload), fmtBytes(payload))
296	}
297	fmt.Printf("%s", responseTrailerFieldsSuffix)
298}
299
300func fmtBytes(bytes []byte) string {
301	var out string
302	for i, b := range bytes {
303		if i < len(bytes)-1 {
304			out += fmt.Sprintf("0x%x, ", b)
305		} else {
306			out += fmt.Sprintf("0x%x", b)
307		}
308	}
309	return out
310}
311
312// Response-Headers → ":status 200" "content-type application/grpc"
313func buildResponseHeaderFields() []byte {
314	var out []byte
315	out = append(out, 0b1000_1000) // RFC 7541 §6.1: index 8 is ":status" "200"
316	out = append(out, 0b0101_1111) // RFC 7541 §6.2.1: index 31 is "content-type"
317	out = append(out, literalEncodeWithLength("application/grpc")...)
318	return out
319}
320
321// Trailers → "grpc-status <DIGITS>"
322func buildResponseTrailerFields(statusCode int) []byte {
323	var out []byte
324	out = append(out, 0b0100_0000) // RFC 7541 §6.2.1: literal name and value
325	out = append(out, literalEncodeWithLength("grpc-status")...)
326	if statusCode >= 10 {
327		out = append(out, 0b0000_0010)             // RFC 7541 §6.2.1: length of value w/out huffman
328		out = append(out, byte(statusCode/10)+'0') // RFC 7541 §6.2.1: value
329		out = append(out, byte(statusCode%10)+'0')
330	} else {
331		out = append(out, 0b0000_0001)          // RFC 7541 §6.2.1: length of value w/out huffman
332		out = append(out, byte(statusCode)+'0') // RFC 7541 §6.2.1: value
333	}
334	return out
335}
336
337func literalEncodeWithLength(str string) []byte {
338	// RFC 7541 §6.2.1: we are generating this structure with H=0. Since the
339	// strings are short, there is minimal benefit to using a Huffman encoding,
340	// at most 2-3 bytes per string.
341	// +---+---+-----------------------+
342	// | H |     Value Length (7+)     |
343	// +---+---------------------------+
344	// | Value String (Length octets)  |
345	// +-------------------------------+
346	//
347	// The length must be encoded across multiple bytes if 128 or larger. See RFC
348	// 7541 §5.2. We don't write any strings that large.
349	if len(str) >= 128 {
350		panic(fmt.Sprintf("cannot encode string of length %v: %#v", len(str), str))
351	}
352	return append([]byte{byte(len(str))}, str...)
353}
354
355// Special symbol for Huffman EOS.
356// See RFC 7541 Appendix B.
357const EOS = 256
358
359// Mapping from symbol to Huffman code.
360// Taken verbatim from RFC 7541 Appendix B.
361var huffmanTable = map[int]string{
362	0:   "1111111111000",
363	1:   "11111111111111111011000",
364	2:   "1111111111111111111111100010",
365	3:   "1111111111111111111111100011",
366	4:   "1111111111111111111111100100",
367	5:   "1111111111111111111111100101",
368	6:   "1111111111111111111111100110",
369	7:   "1111111111111111111111100111",
370	8:   "1111111111111111111111101000",
371	9:   "111111111111111111101010",
372	10:  "111111111111111111111111111100",
373	11:  "1111111111111111111111101001",
374	12:  "1111111111111111111111101010",
375	13:  "111111111111111111111111111101",
376	14:  "1111111111111111111111101011",
377	15:  "1111111111111111111111101100",
378	16:  "1111111111111111111111101101",
379	17:  "1111111111111111111111101110",
380	18:  "1111111111111111111111101111",
381	19:  "1111111111111111111111110000",
382	20:  "1111111111111111111111110001",
383	21:  "1111111111111111111111110010",
384	22:  "111111111111111111111111111110",
385	23:  "1111111111111111111111110011",
386	24:  "1111111111111111111111110100",
387	25:  "1111111111111111111111110101",
388	26:  "1111111111111111111111110110",
389	27:  "1111111111111111111111110111",
390	28:  "1111111111111111111111111000",
391	29:  "1111111111111111111111111001",
392	30:  "1111111111111111111111111010",
393	31:  "1111111111111111111111111011",
394	32:  "010100",
395	33:  "1111111000",
396	34:  "1111111001",
397	35:  "111111111010",
398	36:  "1111111111001",
399	37:  "010101",
400	38:  "11111000",
401	39:  "11111111010",
402	40:  "1111111010",
403	41:  "1111111011",
404	42:  "11111001",
405	43:  "11111111011",
406	44:  "11111010",
407	45:  "010110",
408	46:  "010111",
409	47:  "011000",
410	48:  "00000",
411	49:  "00001",
412	50:  "00010",
413	51:  "011001",
414	52:  "011010",
415	53:  "011011",
416	54:  "011100",
417	55:  "011101",
418	56:  "011110",
419	57:  "011111",
420	58:  "1011100",
421	59:  "11111011",
422	60:  "111111111111100",
423	61:  "100000",
424	62:  "111111111011",
425	63:  "1111111100",
426	64:  "1111111111010",
427	65:  "100001",
428	66:  "1011101",
429	67:  "1011110",
430	68:  "1011111",
431	69:  "1100000",
432	70:  "1100001",
433	71:  "1100010",
434	72:  "1100011",
435	73:  "1100100",
436	74:  "1100101",
437	75:  "1100110",
438	76:  "1100111",
439	77:  "1101000",
440	78:  "1101001",
441	79:  "1101010",
442	80:  "1101011",
443	81:  "1101100",
444	82:  "1101101",
445	83:  "1101110",
446	84:  "1101111",
447	85:  "1110000",
448	86:  "1110001",
449	87:  "1110010",
450	88:  "11111100",
451	89:  "1110011",
452	90:  "11111101",
453	91:  "1111111111011",
454	92:  "1111111111111110000",
455	93:  "1111111111100",
456	94:  "11111111111100",
457	95:  "100010",
458	96:  "111111111111101",
459	97:  "00011",
460	98:  "100011",
461	99:  "00100",
462	100: "100100",
463	101: "00101",
464	102: "100101",
465	103: "100110",
466	104: "100111",
467	105: "00110",
468	106: "1110100",
469	107: "1110101",
470	108: "101000",
471	109: "101001",
472	110: "101010",
473	111: "00111",
474	112: "101011",
475	113: "1110110",
476	114: "101100",
477	115: "01000",
478	116: "01001",
479	117: "101101",
480	118: "1110111",
481	119: "1111000",
482	120: "1111001",
483	121: "1111010",
484	122: "1111011",
485	123: "111111111111110",
486	124: "11111111100",
487	125: "11111111111101",
488	126: "1111111111101",
489	127: "1111111111111111111111111100",
490	128: "11111111111111100110",
491	129: "1111111111111111010010",
492	130: "11111111111111100111",
493	131: "11111111111111101000",
494	132: "1111111111111111010011",
495	133: "1111111111111111010100",
496	134: "1111111111111111010101",
497	135: "11111111111111111011001",
498	136: "1111111111111111010110",
499	137: "11111111111111111011010",
500	138: "11111111111111111011011",
501	139: "11111111111111111011100",
502	140: "11111111111111111011101",
503	141: "11111111111111111011110",
504	142: "111111111111111111101011",
505	143: "11111111111111111011111",
506	144: "111111111111111111101100",
507	145: "111111111111111111101101",
508	146: "1111111111111111010111",
509	147: "11111111111111111100000",
510	148: "111111111111111111101110",
511	149: "11111111111111111100001",
512	150: "11111111111111111100010",
513	151: "11111111111111111100011",
514	152: "11111111111111111100100",
515	153: "111111111111111011100",
516	154: "1111111111111111011000",
517	155: "11111111111111111100101",
518	156: "1111111111111111011001",
519	157: "11111111111111111100110",
520	158: "11111111111111111100111",
521	159: "111111111111111111101111",
522	160: "1111111111111111011010",
523	161: "111111111111111011101",
524	162: "11111111111111101001",
525	163: "1111111111111111011011",
526	164: "1111111111111111011100",
527	165: "11111111111111111101000",
528	166: "11111111111111111101001",
529	167: "111111111111111011110",
530	168: "11111111111111111101010",
531	169: "1111111111111111011101",
532	170: "1111111111111111011110",
533	171: "111111111111111111110000",
534	172: "111111111111111011111",
535	173: "1111111111111111011111",
536	174: "11111111111111111101011",
537	175: "11111111111111111101100",
538	176: "111111111111111100000",
539	177: "111111111111111100001",
540	178: "1111111111111111100000",
541	179: "111111111111111100010",
542	180: "11111111111111111101101",
543	181: "1111111111111111100001",
544	182: "11111111111111111101110",
545	183: "11111111111111111101111",
546	184: "11111111111111101010",
547	185: "1111111111111111100010",
548	186: "1111111111111111100011",
549	187: "1111111111111111100100",
550	188: "11111111111111111110000",
551	189: "1111111111111111100101",
552	190: "1111111111111111100110",
553	191: "11111111111111111110001",
554	192: "11111111111111111111100000",
555	193: "11111111111111111111100001",
556	194: "11111111111111101011",
557	195: "1111111111111110001",
558	196: "1111111111111111100111",
559	197: "11111111111111111110010",
560	198: "1111111111111111101000",
561	199: "1111111111111111111101100",
562	200: "11111111111111111111100010",
563	201: "11111111111111111111100011",
564	202: "11111111111111111111100100",
565	203: "111111111111111111111011110",
566	204: "111111111111111111111011111",
567	205: "11111111111111111111100101",
568	206: "111111111111111111110001",
569	207: "1111111111111111111101101",
570	208: "1111111111111110010",
571	209: "111111111111111100011",
572	210: "11111111111111111111100110",
573	211: "111111111111111111111100000",
574	212: "111111111111111111111100001",
575	213: "11111111111111111111100111",
576	214: "111111111111111111111100010",
577	215: "111111111111111111110010",
578	216: "111111111111111100100",
579	217: "111111111111111100101",
580	218: "11111111111111111111101000",
581	219: "11111111111111111111101001",
582	220: "1111111111111111111111111101",
583	221: "111111111111111111111100011",
584	222: "111111111111111111111100100",
585	223: "111111111111111111111100101",
586	224: "11111111111111101100",
587	225: "111111111111111111110011",
588	226: "11111111111111101101",
589	227: "111111111111111100110",
590	228: "1111111111111111101001",
591	229: "111111111111111100111",
592	230: "111111111111111101000",
593	231: "11111111111111111110011",
594	232: "1111111111111111101010",
595	233: "1111111111111111101011",
596	234: "1111111111111111111101110",
597	235: "1111111111111111111101111",
598	236: "111111111111111111110100",
599	237: "111111111111111111110101",
600	238: "11111111111111111111101010",
601	239: "11111111111111111110100",
602	240: "11111111111111111111101011",
603	241: "111111111111111111111100110",
604	242: "11111111111111111111101100",
605	243: "11111111111111111111101101",
606	244: "111111111111111111111100111",
607	245: "111111111111111111111101000",
608	246: "111111111111111111111101001",
609	247: "111111111111111111111101010",
610	248: "111111111111111111111101011",
611	249: "1111111111111111111111111110",
612	250: "111111111111111111111101100",
613	251: "111111111111111111111101101",
614	252: "111111111111111111111101110",
615	253: "111111111111111111111101111",
616	254: "111111111111111111111110000",
617	255: "11111111111111111111101110",
618	256: "111111111111111111111111111111",
619}
620