1// Copyright 2024 The Pigweed Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); you may not 4// use this file except in compliance with the License. You may obtain a copy of 5// the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12// License for the specific language governing permissions and limitations under 13// the License. 14 15// Program hpack_gen generates a C++ file to help parse and emit HPACK. 16package main 17 18import ( 19 "fmt" 20) 21 22func main() { 23 var statsBefore, statsAfter Stats 24 25 buildTree() 26 validate(&rootNode) 27 computeStats(&statsBefore, &rootNode) 28 29 optimize(&rootNode) 30 validate(&rootNode) 31 computeStats(&statsAfter, &rootNode) 32 33 assignTableIndices(&rootNode, 0) 34 35 fmt.Println("// Copyright 2024 The Pigweed Authors") 36 fmt.Println("//") 37 fmt.Println("// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not") 38 fmt.Println("// use this file except in compliance with the License. You may obtain a copy of") 39 fmt.Println("// the License at") 40 fmt.Println("//") 41 fmt.Println("// https://www.apache.org/licenses/LICENSE-2.0") 42 fmt.Println("//") 43 fmt.Println("// Unless required by applicable law or agreed to in writing, software") 44 fmt.Println("// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT") 45 fmt.Println("// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the") 46 fmt.Println("// License for the specific language governing permissions and limitations under") 47 fmt.Println("// the License.") 48 fmt.Println("//") 49 fmt.Println("// AUTOGENERATED FILE, DO NOT EDIT") 50 fmt.Println("//") 51 fmt.Println("// To regenerate, run `go run gen/hpack_gen.go > hpack.autogen.inc`.") 52 fmt.Println("// This file should be included in exactly one *.cc file.") 53 fmt.Println("//") 54 fmt.Println() 55 fmt.Println("// clang-format off") 56 printDecoderTable(&statsAfter) 57 fmt.Println() 58 fmt.Printf("// Decoder table stats:\n") 59 fmt.Printf("// before optimization = %+v\n", statsBefore) 60 fmt.Printf("// after optimization = %+v\n", statsAfter) 61 fmt.Println() 62 printStaticResponses() 63} 64 65type NodeType int 66 67const ( 68 BranchNode NodeType = iota 69 OutputNode 70 UnprintableNode 71 EOSNode 72) 73 74type Node struct { 75 t NodeType 76 output int // if t == OutputNode, byte to output 77 tableIndex int // if t == BranchNode, index in the decoder table 78 child [2]*Node 79} 80 81var rootNode = Node{t: BranchNode} 82 83type Stats struct { 84 numBranchNodes int 85 numOutputNodes int 86 numUnprintableNodes int 87 numInvalidNodes int 88} 89 90func buildTree() { 91 for out, code := range huffmanTable { 92 updateTree(out, code) 93 } 94} 95 96func updateTree(out int, code string) { 97 if len(code) == 0 { 98 panic(fmt.Sprintf("code for byte %v is empty", out)) 99 } 100 101 curr := &rootNode 102 103 for i := 0; i < len(code); i++ { 104 if code[i] != '0' && code[i] != '1' { 105 panic(fmt.Sprintf("code for byte %v has invalid bit '%v'", out, code[i])) 106 } 107 108 bit := code[i] - '0' 109 110 // Middle of the code: create a branch node if needed. 111 if i < len(code)-1 { 112 next := curr.child[bit] 113 if next == nil { 114 next = &Node{t: BranchNode} 115 curr.child[bit] = next 116 } else if next.t != BranchNode { 117 panic(fmt.Sprintf("code for byte %v found non-branch node at bit number %v", out, i)) 118 } 119 curr = next 120 continue 121 } 122 123 // End of the code: create a leaf node. 124 // Each code should lead to a unique leaf node. 125 if curr.child[bit] != nil { 126 panic(fmt.Sprintf("code for byte %v reached a duplicate leaf node", out)) 127 } 128 129 switch { 130 case out == EOS: 131 curr.child[bit] = &Node{t: EOSNode} 132 case out < 32 || 127 < out: 133 // Unprintable characters are allowed by the Huffman code but not in gRPC. 134 // See: https://github.com/grpc/grpc/blob/v1.60.x/doc/PROTOCOL-HTTP2.md#requests. 135 curr.child[bit] = &Node{t: UnprintableNode} 136 default: 137 curr.child[bit] = &Node{t: OutputNode, output: out} 138 } 139 } 140} 141 142func validate(node *Node) { 143 // The binary tree should be complete: there should not be missing leaf nodes. 144 if node == nil { 145 panic("unexpected nil in binary tree") 146 } 147 if node.t != BranchNode { 148 if node.child[0] != nil || node.child[1] != nil { 149 panic(fmt.Sprintf("unexpected child under node %+v", node)) 150 } 151 return 152 } 153 validate(node.child[0]) 154 validate(node.child[1]) 155} 156 157func optimize(node *Node) { 158 if node.t != BranchNode { 159 return 160 } 161 optimize(node.child[0]) 162 optimize(node.child[1]) 163 // Collapse each unprintable subtree into a single unprintable node. 164 if node.child[0].t == UnprintableNode && node.child[1].t == UnprintableNode { 165 node.t = UnprintableNode 166 node.child[0] = nil 167 node.child[1] = nil 168 } 169} 170 171func computeStats(stats *Stats, node *Node) { 172 if node == nil { 173 stats.numInvalidNodes++ 174 return 175 } 176 switch node.t { 177 case BranchNode: 178 stats.numBranchNodes++ 179 computeStats(stats, node.child[0]) 180 computeStats(stats, node.child[1]) 181 case OutputNode: 182 stats.numOutputNodes++ 183 case UnprintableNode: 184 stats.numUnprintableNodes++ 185 case EOSNode: 186 stats.numInvalidNodes++ 187 } 188} 189 190func assignTableIndices(node *Node, idx int) int { 191 if node.t != BranchNode { 192 return idx 193 } 194 node.tableIndex = idx 195 idx++ 196 // We use a preorder traversal to ensure the root node has index 0. 197 idx = assignTableIndices(node.child[0], idx) 198 idx = assignTableIndices(node.child[1], idx) 199 return idx 200} 201 202const decoderTablePrefix = ` 203// Huffman decoder table. Decoding starts at index=0. For each bit, we inspect 204// kHuffmanDecoderTable[index][bit] and take an action based on that value: 205// 206// * If the value matches 0b0..._...., set index=value 207// * If the value matches 0b1xxx_xxxx, output byte 32 + xx_xxxx, set index=0 208// * If the value matches 0b1111_1110, fail: unprintable character 209// * If the value matches 0b1111_1111, fail: decoder entered an invalid state 210// 211static constexpr std::array<uint8_t[2], %d> kHuffmanDecoderTable = {{ 212` 213const decoderTableSuffix = ` 214}}; 215` 216 217func printDecoderTable(stats *Stats) { 218 fmt.Printf(decoderTablePrefix, stats.numBranchNodes) 219 printDecoderTableEntries(&rootNode) 220 fmt.Print(decoderTableSuffix) 221} 222 223func printDecoderTableEntries(node *Node) { 224 if node.t != BranchNode { 225 return 226 } 227 228 // Print nodes in preorder, which is the same order the indices were created. 229 fmt.Printf(" /*%v=*/ {%v, %v},\n", 230 node.tableIndex, 231 toDecoderTableEntry(node.child[0]), 232 toDecoderTableEntry(node.child[1])) 233 234 printDecoderTableEntries(node.child[0]) 235 printDecoderTableEntries(node.child[1]) 236} 237 238func toDecoderTableEntry(node *Node) uint8 { 239 switch node.t { 240 case BranchNode: 241 if node.tableIndex > 127 { 242 panic(fmt.Sprintf("BranchNode index %d > 127", node.tableIndex)) 243 } 244 return uint8(node.tableIndex) 245 case OutputNode: 246 return uint8(node.output-32) | 0b1000_0000 247 case UnprintableNode: 248 return 0b1111_1110 249 case EOSNode: 250 // RFC 7541 §5.2: "A Huffman-encoded string literal containing the EOS 251 // symbol MUST be treated as a decoding error." 252 return 0b1111_1111 253 } 254 panic(fmt.Sprintf("unexpected node type %v", node.t)) 255} 256 257const responseHeaderFieldsPrefix = ` 258// HPACK-encoded header fields which form grpc Response-Headers. 259// These are the same for every response. 260static constexpr std::array<uint8_t, %d> kResponseHeaderFields = { 261` 262 263const responseHeaderFieldsSuffix = ` 264}; 265` 266 267const maxStatusCode = 16 268const responseTrailerFieldsPrefix = ` 269// HPACK-encoded header fields which form grpc Trailers. All response Trailers 270// are identical except for the status code. 271// 272// This is indexed by pw::Status::Code, which happens to be identical to grpc's 273// status code. 274struct ResponseTrailerPayload { 275 uint32_t size; 276 std::array<uint8_t, %d> bytes; 277}; 278static constexpr std::array<ResponseTrailerPayload, %d> kResponseTrailerFields = {{ 279` 280 281const responseTrailerFieldsSuffix = ` 282}}; 283` 284 285func printStaticResponses() { 286 respHeaderFields := buildResponseHeaderFields() 287 fmt.Printf(responseHeaderFieldsPrefix, len(respHeaderFields)) 288 fmt.Printf(" %s", fmtBytes(respHeaderFields)) 289 fmt.Printf("%s", responseHeaderFieldsSuffix) 290 291 maxLen := len(buildResponseTrailerFields(maxStatusCode)) 292 fmt.Printf(responseTrailerFieldsPrefix, maxLen, maxStatusCode+1) 293 for code := 0; code <= maxStatusCode; code++ { 294 payload := buildResponseTrailerFields(code) 295 fmt.Printf(" {.size=%d, .bytes={%s}},\n", len(payload), fmtBytes(payload)) 296 } 297 fmt.Printf("%s", responseTrailerFieldsSuffix) 298} 299 300func fmtBytes(bytes []byte) string { 301 var out string 302 for i, b := range bytes { 303 if i < len(bytes)-1 { 304 out += fmt.Sprintf("0x%x, ", b) 305 } else { 306 out += fmt.Sprintf("0x%x", b) 307 } 308 } 309 return out 310} 311 312// Response-Headers → ":status 200" "content-type application/grpc" 313func buildResponseHeaderFields() []byte { 314 var out []byte 315 out = append(out, 0b1000_1000) // RFC 7541 §6.1: index 8 is ":status" "200" 316 out = append(out, 0b0101_1111) // RFC 7541 §6.2.1: index 31 is "content-type" 317 out = append(out, literalEncodeWithLength("application/grpc")...) 318 return out 319} 320 321// Trailers → "grpc-status <DIGITS>" 322func buildResponseTrailerFields(statusCode int) []byte { 323 var out []byte 324 out = append(out, 0b0100_0000) // RFC 7541 §6.2.1: literal name and value 325 out = append(out, literalEncodeWithLength("grpc-status")...) 326 if statusCode >= 10 { 327 out = append(out, 0b0000_0010) // RFC 7541 §6.2.1: length of value w/out huffman 328 out = append(out, byte(statusCode/10)+'0') // RFC 7541 §6.2.1: value 329 out = append(out, byte(statusCode%10)+'0') 330 } else { 331 out = append(out, 0b0000_0001) // RFC 7541 §6.2.1: length of value w/out huffman 332 out = append(out, byte(statusCode)+'0') // RFC 7541 §6.2.1: value 333 } 334 return out 335} 336 337func literalEncodeWithLength(str string) []byte { 338 // RFC 7541 §6.2.1: we are generating this structure with H=0. Since the 339 // strings are short, there is minimal benefit to using a Huffman encoding, 340 // at most 2-3 bytes per string. 341 // +---+---+-----------------------+ 342 // | H | Value Length (7+) | 343 // +---+---------------------------+ 344 // | Value String (Length octets) | 345 // +-------------------------------+ 346 // 347 // The length must be encoded across multiple bytes if 128 or larger. See RFC 348 // 7541 §5.2. We don't write any strings that large. 349 if len(str) >= 128 { 350 panic(fmt.Sprintf("cannot encode string of length %v: %#v", len(str), str)) 351 } 352 return append([]byte{byte(len(str))}, str...) 353} 354 355// Special symbol for Huffman EOS. 356// See RFC 7541 Appendix B. 357const EOS = 256 358 359// Mapping from symbol to Huffman code. 360// Taken verbatim from RFC 7541 Appendix B. 361var huffmanTable = map[int]string{ 362 0: "1111111111000", 363 1: "11111111111111111011000", 364 2: "1111111111111111111111100010", 365 3: "1111111111111111111111100011", 366 4: "1111111111111111111111100100", 367 5: "1111111111111111111111100101", 368 6: "1111111111111111111111100110", 369 7: "1111111111111111111111100111", 370 8: "1111111111111111111111101000", 371 9: "111111111111111111101010", 372 10: "111111111111111111111111111100", 373 11: "1111111111111111111111101001", 374 12: "1111111111111111111111101010", 375 13: "111111111111111111111111111101", 376 14: "1111111111111111111111101011", 377 15: "1111111111111111111111101100", 378 16: "1111111111111111111111101101", 379 17: "1111111111111111111111101110", 380 18: "1111111111111111111111101111", 381 19: "1111111111111111111111110000", 382 20: "1111111111111111111111110001", 383 21: "1111111111111111111111110010", 384 22: "111111111111111111111111111110", 385 23: "1111111111111111111111110011", 386 24: "1111111111111111111111110100", 387 25: "1111111111111111111111110101", 388 26: "1111111111111111111111110110", 389 27: "1111111111111111111111110111", 390 28: "1111111111111111111111111000", 391 29: "1111111111111111111111111001", 392 30: "1111111111111111111111111010", 393 31: "1111111111111111111111111011", 394 32: "010100", 395 33: "1111111000", 396 34: "1111111001", 397 35: "111111111010", 398 36: "1111111111001", 399 37: "010101", 400 38: "11111000", 401 39: "11111111010", 402 40: "1111111010", 403 41: "1111111011", 404 42: "11111001", 405 43: "11111111011", 406 44: "11111010", 407 45: "010110", 408 46: "010111", 409 47: "011000", 410 48: "00000", 411 49: "00001", 412 50: "00010", 413 51: "011001", 414 52: "011010", 415 53: "011011", 416 54: "011100", 417 55: "011101", 418 56: "011110", 419 57: "011111", 420 58: "1011100", 421 59: "11111011", 422 60: "111111111111100", 423 61: "100000", 424 62: "111111111011", 425 63: "1111111100", 426 64: "1111111111010", 427 65: "100001", 428 66: "1011101", 429 67: "1011110", 430 68: "1011111", 431 69: "1100000", 432 70: "1100001", 433 71: "1100010", 434 72: "1100011", 435 73: "1100100", 436 74: "1100101", 437 75: "1100110", 438 76: "1100111", 439 77: "1101000", 440 78: "1101001", 441 79: "1101010", 442 80: "1101011", 443 81: "1101100", 444 82: "1101101", 445 83: "1101110", 446 84: "1101111", 447 85: "1110000", 448 86: "1110001", 449 87: "1110010", 450 88: "11111100", 451 89: "1110011", 452 90: "11111101", 453 91: "1111111111011", 454 92: "1111111111111110000", 455 93: "1111111111100", 456 94: "11111111111100", 457 95: "100010", 458 96: "111111111111101", 459 97: "00011", 460 98: "100011", 461 99: "00100", 462 100: "100100", 463 101: "00101", 464 102: "100101", 465 103: "100110", 466 104: "100111", 467 105: "00110", 468 106: "1110100", 469 107: "1110101", 470 108: "101000", 471 109: "101001", 472 110: "101010", 473 111: "00111", 474 112: "101011", 475 113: "1110110", 476 114: "101100", 477 115: "01000", 478 116: "01001", 479 117: "101101", 480 118: "1110111", 481 119: "1111000", 482 120: "1111001", 483 121: "1111010", 484 122: "1111011", 485 123: "111111111111110", 486 124: "11111111100", 487 125: "11111111111101", 488 126: "1111111111101", 489 127: "1111111111111111111111111100", 490 128: "11111111111111100110", 491 129: "1111111111111111010010", 492 130: "11111111111111100111", 493 131: "11111111111111101000", 494 132: "1111111111111111010011", 495 133: "1111111111111111010100", 496 134: "1111111111111111010101", 497 135: "11111111111111111011001", 498 136: "1111111111111111010110", 499 137: "11111111111111111011010", 500 138: "11111111111111111011011", 501 139: "11111111111111111011100", 502 140: "11111111111111111011101", 503 141: "11111111111111111011110", 504 142: "111111111111111111101011", 505 143: "11111111111111111011111", 506 144: "111111111111111111101100", 507 145: "111111111111111111101101", 508 146: "1111111111111111010111", 509 147: "11111111111111111100000", 510 148: "111111111111111111101110", 511 149: "11111111111111111100001", 512 150: "11111111111111111100010", 513 151: "11111111111111111100011", 514 152: "11111111111111111100100", 515 153: "111111111111111011100", 516 154: "1111111111111111011000", 517 155: "11111111111111111100101", 518 156: "1111111111111111011001", 519 157: "11111111111111111100110", 520 158: "11111111111111111100111", 521 159: "111111111111111111101111", 522 160: "1111111111111111011010", 523 161: "111111111111111011101", 524 162: "11111111111111101001", 525 163: "1111111111111111011011", 526 164: "1111111111111111011100", 527 165: "11111111111111111101000", 528 166: "11111111111111111101001", 529 167: "111111111111111011110", 530 168: "11111111111111111101010", 531 169: "1111111111111111011101", 532 170: "1111111111111111011110", 533 171: "111111111111111111110000", 534 172: "111111111111111011111", 535 173: "1111111111111111011111", 536 174: "11111111111111111101011", 537 175: "11111111111111111101100", 538 176: "111111111111111100000", 539 177: "111111111111111100001", 540 178: "1111111111111111100000", 541 179: "111111111111111100010", 542 180: "11111111111111111101101", 543 181: "1111111111111111100001", 544 182: "11111111111111111101110", 545 183: "11111111111111111101111", 546 184: "11111111111111101010", 547 185: "1111111111111111100010", 548 186: "1111111111111111100011", 549 187: "1111111111111111100100", 550 188: "11111111111111111110000", 551 189: "1111111111111111100101", 552 190: "1111111111111111100110", 553 191: "11111111111111111110001", 554 192: "11111111111111111111100000", 555 193: "11111111111111111111100001", 556 194: "11111111111111101011", 557 195: "1111111111111110001", 558 196: "1111111111111111100111", 559 197: "11111111111111111110010", 560 198: "1111111111111111101000", 561 199: "1111111111111111111101100", 562 200: "11111111111111111111100010", 563 201: "11111111111111111111100011", 564 202: "11111111111111111111100100", 565 203: "111111111111111111111011110", 566 204: "111111111111111111111011111", 567 205: "11111111111111111111100101", 568 206: "111111111111111111110001", 569 207: "1111111111111111111101101", 570 208: "1111111111111110010", 571 209: "111111111111111100011", 572 210: "11111111111111111111100110", 573 211: "111111111111111111111100000", 574 212: "111111111111111111111100001", 575 213: "11111111111111111111100111", 576 214: "111111111111111111111100010", 577 215: "111111111111111111110010", 578 216: "111111111111111100100", 579 217: "111111111111111100101", 580 218: "11111111111111111111101000", 581 219: "11111111111111111111101001", 582 220: "1111111111111111111111111101", 583 221: "111111111111111111111100011", 584 222: "111111111111111111111100100", 585 223: "111111111111111111111100101", 586 224: "11111111111111101100", 587 225: "111111111111111111110011", 588 226: "11111111111111101101", 589 227: "111111111111111100110", 590 228: "1111111111111111101001", 591 229: "111111111111111100111", 592 230: "111111111111111101000", 593 231: "11111111111111111110011", 594 232: "1111111111111111101010", 595 233: "1111111111111111101011", 596 234: "1111111111111111111101110", 597 235: "1111111111111111111101111", 598 236: "111111111111111111110100", 599 237: "111111111111111111110101", 600 238: "11111111111111111111101010", 601 239: "11111111111111111110100", 602 240: "11111111111111111111101011", 603 241: "111111111111111111111100110", 604 242: "11111111111111111111101100", 605 243: "11111111111111111111101101", 606 244: "111111111111111111111100111", 607 245: "111111111111111111111101000", 608 246: "111111111111111111111101001", 609 247: "111111111111111111111101010", 610 248: "111111111111111111111101011", 611 249: "1111111111111111111111111110", 612 250: "111111111111111111111101100", 613 251: "111111111111111111111101101", 614 252: "111111111111111111111101110", 615 253: "111111111111111111111101111", 616 254: "111111111111111111111110000", 617 255: "11111111111111111111101110", 618 256: "111111111111111111111111111111", 619} 620