xref: /aosp_15_r20/external/brotli/scripts/dictionary/step-02-rfc-to-bin.py (revision f4ee7fba7774faf2a30f13154332c0a06550dbc4)
# Step 02 - parse RFC.
#
# Static dictionary is described in "Appendix A" section in a hexadecimal form.
# This tool locates dictionary data in RFC and converts it to raw binary format.
#
# Input:  rfc7932.txt (text of the RFC, read from the current directory).
# Output: dictionary.bin (raw dictionary bytes, written to the current
#         directory).
# The script exits with an error, without writing the output file, when the
# expected amount of dictionary data could not be extracted.

import re

rfc_path = "rfc7932.txt"
bin_path = "dictionary.bin"

# Number of bytes to collect; parsing stops as soon as this many are gathered.
dictionary_size = 122784

# A data line is exactly 6 spaces followed by 64 lowercase hex digits, i.e.
# 32 bytes of dictionary data per line.
re_data_line = re.compile(r"^      [0-9a-f]{64}$")

appendix_a_found = False
dictionary = bytearray()
with open(rfc_path, "r") as rfc:
  for line in rfc:
    if not appendix_a_found:
      # Everything before the "Appendix A." heading is ignored; the heading
      # line itself carries no data.
      appendix_a_found = line.startswith("Appendix A.")
      continue
    if re_data_line.match(line) is None:
      # Lines inside the appendix that are not pure hex data are skipped.
      continue
    dictionary.extend(bytearray.fromhex(line.strip()))
    if len(dictionary) == dictionary_size:
      break

# Do not emit a missing or truncated dictionary: later steps would consume it
# as if it were complete.
if len(dictionary) != dictionary_size:
  if appendix_a_found:
    reason = ("found only " + str(len(dictionary)) + " of " +
              str(dictionary_size) + " bytes")
  else:
    reason = "\"Appendix A.\" heading not found"
  raise SystemExit("Failed to parse " + rfc_path + ": " + reason)

with open(bin_path, "wb") as output:
  output.write(dictionary)

print("Parsed and saved " + str(len(dictionary)) + " bytes to " + bin_path)
35