xref: /aosp_15_r20/external/libxml2/tools/genHtml5LibTests.py (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1*7c568831SAndroid Build Coastguard Worker#!/usr/bin/env python3
2*7c568831SAndroid Build Coastguard Worker
3*7c568831SAndroid Build Coastguard Workerimport glob
4*7c568831SAndroid Build Coastguard Workerimport json
5*7c568831SAndroid Build Coastguard Workerimport re
6*7c568831SAndroid Build Coastguard Worker
# Maps the tokenizer state names used in the html5lib test files
# ("initialStates") to the numeric state codes written to the generated
# test headers.
state_map = {
    'Data state':          0,
    'RCDATA state':        1,
    'RAWTEXT state':       2,
    'PLAINTEXT state':     3,
    'Script data state':   4,
    'CDATA section state': 5,
}

# Tests that start in this state are not emitted.
_CDATA_SECTION_STATE = state_map['CDATA section state']

# A literal backslash-u followed by four hex digits, as it appears in the
# decoded JSON strings.
_ESCAPE_RE = re.compile(r'\\u([A-Fa-f0-9]{4})')

# A literal backslash-u escape whose code point starts with D8..DF.
_SURROGATE_ESCAPE_RE = re.compile(r'\\uD[89A-F]', re.I)


def _unescape(text):
    """Replace every literal ``\\uXXXX`` sequence with the character it names."""
    return _ESCAPE_RE.sub(lambda m: chr(int(m[1], 16)), text)


def _format_tokens(tokens):
    """Render a list of expected tokens as newline-terminated text.

    Each token contributes a line holding its type (``token[0]``) followed by:

    * for ``DOCTYPE``: one line for each of ``token[1]``..``token[3]``, with
      ``None`` written as ``<none>``;
    * for everything else: one line holding ``token[1]``; for ``StartTag``
      the attributes in ``token[2]`` are appended as `` name=value`` pairs.
    """
    lines = []
    for token in tokens:
        kind = token[0]
        lines.append(kind)

        if kind == 'DOCTYPE':
            for field in (token[1], token[2], token[3]):
                lines.append('<none>' if field is None else field)
            continue

        line = token[1]
        if kind == 'StartTag':
            line += ''.join(f' {name}={value}'
                            for name, value in token[2].items())
        lines.append(line)

    return ''.join(line + '\n' for line in lines)


def _convert_file(filename, testname):
    """Convert one html5lib tokenizer test file.

    Reads the JSON document *filename* and writes
    ``test/html-tokenizer/<testname>.test`` (inputs) and
    ``result/html-tokenizer/<testname>.test`` (expected token dumps).
    Every list found under a top-level key of the JSON object is processed.

    Raises:
        ValueError: if a test names an initial state missing from
            ``state_map``.
    """
    with open(filename, encoding='utf-8') as json_data:
        root = json.load(json_data)

    test_path = f'test/html-tokenizer/{testname}.test'
    result_path = f'result/html-tokenizer/{testname}.test'

    # The header stores the UTF-8 byte length of the input, so the files are
    # written as UTF-8 without newline translation to keep that count exact.
    # The ``with`` block keeps both files open across *all* top-level keys
    # and closes them even if a test raises.
    with open(test_path, 'w', encoding='utf-8', newline='\n') as test_out, \
            open(result_path, 'w', encoding='utf-8', newline='\n') as result_out:
        counter = 0

        for tests in root.values():
            for test in tests:
                raw_input = test['input']

                # Skip surrogate tests
                if _SURROGATE_ESCAPE_RE.search(raw_input):
                    continue

                text = _unescape(raw_input)

                # Escapes are expanded first so that an escaped NUL is also
                # turned into U+FFFD.
                expected = _unescape(_format_tokens(test['output']))
                expected = expected.replace('\x00', '\uFFFD')

                start_tag = test.get('lastStartTag', '-')

                for state in test.get('initialStates', ['Data state']):
                    state_no = state_map.get(state)
                    if state_no is None:
                        raise ValueError(
                            f'{filename}: unknown state: {state}')
                    if state_no == _CDATA_SECTION_STATE:
                        continue

                    # Header: test number, last start tag, state code and
                    # input length in bytes; the raw input follows.
                    test_out.write(f'{counter} {start_tag} {state_no} '
                                   f'{len(text.encode())}\n')
                    test_out.write(text)
                    test_out.write('\n')

                    result_out.write(f'{counter}\n')
                    result_out.write(expected)

                    counter += 1


def main():
    """Convert every ``../html5lib-tests/tokenizer/*.test`` file.

    The ``xmlViolation`` test file is skipped.
    """
    for filename in sorted(glob.glob('../html5lib-tests/tokenizer/*.test')):
        match = re.search(r'/([^/]*)\.test$', filename)
        if match is None:
            continue
        testname = match[1]
        if testname == 'xmlViolation':
            continue

        _convert_file(filename, testname)


if __name__ == '__main__':
    main()
87