#!/usr/bin/env python3
"""Convert html5lib tokenizer tests into test/result file pairs.

Every ``*.test`` JSON file found in ``../html5lib-tests/tokenizer`` (except
``xmlViolation``) is converted into two files that share its base name:

* ``test/html-tokenizer/<name>.test`` -- for each generated case a header
  line ``<counter> <lastStartTag> <state number> <input length in UTF-8
  bytes>``, followed by the input text and a newline.
* ``result/html-tokenizer/<name>.test`` -- for each generated case a line
  holding the counter, followed by the expected tokens.

All paths are relative to the current working directory.
"""

import glob
import json
import os
import re

# Maps the initial state names used by the test suite to the state numbers
# written into the generated test headers.
state_map = {
    'Data state': 0,
    'RCDATA state': 1,
    'RAWTEXT state': 2,
    'PLAINTEXT state': 3,
    'Script data state': 4,
    'CDATA section state': 5,
}

# Cases are not generated for this initial state.
_CDATA_SECTION_STATE = state_map['CDATA section state']

# A literal backslash-u escape followed by four hex digits.
_ESCAPE_RE = re.compile(r'\\u([A-Fa-f0-9]{4})')
# A literal backslash-u escape naming a code point in the surrogate range.
_SURROGATE_RE = re.compile(r'\\uD[89A-F]', re.I)


def _unescape(text):
    r"""Return *text* with each literal ``\uXXXX`` replaced by its character."""
    return _ESCAPE_RE.sub(lambda m: chr(int(m[1], 16)), text)


def _format_tokens(tokens):
    """Return the expected-output text for a list of tokens.

    Every token contributes a line with its type (``token[0]``).  A
    ``DOCTYPE`` token is followed by three lines for ``token[1]`` to
    ``token[3]``, with ``None`` rendered as ``<none>``.  Any other token is
    followed by one line holding ``token[1]``; for a ``StartTag`` that line
    also carries a `` name=value`` suffix for each item of ``token[2]``.
    """
    parts = []
    for token in tokens:
        kind = token[0]
        parts.append(kind + '\n')

        if kind == 'DOCTYPE':
            for field in (token[1], token[2], token[3]):
                parts.append('<none>\n' if field is None else field + '\n')
        else:
            parts.append(token[1])
            if kind == 'StartTag':
                parts.extend(f' {name}={value}'
                             for name, value in token[2].items())
            parts.append('\n')

    # Escapes are resolved on the assembled text, then NUL characters are
    # replaced with U+FFFD.
    return _unescape(''.join(parts)).replace('\x00', '\uFFFD')


def _write_tests(filename, root, test_out, result_out):
    """Write all cases from one parsed test file to the two output streams.

    *filename* is only used in error messages.  *root* is the parsed JSON
    object; every one of its values is iterated as a list of tests.

    Raises:
        ValueError: if a test names an initial state missing from
            ``state_map``.
    """
    counter = 0

    for tests in root.values():
        for test in tests:
            text = test['input']

            # Skip surrogate tests
            if _SURROGATE_RE.search(text):
                continue

            text = _unescape(text)
            output = _format_tokens(test['output'])
            start_tag = test.get('lastStartTag', '-')

            # One case is emitted per initial state, each with its own
            # counter value.
            for state in test.get('initialStates', ['Data state']):
                state_no = state_map.get(state)
                if state_no is None:
                    raise ValueError(f'{filename}: unknown state: {state}')
                if state_no == _CDATA_SECTION_STATE:
                    continue

                # The length is the UTF-8 byte count, which is why the
                # output files are opened with encoding='utf-8' and without
                # newline translation.
                test_out.write(f'{counter} {start_tag} {state_no} '
                               f'{len(text.encode())}\n')
                test_out.write(text)
                test_out.write('\n')

                result_out.write(f'{counter}\n')
                result_out.write(output)

                counter += 1


def main():
    """Convert every tokenizer test file found relative to the cwd."""
    for filename in sorted(glob.glob('../html5lib-tests/tokenizer/*.test')):
        testname = os.path.splitext(os.path.basename(filename))[0]
        if testname == 'xmlViolation':
            continue

        with open(filename, encoding='utf-8') as json_data:
            root = json.load(json_data)

        # Context managers close both files even if a test is rejected
        # half-way through.
        with open(f'test/html-tokenizer/{testname}.test', 'w',
                  encoding='utf-8', newline='\n') as test_out, \
             open(f'result/html-tokenizer/{testname}.test', 'w',
                  encoding='utf-8', newline='\n') as result_out:
            _write_tests(filename, root, test_out, result_out)


if __name__ == '__main__':
    main()