#!/usr/bin/env python3

# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Script for generating the .proto file and the conversion .cc file for a
templated, library-based JavaScript parser fuzzer.
"""

import sys

def ParseWord(word_string):
  # Every part of the word is either a string surrounded by "" or a
  # placeholder $<int>.
  word_string = word_string.strip()

  parts = []
  while len(word_string) > 0:
    if word_string[0] == '"':
      # Quoted literal: take everything up to the closing quote.
      end_ix = 1 + word_string[1:].index('"')
      parts.append(word_string[1:end_ix])
      word_string = word_string[(end_ix + 1):]
    elif word_string[0] == '$':
      # Placeholder: $ followed by the index of an inner token.
      if ' ' in word_string:
        end_ix = word_string.index(' ')
      else:
        end_ix = len(word_string)
      parts.append(int(word_string[1:end_ix]))
      word_string = word_string[end_ix:]
    else:
      assert False
    word_string = word_string.lstrip()
  return parts

def GenerateProtoContents(words):
  # One enum entry per dictionary word.
  contents = ''
  for ix in range(len(words)):
    contents += '    token_value_' + str(ix) + ' = ' + str(ix) + ';\n'
  return contents

def GenerateConversionContents(words):
  # One switch case per dictionary word: string literals are emitted verbatim
  # and each placeholder recurses into the corresponding inner token.
  contents = ''
  for ix, word in enumerate(words):
    contents += '    case ' + str(ix) + ':\n'
    max_part = -1
    first = True
    building_string = ''
    for part in word:
      if not first:
        building_string += ' + std::string(" ") + '
      if isinstance(part, str):
        building_string += 'std::string("' + part + '")'
      else:
        if part > max_part:
          max_part = part
        building_string += ('token_to_string(token.inner_tokens(' + str(part) +
                            '), depth)')
      first = False
    if max_part >= 0:
      # Guard against tokens carrying fewer inner tokens than the word
      # references.
      contents += ('      if (token.inner_tokens().size() < ' +
                   str(max_part + 1) + ') return std::string("");\n')
    contents += '      return ' + building_string + ';\n'
  return contents

def ReadDictionary(filename):
  # Read the fuzzer dictionary, skipping comment lines and empty words.
  with open(filename) as input_file:
    lines = input_file.readlines()
  words = []
  for line in lines:
    if not line.startswith('#'):
      word = ParseWord(line)
      if len(word) > 0:
        words.append(word)
  return words
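
# Worked example (illustrative only, not used by the generator): for a
# dictionary line such as
#
#   "var" $0 "=" $1 ";"
#
# ParseWord() returns ['var', 0, '=', 1, ';'] -- quoted fragments become
# literal strings, and each $<int> placeholder becomes an integer index into
# the token's inner_tokens.
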
def main(argv):
  # argv: <output .proto file> <output .cc file> <input dictionary file>.
  output_proto_file = argv[1]
  output_cc_file = argv[2]
  input_dict_file = argv[3]

  words = ReadDictionary(input_dict_file)

  proto_header = ('// Generated by generate_javascript_parser_proto.py.\n'
                  '\n'
                  'syntax = "proto2";\n'
                  'package javascript_parser_proto_fuzzer;\n'
                  '\n'
                  'message Token {\n'
                  '  enum Value {\n')

  proto_footer = ('  }\n'
                  '  required Value value = 1;\n'
                  '  repeated Token inner_tokens = 2;\n'
                  '}\n'
                  '\n'
                  'message Source {\n'
                  '  required bool is_module = 1;\n'
                  '  repeated Token tokens = 2;\n'
                  '}\n')

  proto_contents = proto_header + GenerateProtoContents(words) + proto_footer

  with open(output_proto_file, 'w') as f:
    f.write(proto_contents)

  conversion_header = (
      '// Generated by generate_javascript_parser_proto.py.\n'
      '\n'
      '#include "testing/libfuzzer/fuzzers/'
      'javascript_parser_proto_to_string.h"\n'
      '\n'
      '// Bound calls to token_to_string to prevent memory usage from growing\n'
      '// too much.\n'
      'const int kMaxRecursiveDepth = 9;\n'
      '\n'
      'std::string token_to_string(\n'
      '    const javascript_parser_proto_fuzzer::Token& token, int depth)'
      ' {\n'
      '  if (++depth == kMaxRecursiveDepth) return std::string("");\n'
      '  switch(token.value()) {\n')

  conversion_footer = ('    default: break;\n'
                       '  }\n'
                       '  return std::string("");\n'
                       '}\n')

  conversion_contents = (conversion_header + GenerateConversionContents(words)
                         + conversion_footer)

  with open(output_cc_file, 'w') as f:
    f.write(conversion_contents)

if __name__ == "__main__":
  main(sys.argv)
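
# Sketch of the generated conversion code (illustrative; assumes a dictionary
# whose first line is '"typeof" $0'): the emitted .cc file would contain a
# case equivalent to
#
#   case 0:
#     if (token.inner_tokens().size() < 1) return std::string("");
#     return std::string("typeof") + std::string(" ") +
#         token_to_string(token.inner_tokens(0), depth);
#
# Hypothetical invocation (file names are examples only):
#
#   python3 generate_javascript_parser_proto.py javascript_parser.proto \
#       javascript_parser_proto_to_string.cc javascript_parser.dict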