# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Main driver for the Emboss front-end.

The parse_emboss_file function performs a complete parse of the specified file,
and returns an IR or formatted error message.
"""

import collections

from compiler.front_end import attribute_checker
from compiler.front_end import constraints
from compiler.front_end import dependency_checker
from compiler.front_end import expression_bounds
from compiler.front_end import lr1
from compiler.front_end import module_ir
from compiler.front_end import parser
from compiler.front_end import symbol_resolver
from compiler.front_end import synthetics
from compiler.front_end import tokenizer
from compiler.front_end import type_check
from compiler.front_end import write_inference
from compiler.util import error
from compiler.util import ir_data
from compiler.util import ir_data_utils
from compiler.util import parser_types
from compiler.util import resources

# Common (ir, debug_info, errors) result shape returned by the parse functions
# in this module.  Being a namedtuple, it unpacks like a plain 3-tuple.
_IrDebugInfo = collections.namedtuple("IrDebugInfo", ["ir", "debug_info",
                                                      "errors"])


class DebugInfo(object):
  """Debug information about Emboss parsing.

  Attributes:
    modules: A dict mapping each parsed file name to its ModuleDebugInfo.
  """
  # Trailing comma matters: ("modules") is just a parenthesized string.
  __slots__ = ("modules",)

  def __init__(self):
    self.modules = {}

  def __eq__(self, other):
    # Let Python fall back to its default handling for unrelated types instead
    # of failing with AttributeError on `other.modules`.
    if not isinstance(other, DebugInfo):
      return NotImplemented
    return self.modules == other.modules

  def __ne__(self, other):
    return not self == other


class ModuleDebugInfo(object):
  """Debug information about the parse of a single file.

  Attributes:
    file_name: The name of the file from which this module came.
    tokens: The tokenization of this module's source text.
    parse_tree: The raw parse tree for this module.
    ir: The intermediate representation of this module, before additional
        processing such as symbol resolution.
    used_productions: The set of grammar productions used when parsing this
        module.
    source_code: The source text of the module.
  """
  __slots__ = ("file_name", "tokens", "parse_tree", "ir", "used_productions",
               "source_code")

  def __init__(self, file_name):
    self.file_name = file_name
    self.tokens = None
    self.parse_tree = None
    self.ir = None
    self.used_productions = None
    self.source_code = None

  def __eq__(self, other):
    # Unrelated types are "not comparable" rather than an AttributeError.
    if not isinstance(other, ModuleDebugInfo):
      return NotImplemented
    return (self.file_name == other.file_name and self.tokens == other.tokens
            and self.parse_tree == other.parse_tree and self.ir == other.ir and
            self.used_productions == other.used_productions and
            self.source_code == other.source_code)

  def __ne__(self, other):
    return not self == other

  def format_tokenization(self):
    """Renders self.tokens in a human-readable format, one token per line."""
    return "\n".join(str(token) for token in self.tokens)

  def format_parse_tree(self, parse_tree=None, indent=""):
    """Renders self.parse_tree in a human-readable format.

    Arguments:
      parse_tree: The (sub)tree to render; defaults to self.parse_tree.  Used
          for the recursive calls on child nodes.
      indent: The prefix prepended to every line rendered at this level.

    Returns:
      A string with one line per node; lr1.Reduction nodes that have children
      are followed by ":" and their children at a deeper indent.
    """
    if parse_tree is None:
      parse_tree = self.parse_tree
    result = []
    if isinstance(parse_tree, lr1.Reduction):
      result.append(indent + parse_tree.symbol)
      if parse_tree.children:
        result.append(":\n")
        for child in parse_tree.children:
          # NOTE(review): the per-level indent literal is reproduced exactly as
          # it appears in the (whitespace-collapsed) source; confirm the
          # intended width.
          result.append(self.format_parse_tree(child, indent + " "))
      else:
        result.append("\n")
    else:
      # Anything that is not a Reduction is rendered via its str() form.
      result.append("{}{}\n".format(indent, parse_tree))
    return "".join(result)

  def format_module_ir(self):
    """Renders self.ir in a human-readable format (indented JSON)."""
    return ir_data_utils.IrDataSerializer(self.ir).to_json(indent=2)


def format_production_set(productions):
  """Renders a set of productions in a human-readable format.

  Arguments:
    productions: An iterable of sortable productions.

  Returns:
    The str() of each production, in sorted order, one per line.
  """
  return "\n".join(str(production) for production in sorted(productions))


# Maps (source_code, file_name) to the ModuleDebugInfo of a successful parse.
_cached_modules = {}


def parse_module_text(source_code, file_name):
  """Parses the text of a module, returning a module-level IR.

  Arguments:
    source_code: The text of the module to parse.
    file_name: The name of the module's source file (will be included in the
        resulting IR).

  Returns:
    An (ir, debug_info, errors) tuple, where ir is a module-level intermediate
    representation (IR), prior to import and symbol resolution, debug_info is
    the corresponding ModuleDebugInfo, for debugging the parser, and errors is
    a list of tokenization or parse errors.  Tokenization and parse failures
    are reported through errors, not raised; if errors is not empty, ir is
    None.
  """
  # This is strictly an optimization to speed up tests, mostly by avoiding the
  # need to re-parse the prelude for every test .emb.
  if (source_code, file_name) in _cached_modules:
    # The cached ModuleDebugInfo object itself is shared between callers; only
    # the IR handed back to the caller is a fresh copy.
    debug_info = _cached_modules[source_code, file_name]
    ir = ir_data_utils.copy(debug_info.ir)
  else:
    debug_info = ModuleDebugInfo(file_name)
    debug_info.source_code = source_code
    tokens, errors = tokenizer.tokenize(source_code, file_name)
    if errors:
      return _IrDebugInfo(None, debug_info, errors)
    debug_info.tokens = tokens
    parse_result = parser.parse_module(tokens)
    if parse_result.error:
      return _IrDebugInfo(
          None,
          debug_info,
          [error.make_error_from_parse_error(file_name, parse_result.error)])
    debug_info.parse_tree = parse_result.parse_tree
    used_productions = set()
    ir = module_ir.build_ir(parse_result.parse_tree, used_productions)
    ir.source_text = source_code
    debug_info.used_productions = used_productions
    # Store a copy, so that later changes the caller makes to `ir` are not
    # reflected in the cached debug info.  Only successful parses are cached.
    debug_info.ir = ir_data_utils.copy(ir)
    _cached_modules[source_code, file_name] = debug_info
  ir.source_file_name = file_name
  return _IrDebugInfo(ir, debug_info, [])


def parse_module(file_name, file_reader):
  """Parses a module, returning a module-level IR.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that takes a file name and returns either:
        (file_contents, None) or
        (None, list_of_error_detail_strings)

  Returns:
    (ir, debug_info, errors), where ir is a module-level intermediate
    representation (IR), debug_info is a ModuleDebugInfo containing the
    tokenization, parse tree, and original source text of the module, and
    errors is a list of read, tokenization, or parse errors.  If errors is not
    an empty list, ir will be None.  If the file could not be read, debug_info
    will also be None.
  """
  source_code, errors = file_reader(file_name)
  if errors:
    # There is no source text to point into, so the error is attached to the
    # very start of the file; each reader-supplied detail becomes a note.
    location = parser_types.make_location((1, 1), (1, 1))
    return _IrDebugInfo(None, None, [
        [error.error(file_name, location, "Unable to read file.")] +
        [error.note(file_name, location, e) for e in errors]
    ])
  return parse_module_text(source_code, file_name)


def get_prelude():
  """Returns the module IR and debug info of the Emboss Prelude.

  The prelude is parsed under the empty file name "".
  """
  return parse_module_text(
      resources.load("compiler.front_end", "prelude.emb"), "")


def parse_emboss_file(file_name, file_reader, stop_before_step=None):
  """Fully parses an .emb, and returns an IR suitable for passing to a back end.

  parse_emboss_file is a convenience function which calls only_parse_emboss_file
  and process_ir.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that takes a file name and returns either
        (file_contents, None) or (None, list_of_error_detail_strings).
    stop_before_step: If set, parse_emboss_file will stop normalizing the IR
        just before the specified step.  This parameter should be None for
        non-test code.

  Returns:
    (ir, debug_info, errors), where ir is a complete IR, ready for consumption
    by an Emboss back end, debug_info is a DebugInfo containing the
    tokenization, parse tree, and original source text of all modules, and
    errors is a list of errors from parsing or from IR processing.  If errors
    is not an empty list, ir will be None.
  """
  ir, debug_info, errors = only_parse_emboss_file(file_name, file_reader)
  if errors:
    return _IrDebugInfo(None, debug_info, errors)
  ir, errors = process_ir(ir, stop_before_step)
  if errors:
    return _IrDebugInfo(None, debug_info, errors)
  return _IrDebugInfo(ir, debug_info, errors)


def only_parse_emboss_file(file_name, file_reader):
  """Parses an .emb, and returns an IR suitable for process_ir.

  only_parse_emboss_file parses the given file and all of its transitive
  imports, and returns a first-stage intermediate representation, which can be
  passed to process_ir.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that takes a file name and returns either
        (file_contents, None) or (None, list_of_error_detail_strings).

  Returns:
    (ir, debug_info, errors), where ir is an intermediate representation (IR),
    debug_info is a DebugInfo containing the tokenization, parse tree, and
    original source text of all modules, and errors is a list of tokenization or
    parse errors.  If errors is not an empty list, ir will be None.
  """
  # Breadth-first walk over the import graph; `files` records every file name
  # that has ever been queued, so each file is parsed at most once.
  file_queue = collections.deque([file_name])
  files = {file_name}
  debug_info = DebugInfo()
  ir = ir_data.EmbossIr(module=[])
  while file_queue:
    file_to_parse = file_queue.popleft()
    if file_to_parse:
      module, module_debug_info, errors = parse_module(file_to_parse,
                                                       file_reader)
    else:
      # An empty file name denotes the prelude (see get_prelude).
      module, module_debug_info, errors = get_prelude()
    # Record debug info even for a module that failed to parse; it is None
    # only when the file could not be read at all.
    if module_debug_info:
      debug_info.modules[file_to_parse] = module_debug_info
    if errors:
      # Stop at the first module with errors.
      return _IrDebugInfo(None, debug_info, errors)
    ir.module.extend([module])  # Proto supports extend but not append here.
    for import_ in module.foreign_import:
      if import_.file_name.text not in files:
        file_queue.append(import_.file_name.text)
        files.add(import_.file_name.text)
  return _IrDebugInfo(ir, debug_info, [])


def process_ir(ir, stop_before_step):
  """Turns a first-stage IR into a fully-processed IR.

  process_ir performs all of the semantic processing steps on `ir`: resolving
  symbols, checking dependencies, adding type annotations, normalizing
  attributes, etc.  process_ir is generally meant to be called with the result
  of only_parse_emboss_file(), but in theory could be called with a first-stage
  intermediate representation (IR) from another source.

  Arguments:
    ir: The IR to process.  This structure will be modified during processing.
    stop_before_step: If set, process_ir will stop normalizing the IR just
        before the specified step, which is named by the __name__ of the pass
        function.  Any deferred errors collected up to that point are dropped.
        This parameter should be None for non-test code.

  Returns:
    (ir, errors), where ir is a complete IR, ready for consumption by an Emboss
    back end, and errors is a list of compilation errors.  If errors is not an
    empty list, ir will be None.
  """
  passes = (synthetics.desugar,
            symbol_resolver.resolve_symbols,
            dependency_checker.find_dependency_cycles,
            dependency_checker.set_dependency_order,
            symbol_resolver.resolve_field_references,
            type_check.annotate_types,
            type_check.check_types,
            expression_bounds.compute_constants,
            attribute_checker.normalize_and_verify,
            constraints.check_constraints,
            write_inference.set_write_methods)
  assert stop_before_step in [None] + [f.__name__ for f in passes], (
      "Bad value for stop_before_step.")
  # Some parts of the IR are synthesized from "natural" parts of the IR, before
  # the natural parts have been fully error checked.  Because of this, the
  # synthesized parts can have errors; in a couple of cases, they can have
  # errors that show up in an earlier pass than the errors in the natural parts
  # of the IR.  As an example:
  #
  #     struct Foo:
  #       0 [+1]     bits:
  #         0 [+1]   Flag    flag
  #       1 [+flag]  UInt:8  field
  #
  # In this case, the use of `flag` as the size of `field` is incorrect, because
  # `flag` is a boolean, but the size of a field must be an integer.
  #
  # Type checking occurs in two passes: in the first pass, expressions are
  # checked for internal consistency.  In the second pass, expression types are
  # checked against their location.  The use of `flag` would be caught in the
  # second pass.
  #
  # However, the generated_fields pass will synthesize a $size_in_bytes virtual
  # field that would look like:
  #
  #     struct Foo:
  #       0 [+1]     bits:
  #         0 [+1]   Flag    flag
  #       1 [+flag]  UInt:8  field
  #       let $size_in_bytes = $max(true ? 0 + 1 : 0, true ? 1 + flag : 0)
  #
  # Since `1 + flag` is not internally consistent, this type error would be
  # caught in the first pass, and the user would see a very strange error
  # message that "the right-hand argument of operator `+` must be an integer."
  #
  # In order to avoid showing these kinds of errors to the user, we defer any
  # errors in synthetic parts of the IR.  Unless there is a compiler bug, those
  # errors will show up as errors in the natural parts of the IR, which should
  # be much more comprehensible to end users.
  #
  # If, for some reason, there is an error in the synthetic IR, but no error in
  # the natural IR, the synthetic errors will be shown.  In this case, the
  # formatting for the synthetic errors will show '[compiler bug]' for the
  # error location, which (hopefully) will provide the end user with a cue that
  # the error is a compiler bug.
  deferred_errors = []
  for function in passes:
    if stop_before_step == function.__name__:
      return (ir, [])
    errors, hidden_errors = error.split_errors(function(ir))
    if errors:
      # Visible errors win: report them and discard anything deferred so far.
      return (None, errors)
    deferred_errors.extend(hidden_errors)

  if deferred_errors:
    return (None, deferred_errors)

  # Every valid non-None stop_before_step returns from inside the loop above.
  assert stop_before_step is None, "Bad value for stop_before_step."
  return (ir, [])