xref: /aosp_15_r20/external/emboss/compiler/util/parser_types.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Various types shared through multiple passes of parsing.
16
17This module contains types used as interfaces between parts of the Emboss front
18end.  These types do not really "belong" to either the producers or consumers,
19and in a few cases placing them in one or the other creates unnecessary
20dependencies, so they are defined here.
21"""
22
23import collections
24from compiler.util import ir_data
25
26
def _make_position(line, column):
  """Builds an ir_data.Position from integer line and column numbers.

  Args:
    line: The line number; must be an int.
    column: The column number; must be an int.

  Returns:
    An ir_data.Position constructed with the given line and column.

  Raises:
    ValueError: If line or column is not an int.  line is checked first.
  """
  # Each entry pairs a value with the message template used if it is not an
  # int; the order here determines which problem is reported first.
  checks = (
      (line, "Bad line {!r}"),
      (column, "Bad column {!r}"),
  )
  for value, message in checks:
    if not isinstance(value, int):
      raise ValueError(message.format(value))
  return ir_data.Position(line=line, column=column)
34
35
def _parse_position(text):
  """Converts "line:column" text (e.g., "1:2") into an ir_data.Position.

  Args:
    text: A string holding exactly two ":"-separated integers.

  Returns:
    The ir_data.Position produced by _make_position for the parsed integers.

  Raises:
    ValueError: If text does not split into exactly two parts on ":", or if
      either part cannot be converted by int().
  """
  line_text, column_text = text.split(":")
  line_number = int(line_text)
  column_number = int(column_text)
  return _make_position(line_number, column_number)
40
41
def format_position(position):
  """Renders a position as "line:column" text.

  Args:
    position: An object with `line` and `column` attributes, such as an
      ir_data.Position.

  Returns:
    A string containing the line and the column, separated by ":".
  """
  return "{line}:{column}".format(line=position.line, column=position.column)
45
46
def make_location(start, end, is_synthetic=False):
  """Builds an ir_data.Location spanning from start to end.

  Args:
    start: The first position, either as a (line, column) tuple or as an
      ir_data.Position.
    end: The last position, in either of the forms accepted for start.
    is_synthetic: Passed through unchanged to the resulting ir_data.Location.

  Returns:
    An ir_data.Location with the given start, end, and is_synthetic values.

  Raises:
    ValueError: If start or end is neither a tuple nor an ir_data.Position
      (or is a tuple that _make_position rejects), or if start comes after
      end.
  """
  # Tuples are converted up front so the checks below only deal with
  # ir_data.Position objects.
  begin = _make_position(*start) if isinstance(start, tuple) else start
  finish = _make_position(*end) if isinstance(end, tuple) else end

  if not isinstance(begin, ir_data.Position):
    raise ValueError("Bad start {!r}".format(begin))
  if not isinstance(finish, ir_data.Position):
    raise ValueError("Bad end {!r}".format(finish))

  # A location may be empty (begin == finish), but may not run backwards.
  begins_after_finish = begin.line > finish.line or (
      begin.line == finish.line and begin.column > finish.column)
  if begins_after_finish:
    raise ValueError("Start {} is after end {}".format(format_position(begin),
                                                       format_position(finish)))

  return ir_data.Location(start=begin, end=finish, is_synthetic=is_synthetic)
62
63
def format_location(location):
  """Renders a location as "start-end" text, such as "1:2-3:4".

  Args:
    location: An object with `start` and `end` attributes that
      format_position accepts, such as an ir_data.Location.

  Returns:
    The formatted start and end positions, joined by "-".
  """
  start_text = format_position(location.start)
  end_text = format_position(location.end)
  return "{}-{}".format(start_text, end_text)
68
69
def parse_location(text):
  """Converts "start-end" text (e.g., "1:2-3:4") into an ir_data.Location.

  Args:
    text: A string holding two "line:column" positions separated by "-".

  Returns:
    The ir_data.Location that make_location builds from the two parsed
    positions.

  Raises:
    ValueError: If text does not split into exactly two parts on "-", or if
      either part is rejected by _parse_position or make_location.
  """
  start_text, end_text = text.split("-")
  start = _parse_position(start_text)
  end = _parse_position(end_text)
  return make_location(start, end)
74
75
class Token(
    collections.namedtuple("Token", ["symbol", "text", "source_location"])):
  """A classified piece of source text, together with where it was found.

  Attributes:
    symbol: The classification of this token ("Indent", "SnakeWord", etc.)
    text: The exact source text of the token ("1234", "some_name", etc.)
    source_location: The location in the original source file that this token
      was taken from.
  """

  def __str__(self):
    """Returns "symbol 'text' start-end" for use in human-readable output."""
    symbol, text, source_location = self
    quoted_text = repr(text)
    location_text = format_location(source_location)
    return "{} {} {}".format(symbol, quoted_text, location_text)
89
90
class Production(collections.namedtuple("Production", ["lhs", "rhs"])):
  """A Production is a simple production from a context-free grammar.

  A Production takes the form:

    nonterminal -> symbol*

  where "nonterminal" is an implicitly non-terminal symbol in the language,
  and "symbol*" is zero or more terminal or non-terminal symbols which form the
  non-terminal on the left.

  Attributes:
    lhs: The non-terminal symbol on the left-hand-side of the production.
    rhs: The sequence of symbols on the right-hand-side of the production.
  """

  def __str__(self):
    """Returns the production in "lhs -> rhs0 rhs1 ..." form."""
    return str(self.lhs) + " -> " + " ".join(str(r) for r in self.rhs)

  @staticmethod
  def parse(production_text):
    """Parses a Production from a "symbol -> symbol symbol symbol" string.

    Args:
      production_text: Whitespace-separated text whose first word is the
        left-hand-side symbol, whose second word is "->", and whose remaining
        words (if any) are the right-hand-side symbols.

    Returns:
      A Production whose lhs is the first word and whose rhs is a tuple of the
      words after "->" (an empty tuple if there are none).

    Raises:
      SyntaxError: If production_text has fewer than two words, or if its
        second word is not "->".
    """
    words = production_text.split()
    # The length check keeps empty or one-word input from surfacing as an
    # IndexError; any malformed production is reported as a SyntaxError.
    if len(words) < 2 or words[1] != "->":
      raise SyntaxError(
          "Expected production of the form 'symbol -> symbol*', got "
          "{!r}".format(production_text))
    return Production(words[0], tuple(words[2:]))
117