xref: /aosp_15_r20/external/yapf/yapf/yapflib/format_token.py (revision 7249d1a64f4850ccf838e62a46276f891f72998e)
1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Pytree nodes with extra formatting information.
15
16This is a thin wrapper around a pytree.Leaf node.
17"""
18
19import keyword
20import re
21
22from lib2to3.pgen2 import token
23
24from yapf.yapflib import py3compat
25from yapf.yapflib import pytree_utils
26from yapf.yapflib import style
27from yapf.yapflib import subtypes
28
# Token type that FormatToken.is_continuation compares against to recognize
# continuation markers.
# NOTE(review): presumably token.N_TOKENS is used because it lies just past the
# token type ids the tokenizer itself defines -- confirm against pgen2.token.
CONTINUATION = token.N_TOKENS
30
31
32def _TabbedContinuationAlignPadding(spaces, align_style, tab_width):
33  """Build padding string for continuation alignment in tabbed indentation.
34
35  Arguments:
36    spaces: (int) The number of spaces to place before the token for alignment.
37    align_style: (str) The alignment style for continuation lines.
38    tab_width: (int) Number of columns of each tab character.
39
40  Returns:
41    A padding string for alignment with style specified by align_style option.
42  """
43  if align_style in ('FIXED', 'VALIGN-RIGHT'):
44    if spaces > 0:
45      return '\t' * int((spaces + tab_width - 1) / tab_width)
46    return ''
47  return ' ' * spaces
48
49
class FormatToken(object):
  """A wrapper around pytree Leaf nodes.

  This represents the token plus additional information useful for reformatting
  the code.

  Attributes:
    node: The PyTree node this token represents.
    next_token: The token in the logical line after this token or None if this
      is the last token in the logical line.
    previous_token: The token in the logical line before this token or None if
      this is the first token in the logical line.
    matching_bracket: If a bracket token ('[', '{', or '(') the matching
      bracket.
    parameters: If this and its following tokens make up a parameter list, then
      this is a list of those parameters.
    container_opening: If the object is in a container, this points to its
      opening bracket.
    container_elements: If this is the start of a container, a list of the
      elements in the container.
    whitespace_prefix: The prefix for the whitespace.
    spaces_required_before: The number of spaces required before a token. This
      is a lower-bound for the formatter and not a hard requirement. For
      instance, a comment may have n required spaces before it. But the
      formatter won't place n spaces before all comments. Only those that are
      moved to the end of a line of code. The formatter may use different
      spacing when appropriate.
    total_length: The total length of the logical line up to and including
      whitespace and this token. However, this doesn't include the initial
      indentation amount.
    split_penalty: The penalty for splitting the line before this token.
    can_break_before: True if we're allowed to break before this token.
    must_break_before: True if we're required to break before this token.
    newlines: The number of newlines needed before this token.
    type: The token type, copied from the wrapped node.
    column: The column of the wrapped node.
    lineno: The line number of the wrapped node.
    name: The name of the wrapped node as reported by pytree_utils.NodeName.
    value: The string value of the token. Trailing whitespace is stripped for
      continuation markers, and comments are re-indented by
      AddWhitespacePrefix.
    subtypes: The subtypes annotated on the node, or {subtypes.NONE} if the
      node carries none.
    is_pseudo: True if the wrapped node is flagged as a pseudo node.
  """

  # Operator spellings recognized by is_arithmetic_op. Built once for the class
  # rather than on every property access.
  _ARITHMETIC_OPERATORS = frozenset({
      '+',  # Add
      '-',  # Subtract
      '*',  # Multiply
      '@',  # Matrix Multiply
      '/',  # Divide
      '//',  # Floor Divide
      '%',  # Modulo
      '<<',  # Left Shift
      '>>',  # Right Shift
      '|',  # Bitwise Or
      '&',  # Bitwise And
      '^',  # Bitwise Xor
      '**',  # Power
  })

  def __init__(self, node):
    """Constructor.

    Arguments:
      node: (pytree.Leaf) The node that's being wrapped.
    """
    self.node = node
    self.next_token = None
    self.previous_token = None
    self.matching_bracket = None
    self.parameters = []
    self.container_opening = None
    self.container_elements = []
    self.whitespace_prefix = ''
    self.total_length = 0
    self.split_penalty = 0
    self.can_break_before = False
    self.must_break_before = pytree_utils.GetNodeAnnotation(
        node, pytree_utils.Annotation.MUST_SPLIT, default=False)
    self.newlines = pytree_utils.GetNodeAnnotation(
        node, pytree_utils.Annotation.NEWLINES)

    # These must be set before is_comment / is_continuation are consulted
    # below, since both properties read self.type.
    self.type = node.type
    self.column = node.column
    self.lineno = node.lineno
    self.name = pytree_utils.NodeName(node)

    self.spaces_required_before = 0
    if self.is_comment:
      self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT')

    self.value = node.value
    if self.is_continuation:
      self.value = node.value.rstrip()

    stypes = pytree_utils.GetNodeAnnotation(node,
                                            pytree_utils.Annotation.SUBTYPE)
    self.subtypes = {subtypes.NONE} if not stypes else stypes
    self.is_pseudo = hasattr(node, 'is_pseudo') and node.is_pseudo

  @property
  def formatted_whitespace_prefix(self):
    """The whitespace prefix, honoring the INDENT_BLANK_LINES style option.

    When INDENT_BLANK_LINES is set and the prefix starts with newlines, every
    one of those newlines is followed by the prefix's indentation, so the
    blank lines carry the indentation too. Otherwise the prefix is returned
    unchanged.
    """
    if style.Get('INDENT_BLANK_LINES'):
      without_newlines = self.whitespace_prefix.lstrip('\n')
      # Number of leading newlines in the prefix.
      height = len(self.whitespace_prefix) - len(without_newlines)
      if height:
        return ('\n' + without_newlines) * height
    return self.whitespace_prefix

  def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0):
    """Register a token's whitespace prefix.

    This is the whitespace that will be output before a token's string. For
    comment tokens, the lines of the comment are also re-indented to match.

    Arguments:
      newlines_before: (int) The number of newlines to place before the token.
      spaces: (int) The number of spaces to place before the token.
      indent_level: (int) The indentation level.
    """
    if style.Get('USE_TABS'):
      if newlines_before > 0:
        indent_before = '\t' * indent_level + _TabbedContinuationAlignPadding(
            spaces, style.Get('CONTINUATION_ALIGN_STYLE'),
            style.Get('INDENT_WIDTH'))
      else:
        indent_before = '\t' * indent_level + ' ' * spaces
    else:
      indent_before = (' ' * indent_level * style.Get('INDENT_WIDTH') +
                       ' ' * spaces)

    if self.is_comment:
      # Strip each comment line's own indentation and re-join the lines so that
      # every line after the first starts with the new indentation.
      comment_lines = [s.lstrip() for s in self.value.splitlines()]
      self.value = ('\n' + indent_before).join(comment_lines)

    if not self.whitespace_prefix:
      # The newline count annotated on the node, when set, takes precedence
      # over the count requested by the caller.
      self.whitespace_prefix = ('\n' * (self.newlines or newlines_before) +
                                indent_before)
    else:
      self.whitespace_prefix += indent_before

  def AdjustNewlinesBefore(self, newlines_before):
    """Change the number of newlines before this token."""
    self.whitespace_prefix = ('\n' * newlines_before +
                              self.whitespace_prefix.lstrip('\n'))

  def RetainHorizontalSpacing(self, first_column, depth):
    """Retains a token's horizontal spacing.

    Sets spaces_required_before from the token's original position. Nothing is
    changed if there is no previous token to measure against.

    Arguments:
      first_column: (int) The column subtracted from this token's column when
        the token is not on the same line as the previous token.
      depth: (int) Multiplied by the INDENT_WIDTH style setting and added to
        the required spaces in that same case.
    """
    previous = self.previous_token
    if not previous:
      return

    # Step over a single pseudo token.
    if previous.is_pseudo:
      previous = previous.previous_token
      if not previous:
        return

    cur_lineno = self.lineno
    prev_lineno = previous.lineno
    if previous.is_multiline_string:
      # Compare against the line on which the multiline string ends.
      prev_lineno += previous.value.count('\n')

    if (cur_lineno != prev_lineno or
        (previous.is_pseudo and previous.value != ')' and
         cur_lineno != previous.previous_token.lineno)):
      self.spaces_required_before = (
          self.column - first_column + depth * style.Get('INDENT_WIDTH'))
      return

    cur_column = self.column
    prev_column = previous.node.column
    prev_len = len(previous.value)

    if previous.is_pseudo and previous.value == ')':
      prev_column -= 1
      prev_len = 0

    if previous.is_multiline_string:
      prev_len = len(previous.value.split('\n')[-1])
      if '\n' in previous.value:
        prev_column = 0  # Last line starts in column 0.

    self.spaces_required_before = cur_column - (prev_column + prev_len)

  def OpensScope(self):
    """Returns True if the token's value is an opening bracket."""
    return self.value in pytree_utils.OPENING_BRACKETS

  def ClosesScope(self):
    """Returns True if the token's value is a closing bracket."""
    return self.value in pytree_utils.CLOSING_BRACKETS

  def AddSubtype(self, subtype):
    """Adds a subtype to this token's subtypes."""
    self.subtypes.add(subtype)

  def __repr__(self):
    msg = ('FormatToken(name={0}, value={1}, column={2}, lineno={3}, '
           'splitpenalty={4}'.format(
               'DOCSTRING' if self.is_docstring else self.name, self.value,
               self.column, self.lineno, self.split_penalty))
    msg += ', pseudo)' if self.is_pseudo else ')'
    return msg

  @property
  def node_split_penalty(self):
    """Split penalty attached to the pytree node of this token."""
    return pytree_utils.GetNodeAnnotation(
        self.node, pytree_utils.Annotation.SPLIT_PENALTY, default=0)

  @property
  def is_binary_op(self):
    """Token is a binary operator."""
    return subtypes.BINARY_OPERATOR in self.subtypes

  @property
  def is_arithmetic_op(self):
    """Token is an arithmetic operator."""
    # Deliberately not memoized: a cache keyed on `self` would keep token
    # instances alive and go stale when `value` changes, and the set lookup is
    # already constant time.
    return self.value in self._ARITHMETIC_OPERATORS

  @property
  def is_simple_expr(self):
    """Token is an operator in a simple expression."""
    return subtypes.SIMPLE_EXPRESSION in self.subtypes

  @property
  def is_subscript_colon(self):
    """Token is a subscript colon."""
    return subtypes.SUBSCRIPT_COLON in self.subtypes

  @property
  def is_comment(self):
    """Token is a comment."""
    return self.type == token.COMMENT

  @property
  def is_continuation(self):
    """Token is a continuation marker."""
    return self.type == CONTINUATION

  @property
  def is_keyword(self):
    """Token's value is a Python keyword."""
    # Deliberately not memoized, for the same reasons as is_arithmetic_op.
    return keyword.iskeyword(self.value)

  @property
  def is_name(self):
    """Token is a name that is not a keyword."""
    return self.type == token.NAME and not self.is_keyword

  @property
  def is_number(self):
    """Token is a number."""
    return self.type == token.NUMBER

  @property
  def is_string(self):
    """Token is a string."""
    return self.type == token.STRING

  @property
  def is_multiline_string(self):
    """Test if this string is a multiline string.

    Returns:
      A multiline string always ends with triple quotes, so if it is a string
      token, inspect the last 3 characters and return True if it is a triple
      double or triple single quote mark.
    """
    return self.is_string and self.value.endswith(('"""', "'''"))

  @property
  def is_docstring(self):
    """Token is a string with no previous token in its logical line."""
    return self.is_string and self.previous_token is None

  @property
  def is_pylint_comment(self):
    """Truthy if this is a comment with a pylint enable/disable directive.

    The result is the regex match object on success and a falsy value
    otherwise, so it should only be used in a boolean context.
    """
    return self.is_comment and re.match(r'#.*\bpylint:\s*(disable|enable)=',
                                        self.value)

  @property
  def is_pytype_comment(self):
    """Truthy if this is a comment with a pytype enable/disable directive.

    The result is the regex match object on success and a falsy value
    otherwise, so it should only be used in a boolean context.
    """
    return self.is_comment and re.match(r'#.*\bpytype:\s*(disable|enable)=',
                                        self.value)

  @property
  def is_copybara_comment(self):
    """Truthy if this is a comment with a copybara strip/insert/replace marker.

    The result is the regex match object on success and a falsy value
    otherwise, so it should only be used in a boolean context.
    """
    return self.is_comment and re.match(
        r'#.*\bcopybara:\s*(strip|insert|replace)', self.value)
324