xref: /aosp_15_r20/external/yapf/yapf/yapflib/format_decision_state.py (revision 7249d1a64f4850ccf838e62a46276f891f72998e)
1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Implements a format decision state object that manages whitespace decisions.
15
16Each token is processed one at a time, at which point its whitespace formatting
17decisions are made. A graph of potential whitespace formattings is created,
18where each node in the graph is a format decision state object. The heuristic
19tries formatting the token with and without a newline before it to determine
20which one has the least penalty. Therefore, the format decision state object for
21each decision needs to be its own unique copy.
22
23Once the heuristic determines the best formatting, it makes a non-dry run pass
24through the code to commit the whitespace formatting.
25
26  FormatDecisionState: main class exported by this module.
27"""
28
29from yapf.yapflib import format_token
30from yapf.yapflib import logical_line
31from yapf.yapflib import object_state
32from yapf.yapflib import split_penalty
33from yapf.yapflib import style
34from yapf.yapflib import subtypes
35
36
37class FormatDecisionState(object):
38  """The current state when indenting a logical line.
39
40  The FormatDecisionState object is meant to be copied instead of referenced.
41
42  Attributes:
43    first_indent: The indent of the first token.
44    column: The number of used columns in the current line.
45    line: The logical line we're currently processing.
46    next_token: The next token to be formatted.
47    paren_level: The level of nesting inside (), [], and {}.
48    lowest_level_on_line: The lowest paren_level on the current line.
49    stack: A stack (of _ParenState) keeping track of properties applying to
50      parenthesis levels.
51    comp_stack: A stack (of ComprehensionState) keeping track of properties
52      applying to comprehensions.
53    param_list_stack: A stack (of ParameterListState) keeping track of
54      properties applying to function parameter lists.
55    ignore_stack_for_comparison: Ignore the stack of _ParenState for state
56      comparison.
57    column_limit: The column limit specified by the style.
58  """
59
60  def __init__(self, line, first_indent):
61    """Initializer.
62
63    Initializes to the state after placing the first token from 'line' at
64    'first_indent'.
65
66    Arguments:
67      line: (LogicalLine) The logical line we're currently processing.
68      first_indent: (int) The indent of the first token.
69    """
70    self.next_token = line.first
71    self.column = first_indent
72    self.line = line
73    self.paren_level = 0
74    self.lowest_level_on_line = 0
75    self.ignore_stack_for_comparison = False
76    self.stack = [_ParenState(first_indent, first_indent)]
77    self.comp_stack = []
78    self.param_list_stack = []
79    self.first_indent = first_indent
80    self.column_limit = style.Get('COLUMN_LIMIT')
81
82  def Clone(self):
83    """Clones a FormatDecisionState object."""
84    new = FormatDecisionState(self.line, self.first_indent)
85    new.next_token = self.next_token
86    new.column = self.column
87    new.line = self.line
88    new.paren_level = self.paren_level
89    new.line.depth = self.line.depth
90    new.lowest_level_on_line = self.lowest_level_on_line
91    new.ignore_stack_for_comparison = self.ignore_stack_for_comparison
92    new.first_indent = self.first_indent
93    new.stack = [state.Clone() for state in self.stack]
94    new.comp_stack = [state.Clone() for state in self.comp_stack]
95    new.param_list_stack = [state.Clone() for state in self.param_list_stack]
96    return new
97
98  def __eq__(self, other):
99    # Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous',
100    # because it shouldn't have a bearing on this comparison. (I.e., it will
101    # report equal if 'next_token' does.)
102    return (self.next_token == other.next_token and
103            self.column == other.column and
104            self.paren_level == other.paren_level and
105            self.line.depth == other.line.depth and
106            self.lowest_level_on_line == other.lowest_level_on_line and
107            (self.ignore_stack_for_comparison or
108             other.ignore_stack_for_comparison or self.stack == other.stack and
109             self.comp_stack == other.comp_stack and
110             self.param_list_stack == other.param_list_stack))
111
112  def __ne__(self, other):
113    return not self == other
114
115  def __hash__(self):
116    return hash((self.next_token, self.column, self.paren_level,
117                 self.line.depth, self.lowest_level_on_line))
118
119  def __repr__(self):
120    return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' %
121            (self.column, repr(self.next_token), self.paren_level,
122             '\n\t'.join(repr(s) for s in self.stack) + ']'))
123
124  def CanSplit(self, must_split):
125    """Determine if we can split before the next token.
126
127    Arguments:
128      must_split: (bool) A newline was required before this token.
129
130    Returns:
131      True if the line can be split before the next token.
132    """
133    current = self.next_token
134    previous = current.previous_token
135
136    if current.is_pseudo:
137      return False
138
139    if (not must_split and subtypes.DICTIONARY_KEY_PART in current.subtypes and
140        subtypes.DICTIONARY_KEY not in current.subtypes and
141        not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')):
142      # In some situations, a dictionary may be multiline, but pylint doesn't
143      # like it. So don't allow it unless forced to.
144      return False
145
146    if (not must_split and subtypes.DICTIONARY_VALUE in current.subtypes and
147        not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')):
148      return False
149
150    if previous and previous.value == '(' and current.value == ')':
151      # Don't split an empty function call list if we aren't splitting before
152      # dict values.
153      token = previous.previous_token
154      while token:
155        prev = token.previous_token
156        if not prev or prev.name not in {'NAME', 'DOT'}:
157          break
158        token = token.previous_token
159      if token and subtypes.DICTIONARY_VALUE in token.subtypes:
160        if not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE'):
161          return False
162
163    if previous and previous.value == '.' and current.value == '.':
164      return False
165
166    return current.can_break_before
167
  def MustSplit(self):
    """Returns True if the line must split before the next token.

    The method runs an ordered series of checks against the next token, the
    token before it and the active style knobs. The first check that reaches a
    decision returns immediately, so the order of the checks matters; if no
    check applies, the result is False.

    Besides returning a value, some branches set
    'split_before_closing_bracket' on the top entry of 'self.stack'.

    Returns:
      A truthy value if a newline is required before 'self.next_token',
      otherwise a falsy value.
    """
    current = self.next_token
    previous = current.previous_token

    # Pseudo tokens never force a split.
    if current.is_pseudo:
      return False

    if current.must_break_before:
      return True

    # Every check below looks at the previous token.
    if not previous:
      return False

    if style.Get('SPLIT_ALL_COMMA_SEPARATED_VALUES') and previous.value == ',':
      return True

    if (style.Get('SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES') and
        previous.value == ','):
      # Avoid breaking in a container that fits in the current line if possible
      opening = _GetOpeningBracket(current)

      # Can't find opening bracket, behave the same way as
      # SPLIT_ALL_COMMA_SEPARATED_VALUES.
      if not opening:
        return True

      if current.is_comment:
        # Don't require splitting before a comment, since it may be related to
        # the current line.
        return False

      # Allow the fallthrough code to handle the closing bracket.
      if current != opening.matching_bracket:
        # If the container doesn't fit in the current line, must split
        return not self._ContainerFitsOnStartLine(opening)

    if (self.stack[-1].split_before_closing_bracket and
        (current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET') or
         current.value in '}])' and style.Get('INDENT_CLOSING_BRACKETS'))):
      # Split before the closing bracket if we can.
      if subtypes.SUBSCRIPT_BRACKET not in current.subtypes:
        return current.node_split_penalty != split_penalty.UNBREAKABLE

    # A ')' directly after a comma forces a split, unless the helper reports
    # that the bracket belongs to a single-element tuple.
    if (current.value == ')' and previous.value == ',' and
        not _IsSingleElementTuple(current.matching_bracket)):
      return True

    # Prevent splitting before the first argument in compound statements
    # with the exception of function declarations.
    if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and
        _IsCompoundStatement(self.line.first) and
        not _IsFunctionDef(self.line.first)):
      return False

    ###########################################################################
    # List Splitting
    if (style.Get('DEDENT_CLOSING_BRACKETS') or
        style.Get('INDENT_CLOSING_BRACKETS') or
        style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
      # Look at the closing bracket when we're on one; otherwise at the token
      # before us, which may be an opening bracket.
      bracket = current if current.ClosesScope() else previous
      if subtypes.SUBSCRIPT_BRACKET not in bracket.subtypes:
        if bracket.OpensScope():
          if style.Get('COALESCE_BRACKETS'):
            if current.OpensScope():
              # Prefer to keep all opening brackets together.
              return False

          if (not _IsLastScopeInLine(bracket) or
              logical_line.IsSurroundedByBrackets(bracket)):
            last_token = bracket.matching_bracket
          else:
            last_token = _LastTokenInLine(bracket.matching_bracket)

          if not self._FitsOnLine(bracket, last_token):
            # Split before the first element if the whole list can't fit on a
            # single line.
            self.stack[-1].split_before_closing_bracket = True
            return True

        elif (style.Get('DEDENT_CLOSING_BRACKETS') or
              style.Get('INDENT_CLOSING_BRACKETS')) and current.ClosesScope():
          # Split before and dedent the closing bracket.
          return self.stack[-1].split_before_closing_bracket

    if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and
        current.is_name):
      # An expression that's surrounded by parens gets split after the opening
      # parenthesis.
      def SurroundedByParens(token):
        """Check if it's an expression surrounded by parentheses."""
        while token:
          if token.value == ',':
            return False
          if token.value == ')':
            # Only counts when the ')' is the last token (nothing follows it).
            return not token.next_token
          if token.OpensScope():
            # Skip over the whole nested bracketed group.
            token = token.matching_bracket.next_token
          else:
            token = token.next_token
        return False

      if (previous.value == '(' and not previous.is_pseudo and
          not logical_line.IsSurroundedByBrackets(previous)):
        pptoken = previous.previous_token
        if (pptoken and not pptoken.is_name and not pptoken.is_keyword and
            SurroundedByParens(current)):
          return True

    if (current.is_name or current.is_string) and previous.value == ',':
      # If the list has function calls in it and the full list itself cannot
      # fit on the line, then we want to split. Otherwise, we'll get something
      # like this:
      #
      #     X = [
      #         Bar(xxx='some string',
      #             yyy='another long string',
      #             zzz='a third long string'), Bar(
      #                 xxx='some string',
      #                 yyy='another long string',
      #                 zzz='a third long string')
      #     ]
      #
      # or when a string formatting syntax.
      func_call_or_string_format = False
      tok = current.next_token
      if current.is_name:
        # Skip over a dotted name to see whether a '(' follows it.
        while tok and (tok.is_name or tok.value == '.'):
          tok = tok.next_token
        func_call_or_string_format = tok and tok.value == '('
      elif current.is_string:
        # Skip over adjacent string tokens to see whether a '%' follows.
        while tok and tok.is_string:
          tok = tok.next_token
        func_call_or_string_format = tok and tok.value == '%'
      if func_call_or_string_format:
        open_bracket = logical_line.IsSurroundedByBrackets(current)
        if open_bracket:
          if open_bracket.value in '[{':
            if not self._FitsOnLine(open_bracket,
                                    open_bracket.matching_bracket):
              return True
          elif tok.value == '(':
            if not self._FitsOnLine(current, tok.matching_bracket):
              return True

    if (current.OpensScope() and previous.value == ',' and
        subtypes.DICTIONARY_KEY not in current.next_token.subtypes):
      # If we have a list of tuples, then we can get a similar look as above. If
      # the full list cannot fit on the line, then we want a split.
      open_bracket = logical_line.IsSurroundedByBrackets(current)
      if (open_bracket and open_bracket.value in '[{' and
          subtypes.SUBSCRIPT_BRACKET not in open_bracket.subtypes):
        if not self._FitsOnLine(current, current.matching_bracket):
          return True

    ###########################################################################
    # Dict/Set Splitting
    if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and
        subtypes.DICTIONARY_KEY in current.subtypes and not current.is_comment):
      # Place each dictionary entry onto its own line.
      if previous.value == '{' and previous.previous_token:
        opening = _GetOpeningBracket(previous.previous_token)
        if (opening and opening.value == '(' and opening.previous_token and
            opening.previous_token.is_name):
          # This is a dictionary that's an argument to a function.
          if (self._FitsOnLine(previous, previous.matching_bracket) and
              previous.matching_bracket.next_token and
              (not opening.matching_bracket.next_token or
               opening.matching_bracket.next_token.value != '.') and
              _ScopeHasNoCommas(previous)):
            # Don't split before the key if:
            #   - The dictionary fits on a line, and
            #   - The function call isn't part of a builder-style call and
            #   - The dictionary has one entry and no trailing comma
            return False
      return True

    if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and
        subtypes.DICT_SET_GENERATOR in current.subtypes):
      # Split before a dict/set generator.
      return True

    if (subtypes.DICTIONARY_VALUE in current.subtypes or
        (previous.is_pseudo and previous.value == '(' and
         not current.is_comment)):
      # Split before the dictionary value if we can't fit every dictionary
      # entry on its own line.
      if not current.OpensScope():
        opening = _GetOpeningBracket(current)
        if not self._EachDictEntryFitsOnOneLine(opening):
          return style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')

    if previous.value == '{':
      # Split if the dict/set cannot fit on one line and ends in a comma.
      closing = previous.matching_bracket
      if (not self._FitsOnLine(previous, closing) and
          closing.previous_token.value == ','):
        self.stack[-1].split_before_closing_bracket = True
        return True

    ###########################################################################
    # Argument List Splitting
    if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and
        subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in current.subtypes):
      if (previous.value not in {'=', ':', '*', '**'} and
          current.value not in ':=,)' and not _IsFunctionDefinition(previous)):
        # If we're going to split the lines because of named arguments, then we
        # want to split after the opening bracket as well. But not when this is
        # part of a function definition.
        if previous.value == '(':
          # Make sure we don't split after the opening bracket if the
          # continuation indent is greater than the opening bracket:
          #
          #  a(
          #      b=1,
          #      c=2)
          if (self._FitsOnLine(previous, previous.matching_bracket) and
              logical_line.IsSurroundedByBrackets(previous)):
            # An argument to a function is a function call with named
            # assigns.
            return False

          # Don't split if not required
          if (not style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and
              not style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
            return False

          # Distance from the top stack entry's 'last_space' to the current
          # column; split only if it exceeds the continuation indent width.
          column = self.column - self.stack[-1].last_space
          return column > style.Get('CONTINUATION_INDENT_WIDTH')

        opening = _GetOpeningBracket(current)
        if opening:
          return not self._ContainerFitsOnStartLine(opening)

    if (current.value not in '{)' and previous.value == '(' and
        self._ArgumentListHasDictionaryEntry(current)):
      return True

    if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'):
      # Split before arguments in a function call or definition if the
      # arguments are terminated by a comma.
      opening = _GetOpeningBracket(current)
      if opening and opening.previous_token and opening.previous_token.is_name:
        if previous.value in '(,':
          if opening.matching_bracket.previous_token.value == ',':
            return True

    if ((current.is_name or current.value in {'*', '**'}) and
        previous.value == ','):
      # If we have a function call within an argument list and it won't fit on
      # the remaining line, but it will fit on a line by itself, then go ahead
      # and split before the call.
      opening = _GetOpeningBracket(current)
      if (opening and opening.value == '(' and opening.previous_token and
          (opening.previous_token.is_name or
           opening.previous_token.value in {'*', '**'})):
        is_func_call = False
        # Scan forward from 'current' over names, '*', '**' and '.' until a
        # '(' is found (a call) or any other token ends the scan.
        opening = current
        while opening:
          if opening.value == '(':
            is_func_call = True
            break
          if (not (opening.is_name or opening.value in {'*', '**'}) and
              opening.value != '.'):
            break
          opening = opening.next_token

        if is_func_call:
          if (not self._FitsOnLine(current, opening.matching_bracket) or
              (opening.matching_bracket.next_token and
               opening.matching_bracket.next_token.value != ',' and
               not opening.matching_bracket.next_token.ClosesScope())):
            return True

    # The token two positions before 'current'; may be None.
    pprevious = previous.previous_token

    # A function call with a dictionary as its first argument may result in
    # unreadable formatting if the dictionary spans multiple lines. The
    # dictionary itself is formatted just fine, but the remaining arguments are
    # indented too far:
    #
    #     function_call({
    #         KEY_1: 'value one',
    #         KEY_2: 'value two',
    #     },
    #                   default=False)
    if (current.value == '{' and previous.value == '(' and pprevious and
        pprevious.is_name):
      dict_end = current.matching_bracket
      next_token = dict_end.next_token
      if next_token.value == ',' and not self._FitsOnLine(current, dict_end):
        return True

    if (current.is_name and pprevious and pprevious.is_name and
        previous.value == '('):

      if (not self._FitsOnLine(previous, previous.matching_bracket) and
          _IsFunctionCallWithArguments(current)):
        # There is a function call, with more than 1 argument, where the first
        # argument is itself a function call with arguments that does not fit
        # into the line.  In this specific case, if we split after the first
        # argument's opening '(', then the formatting will look bad for the
        # rest of the arguments. E.g.:
        #
        #     outer_function_call(inner_function_call(
        #         inner_arg1, inner_arg2),
        #                         outer_arg1, outer_arg2)
        #
        # Instead, enforce a split before that argument to keep things looking
        # good.
        if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') or
            style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
          return True

        opening = _GetOpeningBracket(current)
        if (opening and opening.value == '(' and opening.previous_token and
            (opening.previous_token.is_name or
             opening.previous_token.value in {'*', '**'})):
          is_func_call = False
          # Same forward scan as in the comma case above: find the '(' that
          # makes 'current' the start of a call.
          opening = current
          while opening:
            if opening.value == '(':
              is_func_call = True
              break
            if (not (opening.is_name or opening.value in {'*', '**'}) and
                opening.value != '.'):
              break
            opening = opening.next_token

          if is_func_call:
            if (not self._FitsOnLine(current, opening.matching_bracket) or
                (opening.matching_bracket.next_token and
                 opening.matching_bracket.next_token.value != ',' and
                 not opening.matching_bracket.next_token.ClosesScope())):
              return True

    if (previous.OpensScope() and not current.OpensScope() and
        not current.is_comment and
        subtypes.SUBSCRIPT_BRACKET not in previous.subtypes):
      if pprevious and not pprevious.is_keyword and not pprevious.is_name:
        # We want to split if there's a comment in the container.
        token = current
        while token != previous.matching_bracket:
          if token.is_comment:
            return True
          token = token.next_token
      if previous.value == '(':
        pptoken = previous.previous_token
        if not pptoken or not pptoken.is_name:
          # Split after the opening of a tuple if it doesn't fit on the current
          # line and it's not a function call.
          if self._FitsOnLine(previous, previous.matching_bracket):
            return False
        elif not self._FitsOnLine(previous, previous.matching_bracket):
          if len(previous.container_elements) == 1:
            return False

          # Check each adjacent pair of container elements (with the closing
          # bracket appended as the final boundary).
          elements = previous.container_elements + [previous.matching_bracket]
          i = 1
          while i < len(elements):
            if (not elements[i - 1].OpensScope() and
                not self._FitsOnLine(elements[i - 1], elements[i])):
              return True
            i += 1

          if (self.column_limit - self.column) / float(self.column_limit) < 0.3:
            # Try not to squish all of the arguments off to the right.
            return True
      else:
        # Split after the opening of a container if it doesn't fit on the
        # current line.
        if not self._FitsOnLine(previous, previous.matching_bracket):
          return True

    ###########################################################################
    # Original Formatting Splitting
    # These checks rely upon the original formatting. This is in order to
    # attempt to keep hand-written code in the same condition as it was before.
    # However, this may cause the formatter to fail to be idempotent.
    if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and
        previous.lineno < current.lineno):
      # Retain the split before a bitwise operator.
      return True

    if (current.is_comment and
        previous.lineno < current.lineno - current.value.count('\n')):
      # If a comment comes in the middle of a logical line (like an if
      # conditional with comments interspersed), then we want to split if the
      # original comments were on a separate line.
      return True

    return False
560
561  def AddTokenToState(self, newline, dry_run, must_split=False):
562    """Add a token to the format decision state.
563
564    Allow the heuristic to try out adding the token with and without a newline.
565    Later on, the algorithm will determine which one has the lowest penalty.
566
567    Arguments:
568      newline: (bool) Add the token on a new line if True.
569      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
570        True.
571      must_split: (bool) A newline was required before this token.
572
573    Returns:
574      The penalty of splitting after the current token.
575    """
576    self._PushParameterListState(newline)
577
578    penalty = 0
579    if newline:
580      penalty = self._AddTokenOnNewline(dry_run, must_split)
581    else:
582      self._AddTokenOnCurrentLine(dry_run)
583
584    penalty += self._CalculateComprehensionState(newline)
585    penalty += self._CalculateParameterListState(newline)
586
587    return self.MoveStateToNextToken() + penalty
588
589  def _AddTokenOnCurrentLine(self, dry_run):
590    """Puts the token on the current line.
591
592    Appends the next token to the state and updates information necessary for
593    indentation.
594
595    Arguments:
596      dry_run: (bool) Commit whitespace changes to the FormatToken if True.
597    """
598    current = self.next_token
599    previous = current.previous_token
600
601    spaces = current.spaces_required_before
602    if isinstance(spaces, list):
603      # Don't set the value here, as we need to look at the lines near
604      # this one to determine the actual horizontal alignment value.
605      spaces = 0
606
607    if not dry_run:
608      current.AddWhitespacePrefix(newlines_before=0, spaces=spaces)
609
610    if previous.OpensScope():
611      if not current.is_comment:
612        # Align closing scopes that are on a newline with the opening scope:
613        #
614        #     foo = [a,
615        #            b,
616        #           ]
617        self.stack[-1].closing_scope_indent = self.column - 1
618        if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'):
619          self.stack[-1].closing_scope_indent += 1
620        self.stack[-1].indent = self.column + spaces
621      else:
622        self.stack[-1].closing_scope_indent = (
623            self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH'))
624
625    self.column += spaces
626
627  def _AddTokenOnNewline(self, dry_run, must_split):
628    """Adds a line break and necessary indentation.
629
630    Appends the next token to the state and updates information necessary for
631    indentation.
632
633    Arguments:
634      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
635        True.
636      must_split: (bool) A newline was required before this token.
637
638    Returns:
639      The split penalty for splitting after the current state.
640    """
641    current = self.next_token
642    previous = current.previous_token
643
644    self.column = self._GetNewlineColumn()
645
646    if not dry_run:
647      indent_level = self.line.depth
648      spaces = self.column
649      if spaces:
650        spaces -= indent_level * style.Get('INDENT_WIDTH')
651      current.AddWhitespacePrefix(
652          newlines_before=1, spaces=spaces, indent_level=indent_level)
653
654    if not current.is_comment:
655      self.stack[-1].last_space = self.column
656    self.lowest_level_on_line = self.paren_level
657
658    if (previous.OpensScope() or
659        (previous.is_comment and previous.previous_token is not None and
660         previous.previous_token.OpensScope())):
661      dedent = (style.Get('CONTINUATION_INDENT_WIDTH'),
662                0)[style.Get('INDENT_CLOSING_BRACKETS')]
663      self.stack[-1].closing_scope_indent = (
664          max(0, self.stack[-1].indent - dedent))
665      self.stack[-1].split_before_closing_bracket = True
666
667    # Calculate the split penalty.
668    penalty = current.split_penalty
669
670    if must_split:
671      # Don't penalize for a must split.
672      return penalty
673
674    if previous.is_pseudo and previous.value == '(':
675      # Small penalty for splitting after a pseudo paren.
676      penalty += 50
677
678    # Add a penalty for each increasing newline we add, but don't penalize for
679    # splitting before an if-expression or list comprehension.
680    if current.value not in {'if', 'for'}:
681      last = self.stack[-1]
682      last.num_line_splits += 1
683      penalty += (
684          style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') *
685          last.num_line_splits)
686
687    if current.OpensScope() and previous.OpensScope():
688      # Prefer to keep opening brackets coalesced (unless it's at the beginning
689      # of a function call).
690      pprev = previous.previous_token
691      if not pprev or not pprev.is_name:
692        penalty += 10
693
694    return penalty + 10
695
696  def MoveStateToNextToken(self):
697    """Calculate format decision state information and move onto the next token.
698
699    Before moving onto the next token, we first calculate the format decision
700    state given the current token and its formatting decisions. Then the format
701    decision state is set up so that the next token can be added.
702
703    Returns:
704      The penalty for the number of characters over the column limit.
705    """
706    current = self.next_token
707    if not current.OpensScope() and not current.ClosesScope():
708      self.lowest_level_on_line = min(self.lowest_level_on_line,
709                                      self.paren_level)
710
711    # If we encounter an opening bracket, we add a level to our stack to prepare
712    # for the subsequent tokens.
713    if current.OpensScope():
714      last = self.stack[-1]
715      new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space
716
717      self.stack.append(_ParenState(new_indent, self.stack[-1].last_space))
718      self.paren_level += 1
719
720    # If we encounter a closing bracket, we can remove a level from our
721    # parenthesis stack.
722    if len(self.stack) > 1 and current.ClosesScope():
723      if subtypes.DICTIONARY_KEY_PART in current.subtypes:
724        self.stack[-2].last_space = self.stack[-2].indent
725      else:
726        self.stack[-2].last_space = self.stack[-1].last_space
727      self.stack.pop()
728      self.paren_level -= 1
729
730    is_multiline_string = current.is_string and '\n' in current.value
731    if is_multiline_string:
732      # This is a multiline string. Only look at the first line.
733      self.column += len(current.value.split('\n')[0])
734    elif not current.is_pseudo:
735      self.column += len(current.value)
736
737    self.next_token = self.next_token.next_token
738
739    # Calculate the penalty for overflowing the column limit.
740    penalty = 0
741    if (not current.is_pylint_comment and not current.is_pytype_comment and
742        not current.is_copybara_comment and self.column > self.column_limit):
743      excess_characters = self.column - self.column_limit
744      penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters
745
746    if is_multiline_string:
747      # If this is a multiline string, the column is actually the
748      # end of the last line in the string.
749      self.column = len(current.value.split('\n')[-1])
750
751    return penalty
752
753  def _CalculateComprehensionState(self, newline):
754    """Makes required changes to comprehension state.
755
756    Args:
757      newline: Whether the current token is to be added on a newline.
758
759    Returns:
760      The penalty for the token-newline combination given the current
761      comprehension state.
762    """
763    current = self.next_token
764    previous = current.previous_token
765    top_of_stack = self.comp_stack[-1] if self.comp_stack else None
766    penalty = 0
767
768    if top_of_stack is not None:
769      # Check if the token terminates the current comprehension.
770      if current == top_of_stack.closing_bracket:
771        last = self.comp_stack.pop()
772        # Lightly penalize comprehensions that are split across multiple lines.
773        if last.has_interior_split:
774          penalty += style.Get('SPLIT_PENALTY_COMPREHENSION')
775
776        return penalty
777
778      if newline:
779        top_of_stack.has_interior_split = True
780
781    if (subtypes.COMP_EXPR in current.subtypes and
782        subtypes.COMP_EXPR not in previous.subtypes):
783      self.comp_stack.append(object_state.ComprehensionState(current))
784      return penalty
785
786    if current.value == 'for' and subtypes.COMP_FOR in current.subtypes:
787      if top_of_stack.for_token is not None:
788        # Treat nested comprehensions like normal comp_if expressions.
789        # Example:
790        #     my_comp = [
791        #         a.qux + b.qux
792        #         for a in foo
793        #   -->   for b in bar   <--
794        #         if a.zut + b.zut
795        #     ]
796        if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
797            top_of_stack.has_split_at_for != newline and
798            (top_of_stack.has_split_at_for or
799             not top_of_stack.HasTrivialExpr())):
800          penalty += split_penalty.UNBREAKABLE
801      else:
802        top_of_stack.for_token = current
803        top_of_stack.has_split_at_for = newline
804
805        # Try to keep trivial expressions on the same line as the comp_for.
806        if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and newline and
807            top_of_stack.HasTrivialExpr()):
808          penalty += split_penalty.CONNECTED
809
810    if (subtypes.COMP_IF in current.subtypes and
811        subtypes.COMP_IF not in previous.subtypes):
812      # Penalize breaking at comp_if when it doesn't match the newline structure
813      # in the rest of the comprehension.
814      if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
815          top_of_stack.has_split_at_for != newline and
816          (top_of_stack.has_split_at_for or not top_of_stack.HasTrivialExpr())):
817        penalty += split_penalty.UNBREAKABLE
818
819    return penalty
820
821  def _PushParameterListState(self, newline):
822    """Push a new parameter list state for a function definition.
823
824    Args:
825      newline: Whether the current token is to be added on a newline.
826    """
827    current = self.next_token
828    previous = current.previous_token
829
830    if _IsFunctionDefinition(previous):
831      first_param_column = previous.total_length + self.stack[-2].indent
832      self.param_list_stack.append(
833          object_state.ParameterListState(previous, newline,
834                                          first_param_column))
835
  def _CalculateParameterListState(self, newline):
    """Makes required changes to parameter list state.

    The pending token falls into one of four cases, handled in this order:
      1. It directly follows the opening bracket of a function definition.
      2. No parameter list is being tracked: no penalty.
      3. It is the closing bracket of the innermost tracked parameter list,
         which is then popped from the stack.
      4. It is any other token inside the innermost tracked parameter list.

    Args:
      newline: Whether the current token is to be added on a newline.

    Returns:
      The penalty for the token-newline combination given the current
      parameter state. The value can be negative (see case 3).
    """
    current = self.next_token
    previous = current.previous_token
    penalty = 0

    # Case 1: first token after the '(' of a function definition.
    if _IsFunctionDefinition(previous):
      first_param_column = previous.total_length + self.stack[-2].indent
      if not newline:
        param_list = self.param_list_stack[-1]
        if param_list.parameters and param_list.has_typed_return:
          # Length from the start of the last parameter to the last token of
          # the line.
          last_param = param_list.parameters[-1].first_token
          last_token = _LastTokenInLine(previous.matching_bracket)
          total_length = last_token.total_length
          total_length -= last_param.total_length - len(last_param.value)
          if total_length + self.column > self.column_limit:
            # If we need to split before the trailing code of a function
            # definition with return types, then also split before the opening
            # parameter so that the trailing bit isn't indented on a line by
            # itself:
            #
            #   def rrrrrrrrrrrrrrrrrrrrrr(ccccccccccccccccccccccc: Tuple[Text]
            #                              ) -> List[Tuple[Text, Text]]:
            #       pass
            penalty += split_penalty.VERY_STRONGLY_CONNECTED
        return penalty

      if first_param_column <= self.column:
        # Make sure we don't split after the opening bracket if the
        # continuation indent is greater than the opening bracket:
        #
        #   a(
        #       b=1,
        #       c=2)
        penalty += split_penalty.VERY_STRONGLY_CONNECTED
      return penalty

    # Case 2: not inside any tracked parameter list.
    if not self.param_list_stack:
      return penalty

    # Case 3: the closing bracket of the innermost parameter list.
    param_list = self.param_list_stack[-1]
    if current == self.param_list_stack[-1].closing_bracket:
      self.param_list_stack.pop()  # We're done with this state.
      if newline and param_list.has_typed_return:
        if param_list.split_before_closing_bracket:
          # Reward (negative penalty) the split before the closing bracket.
          penalty -= split_penalty.STRONGLY_CONNECTED
        elif param_list.LastParamFitsOnLine(self.column):
          penalty += split_penalty.STRONGLY_CONNECTED

      if (not newline and param_list.has_typed_return and
          param_list.has_split_before_first_param):
        # Prefer splitting before the closing bracket if there's a return type
        # and we've already split before the first parameter.
        penalty += split_penalty.STRONGLY_CONNECTED

      return penalty

    # Case 4: a token inside the parameter list.
    if not param_list.parameters:
      return penalty

    if newline:
      # Penalize the split when everything from the first parameter through the
      # last token of the line passes the _FitsOnLine check.
      if self._FitsOnLine(param_list.parameters[0].first_token,
                          _LastTokenInLine(param_list.closing_bracket)):
        penalty += split_penalty.STRONGLY_CONNECTED

    if (not newline and style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and
        param_list.has_default_values and
        current != param_list.parameters[0].first_token and
        current != param_list.closing_bracket and
        subtypes.PARAMETER_START in current.subtypes):
      # If we want to split before parameters when there are named assigns,
      # then add a penalty for not splitting.
      penalty += split_penalty.STRONGLY_CONNECTED

    return penalty
919
920  def _IndentWithContinuationAlignStyle(self, column):
921    if column == 0:
922      return column
923    align_style = style.Get('CONTINUATION_ALIGN_STYLE')
924    if align_style == 'FIXED':
925      return ((self.line.depth * style.Get('INDENT_WIDTH')) +
926              style.Get('CONTINUATION_INDENT_WIDTH'))
927    if align_style == 'VALIGN-RIGHT':
928      indent_width = style.Get('INDENT_WIDTH')
929      return indent_width * int((column + indent_width - 1) / indent_width)
930    return column
931
  def _GetNewlineColumn(self):
    """Return the new column on the newline.

    The rules below are tried in order and the first one that applies decides
    the column at which the pending token starts if it is placed on a new line.

    Returns:
      The column for the pending token. 0 is returned when the token's
      spaces_required_before is a list.
    """
    current = self.next_token
    previous = current.previous_token
    top_of_stack = self.stack[-1]

    if isinstance(current.spaces_required_before, list):
      # Don't set the value here, as we need to look at the lines near
      # this one to determine the actual horizontal alignment value.
      return 0
    elif current.spaces_required_before > 2 or self.line.disable:
      # Use the spacing recorded on the token itself.
      return current.spaces_required_before

    # Default column: the indent of the innermost bracket level, adjusted for
    # the continuation alignment style.
    cont_aligned_indent = self._IndentWithContinuationAlignStyle(
        top_of_stack.indent)

    if current.OpensScope():
      # Outside of any bracket (paren_level == 0) use first_indent instead.
      return cont_aligned_indent if self.paren_level else self.first_indent

    if current.ClosesScope():
      # The bracket is empty, or holds only a comment directly after the
      # opening bracket: step back one continuation indent, but never below 0.
      if (previous.OpensScope() or
          (previous.is_comment and previous.previous_token is not None and
           previous.previous_token.OpensScope())):
        return max(0,
                   top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH'))
      return top_of_stack.closing_scope_indent

    # A string dictionary value directly after another string starts at the
    # previous string's column.
    if (previous and previous.is_string and current.is_string and
        subtypes.DICTIONARY_VALUE in current.subtypes):
      return previous.column

    if style.Get('INDENT_DICTIONARY_VALUE'):
      # A dictionary value right after ':' or a pseudo token uses the raw stack
      # indent, without the continuation alignment adjustment.
      if previous and (previous.value == ':' or previous.is_pseudo):
        if subtypes.DICTIONARY_VALUE in current.subtypes:
          return top_of_stack.indent

    if (not self.param_list_stack and _IsCompoundStatement(self.line.first) and
        (not (style.Get('DEDENT_CLOSING_BRACKETS') or
              style.Get('INDENT_CLOSING_BRACKETS')) or
         style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))):
      # token_indent is the line's own indentation plus one INDENT_WIDTH. When
      # the stack indent equals it, one more continuation indent is added.
      # NOTE(review): presumably this keeps the continuation distinct from the
      # statement's body; confirm.
      token_indent = (
          len(self.line.first.whitespace_prefix.split('\n')[-1]) +
          style.Get('INDENT_WIDTH'))
      if token_indent == top_of_stack.indent:
        return token_indent + style.Get('CONTINUATION_INDENT_WIDTH')

    if (self.param_list_stack and
        not self.param_list_stack[-1].SplitBeforeClosingBracket(
            top_of_stack.indent) and top_of_stack.indent
        == ((self.line.depth + 1) * style.Get('INDENT_WIDTH'))):
      # Inside a tracked parameter list whose stack indent is exactly one
      # INDENT_WIDTH deeper than the line: a token that starts a parameter (or
      # follows a comment carrying PARAMETER_START) gets an extra continuation
      # indent.
      if (subtypes.PARAMETER_START in current.subtypes or
          (previous.is_comment and
           subtypes.PARAMETER_START in previous.subtypes)):
        return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH')

    return cont_aligned_indent
988
989  def _FitsOnLine(self, start, end):
990    """Determines if line between start and end can fit on the current line."""
991    length = end.total_length - start.total_length
992    if not start.is_pseudo:
993      length += len(start.value)
994    return length + self.column <= self.column_limit
995
  def _EachDictEntryFitsOnOneLine(self, opening):
    """Determine if each dict elems can fit on one line.

    Walks the tokens between `opening` and its matching bracket. Each time a
    token with the DICTIONARY_KEY subtype is reached, the entry that just ended
    is measured against the column limit, using the indent of the second level
    from the top of the bracket stack.

    Args:
      opening: The opening bracket of the dictionary.

    Returns:
      False as soon as a measured entry is too long; True if the walk reaches
      the closing bracket while skipping a nested value; otherwise whether the
      final entry fits.
    """

    def PreviousNonCommentToken(tok):
      """Return the closest token before `tok` that is not a comment."""
      tok = tok.previous_token
      while tok.is_comment:
        tok = tok.previous_token
      return tok

    def ImplicitStringConcatenation(tok):
      """Return True if more than one string token starts at `tok`.

      A leading pseudo token is skipped before counting.
      """
      num_strings = 0
      if tok.is_pseudo:
        tok = tok.next_token
      while tok.is_string:
        num_strings += 1
        tok = tok.next_token
      return num_strings > 1

    def DictValueIsContainer(opening, closing):
      """Return true if the dictionary value is a container."""
      if not opening or not closing:
        return False
      # Walk back over pseudo tokens to the token before the bracket, which
      # must be the ':' of a dictionary entry.
      colon = opening.previous_token
      while colon:
        if not colon.is_pseudo:
          break
        colon = colon.previous_token
      if not colon or colon.value != ':':
        return False
      key = colon.previous_token
      if not key:
        return False
      return subtypes.DICTIONARY_KEY_PART in key.subtypes

    closing = opening.matching_bracket
    entry_start = opening.next_token
    current = opening.next_token.next_token

    while current and current != closing:
      if subtypes.DICTIONARY_KEY in current.subtypes:
        # A new key starts here, so the previous entry ended just before it.
        prev = PreviousNonCommentToken(current)
        if prev.value == ',':
          # NOTE(review): this starts from the token before the comma and then
          # steps back once more inside the helper, so two tokens are skipped.
          # Presumably the skipped token is a closing pseudo paren; confirm.
          prev = PreviousNonCommentToken(prev.previous_token)
        if not DictValueIsContainer(prev.matching_bracket, prev):
          length = prev.total_length - entry_start.total_length
          length += len(entry_start.value)
          # NOTE(review): this check rejects length + indent == column_limit
          # ('>='), while the final check below accepts it ('<='). Confirm
          # whether the difference is intended.
          if length + self.stack[-2].indent >= self.column_limit:
            return False
        entry_start = current
      if current.OpensScope():
        # NOTE(review): 'and' binds tighter than 'or', so the DICTIONARY_VALUE
        # test only applies to the pseudo-token alternative, not to a literal
        # '{'. Confirm that this grouping is intended.
        if ((current.value == '{' or
             (current.is_pseudo and current.next_token.value == '{') and
             subtypes.DICTIONARY_VALUE in current.subtypes) or
            ImplicitStringConcatenation(current)):
          # A dictionary entry that cannot fit on a single line shouldn't matter
          # to this calculation. If it can't fit on a single line, then the
          # opening should be on the same line as the key and the rest on
          # newlines after it. But the other entries should be on single lines
          # if possible.
          if current.matching_bracket:
            current = current.matching_bracket
          # Skip ahead to the next key, or finish if the dictionary ends first.
          while current:
            if current == closing:
              return True
            if subtypes.DICTIONARY_KEY in current.subtypes:
              entry_start = current
              break
            current = current.next_token
        else:
          # Jump to the closing bracket of any other nested scope.
          current = current.matching_bracket
      else:
        current = current.next_token

    # At this point, current is the closing bracket. Go back one to get the end
    # of the dictionary entry.
    current = PreviousNonCommentToken(current)
    length = current.total_length - entry_start.total_length
    length += len(entry_start.value)
    return length + self.stack[-2].indent <= self.column_limit
1075
1076  def _ArgumentListHasDictionaryEntry(self, token):
1077    """Check if the function argument list has a dictionary as an arg."""
1078    if _IsArgumentToFunction(token):
1079      while token:
1080        if token.value == '{':
1081          length = token.matching_bracket.total_length - token.total_length
1082          return length + self.stack[-2].indent > self.column_limit
1083        if token.ClosesScope():
1084          break
1085        if token.OpensScope():
1086          token = token.matching_bracket
1087        token = token.next_token
1088    return False
1089
1090  def _ContainerFitsOnStartLine(self, opening):
1091    """Check if the container can fit on its starting line."""
1092    return (opening.matching_bracket.total_length - opening.total_length +
1093            self.stack[-1].indent) <= self.column_limit
1094
1095
# Leading keywords that _IsCompoundStatement recognizes.
_COMPOUND_STMTS = frozenset(
    ('for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'))


def _IsCompoundStatement(token):
  """Return True if the token is a keyword listed in _COMPOUND_STMTS.

  A leading 'async' is skipped, so the token after it is tested instead.
  """
  keyword = token.next_token if token.value == 'async' else token
  return keyword.value in _COMPOUND_STMTS
1104
1105
def _IsFunctionDef(token):
  """Return True if the token is 'def', optionally preceded by 'async'."""
  keyword = token.next_token if token.value == 'async' else token
  return keyword.value == 'def'
1110
1111
def _IsFunctionCallWithArguments(token):
  """Check whether the tokens from `token` on form a call with arguments.

  Tokens named NAME, DOT or EQUAL are skipped until a '(' is found; any other
  token ends the scan.

  Returns:
    False if no '(' is reached. Otherwise the token after the '(' when that
    token is falsy (e.g. None), else whether its value differs from ')'.
  """
  skippable = ('NAME', 'DOT', 'EQUAL')
  cursor = token
  while cursor:
    if cursor.value == '(':
      first_inside = cursor.next_token
      return first_inside and first_inside.value != ')'
    if cursor.name not in skippable:
      return False
    cursor = cursor.next_token
  return False
1121
1122
def _IsArgumentToFunction(token):
  """Check whether the token sits inside the parentheses of a call.

  Returns:
    False if the surrounding bracket reported by
    logical_line.IsSurroundedByBrackets is missing or is not '('. Otherwise the
    token before that '(' when it is falsy, else that token's is_name value.
  """
  enclosing = logical_line.IsSurroundedByBrackets(token)
  if enclosing and enclosing.value == '(':
    callee = enclosing.previous_token
    return callee and callee.is_name
  return False
1129
1130
def _GetOpeningBracket(current):
  """Get the opening bracket containing the current token.

  Args:
    current: The token to start from.

  Returns:
    If `current` is a non-pseudo bracket with a matching bracket, the opening
    bracket of that pair. Otherwise the first opening bracket found while
    walking backwards through the line, or None when the walk runs out of
    tokens.
  """
  if current.matching_bracket and not current.is_pseudo:
    return current if current.OpensScope() else current.matching_bracket

  while current:
    if current.ClosesScope():
      # Jump to the start of the nested scope so its contents are skipped.
      current = current.matching_bracket
    elif current.is_pseudo:
      current = current.previous_token
    elif current.OpensScope():
      return current
    if not current:
      # A closing bracket without a match, or a pseudo token with nothing in
      # front of it: there is no enclosing bracket to report. Without this
      # guard the step below raises AttributeError on None.
      return None
    current = current.previous_token
  return None
1145
1146
def _LastTokenInLine(current):
  """Walk forward from `current` to the end of the line.

  Returns:
    The first comment token reached, or the token that has no next token,
    whichever comes first. `current` itself is returned if it already
    qualifies.
  """
  token = current
  while True:
    if token.is_comment:
      return token
    following = token.next_token
    if not following:
      return token
    token = following
1151
1152
def _IsFunctionDefinition(current):
  """Check whether `current` is the '(' that opens a function definition.

  Returns:
    False if the token's value is not '('. Otherwise the previous token when it
    is falsy (e.g. None), else whether that token carries the FUNC_DEF subtype.
  """
  before = current.previous_token
  if current.value != '(':
    return False
  return before and subtypes.FUNC_DEF in before.subtypes
1156
1157
def _IsLastScopeInLine(current):
  """Check that no bracket is opened after the one matching `current`.

  Returns:
    False if any token after `current.matching_bracket` opens a scope, True
    otherwise (including when there is no matching bracket).
  """
  closing = current.matching_bracket
  if not closing:
    return True

  token = closing.next_token
  while token:
    if token.OpensScope():
      return False
    token = token.next_token
  return True
1165
1166
def _IsSingleElementTuple(token):
  """Check if it's a single-element tuple.

  Args:
    token: The opening bracket of the scope to inspect.

  Returns:
    True if the scope has exactly one comma at its top level. Commas inside
    nested brackets are not counted.
  """
  end = token.matching_bracket

  def TopLevelTokens():
    # Yields the tokens directly inside the scope. A nested opening bracket is
    # yielded and then the walk jumps to its matching bracket.
    cursor = token.next_token
    while cursor != end:
      yield cursor
      if cursor.OpensScope():
        cursor = cursor.matching_bracket
      else:
        cursor = cursor.next_token

  return sum(1 for tok in TopLevelTokens() if tok.value == ',') == 1
1177
1178
def _ScopeHasNoCommas(token):
  """Check if the scope has no commas.

  Args:
    token: The opening bracket of the scope to inspect.

  Returns:
    True if no comma appears at the top level of the scope. Commas inside
    nested brackets are ignored.
  """
  end = token.matching_bracket

  def TopLevelTokens():
    # Yields the tokens directly inside the scope. A nested opening bracket is
    # yielded and then the walk jumps to its matching bracket.
    cursor = token.next_token
    while cursor != end:
      yield cursor
      if cursor.OpensScope():
        cursor = cursor.matching_bracket
      else:
        cursor = cursor.next_token

  return all(tok.value != ',' for tok in TopLevelTokens())
1188
1189
class _ParenState(object):
  """Maintains the state of the bracket enclosures.

  A stack of _ParenState objects are kept so that we know how to indent relative
  to the brackets.

  Two states are equal when all five attributes below are equal; the hash is
  computed from the same attributes.

  Attributes:
    indent: The column position to which a specified parenthesis level needs to
      be indented.
    last_space: The column position of the last space on each level.
    closing_scope_indent: The column position of the closing indentation.
    split_before_closing_bracket: Whether a newline needs to be inserted before
      the closing bracket. We only want to insert a newline before the closing
      bracket if there also was a newline after the beginning left bracket.
    num_line_splits: Number of line splits this _ParenState contains already.
      Each subsequent line split gets an increasing penalty.
  """

  # TODO(morbo): This doesn't track "bin packing."

  def __init__(self, indent, last_space):
    self.indent = indent
    self.last_space = last_space
    self.closing_scope_indent = 0
    self.split_before_closing_bracket = False
    self.num_line_splits = 0

  def Clone(self):
    """Return an independent copy carrying the same attribute values."""
    state = _ParenState(self.indent, self.last_space)
    state.closing_scope_indent = self.closing_scope_indent
    state.split_before_closing_bracket = self.split_before_closing_bracket
    state.num_line_splits = self.num_line_splits
    return state

  def __repr__(self):
    return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % (
        self.indent, self.last_space, self.closing_scope_indent)

  def _Key(self):
    """Return the tuple of attributes that defines equality and the hash."""
    return (self.indent, self.last_space, self.closing_scope_indent,
            self.split_before_closing_bracket, self.num_line_splits)

  def __eq__(self, other):
    # Compare the attributes themselves rather than their hashes: different
    # states can share a hash, and an int equal to the hash is not a state.
    if not isinstance(other, _ParenState):
      return NotImplemented
    return self._Key() == other._Key()

  def __ne__(self, other):
    return not self == other

  def __hash__(self, *args, **kwargs):
    return hash(self._Key())
1237