# format_decision_state.py (revision 7249d1a64f4850ccf838e62a46276f891f72998e) - OpenGrok cross reference for /aosp_15_r20/external/yapf/yapf/yapflib/format_decision_state.py

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements a format decision state object that manages whitespace decisions.

Each token is processed one at a time, at which point its whitespace formatting
decisions are made. A graph of potential whitespace formattings is created,
where each node in the graph is a format decision state object. The heuristic
tries formatting the token with and without a newline before it to determine
which one has the least penalty. Therefore, the format decision state object for
each decision needs to be its own unique copy.

Once the heuristic determines the best formatting, it makes a non-dry run pass
through the code to commit the whitespace formatting.

  FormatDecisionState: main class exported by this module.
"""

from yapf.yapflib import format_token
from yapf.yapflib import logical_line
from yapf.yapflib import object_state
from yapf.yapflib import split_penalty
from yapf.yapflib import style
from yapf.yapflib import subtypes


class FormatDecisionState(object):
  """The current state when indenting a logical line.

  The FormatDecisionState object is meant to be copied instead of referenced.

  Attributes:
    first_indent: The indent of the first token.
    column: The number of used columns in the current line.
    line: The logical line we're currently processing.
    next_token: The next token to be formatted.
    paren_level: The level of nesting inside (), [], and {}.
    lowest_level_on_line: The lowest paren_level on the current line.
    stack: A stack (of _ParenState) keeping track of properties applying to
      parenthesis levels.
    comp_stack: A stack (of ComprehensionState) keeping track of properties
      applying to comprehensions.
    param_list_stack: A stack (of ParameterListState) keeping track of
      properties applying to function parameter lists.
    ignore_stack_for_comparison: Ignore the stack of _ParenState for state
      comparison.
    column_limit: The column limit specified by the style.
  """

  def __init__(self, line, first_indent):
    """Builds the state that exists once the line's first token is placed.

    The first token of 'line' is treated as already sitting at column
    'first_indent'; every counter starts out from that position.

    Arguments:
      line: (LogicalLine) The logical line being formatted.
      first_indent: (int) The column at which the first token is placed.
    """
    # Position within the logical line.
    self.line = line
    self.next_token = line.first
    self.first_indent = first_indent
    self.column = first_indent

    # Bracket nesting bookkeeping.
    self.paren_level = 0
    self.lowest_level_on_line = 0

    # The bottom-most paren state is seeded with 'first_indent' for both of
    # its constructor arguments.
    base_state = _ParenState(first_indent, first_indent)
    self.stack = [base_state]
    self.comp_stack = []
    self.param_list_stack = []
    self.ignore_stack_for_comparison = False

    # Snapshot of the style's column limit taken at construction time.
    self.column_limit = style.Get('COLUMN_LIMIT')

  def Clone(self):
    """Clones a FormatDecisionState object."""
    new = FormatDecisionState(self.line, self.first_indent)
    new.next_token = self.next_token
    new.column = self.column
    new.line = self.line
    new.paren_level = self.paren_level
    new.line.depth = self.line.depth
    new.lowest_level_on_line = self.lowest_level_on_line
    new.ignore_stack_for_comparison = self.ignore_stack_for_comparison
    new.first_indent = self.first_indent
    new.stack = [state.Clone() for state in self.stack]
    new.comp_stack = [state.Clone() for state in self.comp_stack]
    new.param_list_stack = [state.Clone() for state in self.param_list_stack]
    return new

  def __eq__(self, other):
    # Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous',
    # because it shouldn't have a bearing on this comparison. (I.e., it will
    # report equal if 'next_token' does.)
    return (self.next_token == other.next_token and
            self.column == other.column and
            self.paren_level == other.paren_level and
            self.line.depth == other.line.depth and
            self.lowest_level_on_line == other.lowest_level_on_line and
            (self.ignore_stack_for_comparison or
             other.ignore_stack_for_comparison or self.stack == other.stack and
             self.comp_stack == other.comp_stack and
             self.param_list_stack == other.param_list_stack))

  def __ne__(self, other):
    return not self == other

  def __hash__(self):
    return hash((self.next_token, self.column, self.paren_level,
                 self.line.depth, self.lowest_level_on_line))

  def __repr__(self):
    return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' %
            (self.column, repr(self.next_token), self.paren_level,
             '\n\t'.join(repr(s) for s in self.stack) + ']'))

  def CanSplit(self, must_split):
    """Determine if we can split before the next token.

    Arguments:
      must_split: (bool) A newline was required before this token.

    Returns:
      True if the line can be split before the next token.
    """
    current = self.next_token
    previous = current.previous_token

    if current.is_pseudo:
      return False

    if (not must_split and subtypes.DICTIONARY_KEY_PART in current.subtypes and
        subtypes.DICTIONARY_KEY not in current.subtypes and
        not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')):
      # In some situations, a dictionary may be multiline, but pylint doesn't
      # like it. So don't allow it unless forced to.
      return False

    if (not must_split and subtypes.DICTIONARY_VALUE in current.subtypes and
        not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')):
      return False

    if previous and previous.value == '(' and current.value == ')':
      # Don't split an empty function call list if we aren't splitting before
      # dict values.
      token = previous.previous_token
      while token:
        prev = token.previous_token
        if not prev or prev.name not in {'NAME', 'DOT'}:
          break
        token = token.previous_token
      if token and subtypes.DICTIONARY_VALUE in token.subtypes:
        if not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE'):
          return False

    if previous and previous.value == '.' and current.value == '.':
      return False

    return current.can_break_before

  def MustSplit(self):
    """Returns True if the line must split before the next token.

    The checks below are evaluated in order and the first one that reaches a
    'return' decides the result. They consult the active style settings, the
    subtypes and values of the tokens around the split point, and whether the
    bracketed containers involved fit on the line.

    As a side effect, 'split_before_closing_bracket' on the top entry of
    'self.stack' may be set to True when a split is forced right after an
    opening bracket. That flag is read again by this method when the closing
    bracket is reached.

    Returns:
      A truthy value if a newline is required before the next token, a falsy
      value otherwise.
    """
    current = self.next_token
    previous = current.previous_token

    if current.is_pseudo:
      return False

    if current.must_break_before:
      return True

    # Every remaining check inspects the previous token.
    if not previous:
      return False

    if style.Get('SPLIT_ALL_COMMA_SEPARATED_VALUES') and previous.value == ',':
      return True

    if (style.Get('SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES') and
        previous.value == ','):
      # Avoid breaking in a container that fits in the current line if possible
      opening = _GetOpeningBracket(current)

      # Can't find opening bracket, behave the same way as
      # SPLIT_ALL_COMMA_SEPARATED_VALUES.
      if not opening:
        return True

      if current.is_comment:
        # Don't require splitting before a comment, since it may be related to
        # the current line.
        return False

      # Allow the fallthrough code to handle the closing bracket.
      if current != opening.matching_bracket:
        # If the container doesn't fit in the current line, must split
        return not self._ContainerFitsOnStartLine(opening)

    if (self.stack[-1].split_before_closing_bracket and
        (current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET') or
         current.value in '}])' and style.Get('INDENT_CLOSING_BRACKETS'))):
      # Split before the closing bracket if we can.
      if subtypes.SUBSCRIPT_BRACKET not in current.subtypes:
        return current.node_split_penalty != split_penalty.UNBREAKABLE

    if (current.value == ')' and previous.value == ',' and
        not _IsSingleElementTuple(current.matching_bracket)):
      return True

    # Prevent splitting before the first argument in compound statements
    # with the exception of function declarations.
    if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and
        _IsCompoundStatement(self.line.first) and
        not _IsFunctionDef(self.line.first)):
      return False

    ###########################################################################
    # List Splitting
    if (style.Get('DEDENT_CLOSING_BRACKETS') or
        style.Get('INDENT_CLOSING_BRACKETS') or
        style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
      # Look at the closing bracket if we are on one, otherwise at the token
      # that precedes the split point.
      bracket = current if current.ClosesScope() else previous
      if subtypes.SUBSCRIPT_BRACKET not in bracket.subtypes:
        if bracket.OpensScope():
          if style.Get('COALESCE_BRACKETS'):
            if current.OpensScope():
              # Prefer to keep all opening brackets together.
              return False

          if (not _IsLastScopeInLine(bracket) or
              logical_line.IsSurroundedByBrackets(bracket)):
            last_token = bracket.matching_bracket
          else:
            last_token = _LastTokenInLine(bracket.matching_bracket)

          if not self._FitsOnLine(bracket, last_token):
            # Split before the first element if the whole list can't fit on a
            # single line.
            self.stack[-1].split_before_closing_bracket = True
            return True

        elif (style.Get('DEDENT_CLOSING_BRACKETS') or
              style.Get('INDENT_CLOSING_BRACKETS')) and current.ClosesScope():
          # Split before and dedent the closing bracket.
          return self.stack[-1].split_before_closing_bracket

    if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and
        current.is_name):
      # An expression that's surrounded by parens gets split after the opening
      # parenthesis.
      def SurroundedByParens(token):
        """Check if it's an expression surrounded by parentheses."""
        while token:
          if token.value == ',':
            return False
          if token.value == ')':
            return not token.next_token
          if token.OpensScope():
            # Jump over nested brackets in one step.
            token = token.matching_bracket.next_token
          else:
            token = token.next_token
        return False

      if (previous.value == '(' and not previous.is_pseudo and
          not logical_line.IsSurroundedByBrackets(previous)):
        pptoken = previous.previous_token
        if (pptoken and not pptoken.is_name and not pptoken.is_keyword and
            SurroundedByParens(current)):
          return True

    if (current.is_name or current.is_string) and previous.value == ',':
      # If the list has function calls in it and the full list itself cannot
      # fit on the line, then we want to split. Otherwise, we'll get something
      # like this:
      #
      #     X = [
      #         Bar(xxx='some string',
      #             yyy='another long string',
      #             zzz='a third long string'), Bar(
      #                 xxx='some string',
      #                 yyy='another long string',
      #                 zzz='a third long string')
      #     ]
      #
      # or when a string formatting syntax.
      func_call_or_string_format = False
      tok = current.next_token
      if current.is_name:
        # Skip over a dotted name; a following '(' marks a function call.
        while tok and (tok.is_name or tok.value == '.'):
          tok = tok.next_token
        func_call_or_string_format = tok and tok.value == '('
      elif current.is_string:
        # Skip over adjacent strings; a following '%' marks string formatting.
        while tok and tok.is_string:
          tok = tok.next_token
        func_call_or_string_format = tok and tok.value == '%'
      if func_call_or_string_format:
        open_bracket = logical_line.IsSurroundedByBrackets(current)
        if open_bracket:
          if open_bracket.value in '[{':
            if not self._FitsOnLine(open_bracket,
                                    open_bracket.matching_bracket):
              return True
          elif tok.value == '(':
            if not self._FitsOnLine(current, tok.matching_bracket):
              return True

    if (current.OpensScope() and previous.value == ',' and
        subtypes.DICTIONARY_KEY not in current.next_token.subtypes):
      # If we have a list of tuples, then we can get a similar look as above. If
      # the full list cannot fit on the line, then we want a split.
      open_bracket = logical_line.IsSurroundedByBrackets(current)
      if (open_bracket and open_bracket.value in '[{' and
          subtypes.SUBSCRIPT_BRACKET not in open_bracket.subtypes):
        if not self._FitsOnLine(current, current.matching_bracket):
          return True

    ###########################################################################
    # Dict/Set Splitting
    if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and
        subtypes.DICTIONARY_KEY in current.subtypes and not current.is_comment):
      # Place each dictionary entry onto its own line.
      if previous.value == '{' and previous.previous_token:
        opening = _GetOpeningBracket(previous.previous_token)
        if (opening and opening.value == '(' and opening.previous_token and
            opening.previous_token.is_name):
          # This is a dictionary that's an argument to a function.
          if (self._FitsOnLine(previous, previous.matching_bracket) and
              previous.matching_bracket.next_token and
              (not opening.matching_bracket.next_token or
               opening.matching_bracket.next_token.value != '.') and
              _ScopeHasNoCommas(previous)):
            # Don't split before the key if:
            #   - The dictionary fits on a line, and
            #   - The function call isn't part of a builder-style call and
            #   - The dictionary has one entry and no trailing comma
            return False
      return True

    if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and
        subtypes.DICT_SET_GENERATOR in current.subtypes):
      # Split before a dict/set generator.
      return True

    if (subtypes.DICTIONARY_VALUE in current.subtypes or
        (previous.is_pseudo and previous.value == '(' and
         not current.is_comment)):
      # Split before the dictionary value if we can't fit every dictionary
      # entry on its own line.
      if not current.OpensScope():
        opening = _GetOpeningBracket(current)
        if not self._EachDictEntryFitsOnOneLine(opening):
          return style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')

    if previous.value == '{':
      # Split if the dict/set cannot fit on one line and ends in a comma.
      closing = previous.matching_bracket
      if (not self._FitsOnLine(previous, closing) and
          closing.previous_token.value == ','):
        self.stack[-1].split_before_closing_bracket = True
        return True

    ###########################################################################
    # Argument List Splitting
    if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and
        subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in current.subtypes):
      if (previous.value not in {'=', ':', '*', '**'} and
          current.value not in ':=,)' and not _IsFunctionDefinition(previous)):
        # If we're going to split the lines because of named arguments, then we
        # want to split after the opening bracket as well. But not when this is
        # part of a function definition.
        if previous.value == '(':
          # Make sure we don't split after the opening bracket if the
          # continuation indent is greater than the opening bracket:
          #
          #  a(
          #      b=1,
          #      c=2)
          if (self._FitsOnLine(previous, previous.matching_bracket) and
              logical_line.IsSurroundedByBrackets(previous)):
            # An argument to a function is a function call with named
            # assigns.
            return False

          # Don't split if not required
          if (not style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and
              not style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
            return False

          # Distance between the current column and the last recorded
          # 'last_space' of the innermost paren state.
          column = self.column - self.stack[-1].last_space
          return column > style.Get('CONTINUATION_INDENT_WIDTH')

        opening = _GetOpeningBracket(current)
        if opening:
          return not self._ContainerFitsOnStartLine(opening)

    if (current.value not in '{)' and previous.value == '(' and
        self._ArgumentListHasDictionaryEntry(current)):
      return True

    if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'):
      # Split before arguments in a function call or definition if the
      # arguments are terminated by a comma.
      opening = _GetOpeningBracket(current)
      if opening and opening.previous_token and opening.previous_token.is_name:
        if previous.value in '(,':
          if opening.matching_bracket.previous_token.value == ',':
            return True

    if ((current.is_name or current.value in {'*', '**'}) and
        previous.value == ','):
      # If we have a function call within an argument list and it won't fit on
      # the remaining line, but it will fit on a line by itself, then go ahead
      # and split before the call.
      opening = _GetOpeningBracket(current)
      if (opening and opening.value == '(' and opening.previous_token and
          (opening.previous_token.is_name or
           opening.previous_token.value in {'*', '**'})):
        is_func_call = False
        # Scan forward from the current token over names, '*', '**' and '.'
        # until a '(' is found (a call) or any other token ends the scan.
        opening = current
        while opening:
          if opening.value == '(':
            is_func_call = True
            break
          if (not (opening.is_name or opening.value in {'*', '**'}) and
              opening.value != '.'):
            break
          opening = opening.next_token

        if is_func_call:
          if (not self._FitsOnLine(current, opening.matching_bracket) or
              (opening.matching_bracket.next_token and
               opening.matching_bracket.next_token.value != ',' and
               not opening.matching_bracket.next_token.ClosesScope())):
            return True

    pprevious = previous.previous_token

    # A function call with a dictionary as its first argument may result in
    # unreadable formatting if the dictionary spans multiple lines. The
    # dictionary itself is formatted just fine, but the remaining arguments are
    # indented too far:
    #
    #     function_call({
    #         KEY_1: 'value one',
    #         KEY_2: 'value two',
    #     },
    #                   default=False)
    if (current.value == '{' and previous.value == '(' and pprevious and
        pprevious.is_name):
      dict_end = current.matching_bracket
      next_token = dict_end.next_token
      if next_token.value == ',' and not self._FitsOnLine(current, dict_end):
        return True

    if (current.is_name and pprevious and pprevious.is_name and
        previous.value == '('):

      if (not self._FitsOnLine(previous, previous.matching_bracket) and
          _IsFunctionCallWithArguments(current)):
        # There is a function call, with more than 1 argument, where the first
        # argument is itself a function call with arguments that does not fit
        # into the line.  In this specific case, if we split after the first
        # argument's opening '(', then the formatting will look bad for the
        # rest of the arguments. E.g.:
        #
        #     outer_function_call(inner_function_call(
        #         inner_arg1, inner_arg2),
        #                         outer_arg1, outer_arg2)
        #
        # Instead, enforce a split before that argument to keep things looking
        # good.
        if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') or
            style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
          return True

        opening = _GetOpeningBracket(current)
        if (opening and opening.value == '(' and opening.previous_token and
            (opening.previous_token.is_name or
             opening.previous_token.value in {'*', '**'})):
          is_func_call = False
          # Same forward scan as in the comma case above: find the '(' that
          # makes the current token the start of a function call.
          opening = current
          while opening:
            if opening.value == '(':
              is_func_call = True
              break
            if (not (opening.is_name or opening.value in {'*', '**'}) and
                opening.value != '.'):
              break
            opening = opening.next_token

          if is_func_call:
            if (not self._FitsOnLine(current, opening.matching_bracket) or
                (opening.matching_bracket.next_token and
                 opening.matching_bracket.next_token.value != ',' and
                 not opening.matching_bracket.next_token.ClosesScope())):
              return True

    if (previous.OpensScope() and not current.OpensScope() and
        not current.is_comment and
        subtypes.SUBSCRIPT_BRACKET not in previous.subtypes):
      if pprevious and not pprevious.is_keyword and not pprevious.is_name:
        # We want to split if there's a comment in the container.
        token = current
        while token != previous.matching_bracket:
          if token.is_comment:
            return True
          token = token.next_token
      if previous.value == '(':
        pptoken = previous.previous_token
        if not pptoken or not pptoken.is_name:
          # Split after the opening of a tuple if it doesn't fit on the current
          # line and it's not a function call.
          if self._FitsOnLine(previous, previous.matching_bracket):
            return False
        elif not self._FitsOnLine(previous, previous.matching_bracket):
          if len(previous.container_elements) == 1:
            return False

          # Check each consecutive pair of elements (the closing bracket is
          # appended as the final boundary) and split if any span between
          # them doesn't fit on the line.
          elements = previous.container_elements + [previous.matching_bracket]
          i = 1
          while i < len(elements):
            if (not elements[i - 1].OpensScope() and
                not self._FitsOnLine(elements[i - 1], elements[i])):
              return True
            i += 1

          # Less than 30% of the column limit remains on this line.
          if (self.column_limit - self.column) / float(self.column_limit) < 0.3:
            # Try not to squish all of the arguments off to the right.
            return True
      else:
        # Split after the opening of a container if it doesn't fit on the
        # current line.
        if not self._FitsOnLine(previous, previous.matching_bracket):
          return True

    ###########################################################################
    # Original Formatting Splitting
    # These checks rely upon the original formatting. This is in order to
    # attempt to keep hand-written code in the same condition as it was before.
    # However, this may cause the formatter to fail to be idempotent.
    if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and
        previous.lineno < current.lineno):
      # Retain the split before a bitwise operator.
      return True

    if (current.is_comment and
        previous.lineno < current.lineno - current.value.count('\n')):
      # If a comment comes in the middle of a logical line (like an if
      # conditional with comments interspersed), then we want to split if the
      # original comments were on a separate line.
      return True

    return False

  def AddTokenToState(self, newline, dry_run, must_split=False):
    """Places the next token and advances the state past it.

    The heuristic calls this once with and once without a newline for the same
    token, and later keeps whichever choice accumulates the lowest penalty.

    Arguments:
      newline: (bool) Put the token at the start of a new line if True.
      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
        True.
      must_split: (bool) A newline was required before this token.

    Returns:
      The combined penalty for this placement: the split penalty (zero when no
      newline is added), the comprehension and parameter list penalties, and
      the penalty reported by MoveStateToNextToken().
    """
    self._PushParameterListState(newline)

    if newline:
      placement_penalty = self._AddTokenOnNewline(dry_run, must_split)
    else:
      self._AddTokenOnCurrentLine(dry_run)
      placement_penalty = 0

    placement_penalty += self._CalculateComprehensionState(newline)
    placement_penalty += self._CalculateParameterListState(newline)

    # Advance last: the calculations above all look at the token being placed.
    advance_penalty = self.MoveStateToNextToken()
    return advance_penalty + placement_penalty

  def _AddTokenOnCurrentLine(self, dry_run):
    """Puts the token on the current line.

    Appends the next token to the state and updates information necessary for
    indentation.

    Arguments:
      dry_run: (bool) Commit whitespace changes to the FormatToken if True.
    """
    current = self.next_token
    previous = current.previous_token

    spaces = current.spaces_required_before
    if isinstance(spaces, list):
      # Don't set the value here, as we need to look at the lines near
      # this one to determine the actual horizontal alignment value.
      spaces = 0

    if not dry_run:
      current.AddWhitespacePrefix(newlines_before=0, spaces=spaces)

    if previous.OpensScope():
      if not current.is_comment:
        # Align closing scopes that are on a newline with the opening scope:
        #
        #     foo = [a,
        #            b,
        #           ]
        self.stack[-1].closing_scope_indent = self.column - 1
        if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'):
          self.stack[-1].closing_scope_indent += 1
        self.stack[-1].indent = self.column + spaces
      else:
        self.stack[-1].closing_scope_indent = (
            self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH'))

    self.column += spaces

  def _AddTokenOnNewline(self, dry_run, must_split):
    """Starts a new line and places the next token on it.

    Moves the column to the position reported by _GetNewlineColumn(), updates
    the bookkeeping of the innermost paren state and computes the penalty for
    having split here.

    Arguments:
      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
        True.
      must_split: (bool) A newline was required before this token.

    Returns:
      The split penalty for splitting after the current state.
    """
    token = self.next_token
    before = token.previous_token

    self.column = self._GetNewlineColumn()

    if not dry_run:
      depth = self.line.depth
      # The block indentation travels separately as 'indent_level'; only what
      # is left beyond it is handed over as explicit spaces.
      extra_spaces = self.column
      if extra_spaces:
        extra_spaces -= depth * style.Get('INDENT_WIDTH')
      token.AddWhitespacePrefix(
          newlines_before=1, spaces=extra_spaces, indent_level=depth)

    scope = self.stack[-1]
    if not token.is_comment:
      scope.last_space = self.column
    self.lowest_level_on_line = self.paren_level

    # Does this token directly follow an opening bracket, possibly with a
    # single comment in between?
    follows_opening = before.OpensScope()
    if not follows_opening and before.is_comment:
      opener = before.previous_token
      follows_opening = opener is not None and opener.OpensScope()

    if follows_opening:
      continuation_width = style.Get('CONTINUATION_INDENT_WIDTH')
      # With INDENT_CLOSING_BRACKETS the closing bracket keeps the scope's
      # indent; otherwise it is pulled back by one continuation indent.
      dedent = 0 if style.Get('INDENT_CLOSING_BRACKETS') else continuation_width
      scope.closing_scope_indent = max(0, scope.indent - dedent)
      scope.split_before_closing_bracket = True

    # Calculate the split penalty.
    cost = token.split_penalty

    if must_split:
      # Don't penalize for a must split.
      return cost

    if before.is_pseudo and before.value == '(':
      # Small penalty for splitting after a pseudo paren.
      cost += 50

    # Each additional line split in this scope costs more than the previous
    # one, except for splits before an if-expression or list comprehension.
    if token.value not in {'if', 'for'}:
      scope.num_line_splits += 1
      cost += (
          style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') *
          scope.num_line_splits)

    if token.OpensScope() and before.OpensScope():
      # Prefer to keep opening brackets coalesced (unless it's at the beginning
      # of a function call).
      outer = before.previous_token
      if not outer or not outer.is_name:
        cost += 10

    return cost + 10

  def MoveStateToNextToken(self):
    """Calculate format decision state information and move onto the next token.

    Before moving onto the next token, we first calculate the format decision
    state given the current token and its formatting decisions. Then the format
    decision state is set up so that the next token can be added.

    Returns:
      The penalty for the number of characters over the column limit.
    """
    current = self.next_token
    if not current.OpensScope() and not current.ClosesScope():
      self.lowest_level_on_line = min(self.lowest_level_on_line,
                                      self.paren_level)

    # If we encounter an opening bracket, we add a level to our stack to prepare
    # for the subsequent tokens.
    if current.OpensScope():
      last = self.stack[-1]
      new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space

      self.stack.append(_ParenState(new_indent, self.stack[-1].last_space))
      self.paren_level += 1

    # If we encounter a closing bracket, we can remove a level from our
    # parenthesis stack.
    if len(self.stack) > 1 and current.ClosesScope():
      if subtypes.DICTIONARY_KEY_PART in current.subtypes:
        self.stack[-2].last_space = self.stack[-2].indent
      else:
        self.stack[-2].last_space = self.stack[-1].last_space
      self.stack.pop()
      self.paren_level -= 1

    is_multiline_string = current.is_string and '\n' in current.value
    if is_multiline_string:
      # This is a multiline string. Only look at the first line.
      self.column += len(current.value.split('\n')[0])
    elif not current.is_pseudo:
      self.column += len(current.value)

    self.next_token = self.next_token.next_token

    # Calculate the penalty for overflowing the column limit.
    penalty = 0
    if (not current.is_pylint_comment and not current.is_pytype_comment and
        not current.is_copybara_comment and self.column > self.column_limit):
      excess_characters = self.column - self.column_limit
      penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters

    if is_multiline_string:
      # If this is a multiline string, the column is actually the
      # end of the last line in the string.
      self.column = len(current.value.split('\n')[-1])

    return penalty

  def _CalculateComprehensionState(self, newline):
    """Makes required changes to comprehension state.

    As the next token is consumed, self.comp_stack is kept up to date: a new
    ComprehensionState is pushed when a comprehension expression begins, and
    the top state is popped when its closing bracket is reached. The top state
    also records whether any token inside it was placed on a new line and
    whether the first 'for' clause was.

    Args:
      newline: Whether the current token is to be added on a newline.

    Returns:
      The penalty for the token-newline combination given the current
      comprehension state.
    """
    current = self.next_token
    previous = current.previous_token
    top_of_stack = self.comp_stack[-1] if self.comp_stack else None
    penalty = 0

    if top_of_stack is not None:
      # Check if the token terminates the current comprehension.
      if current == top_of_stack.closing_bracket:
        last = self.comp_stack.pop()
        # Lightly penalize comprehensions that are split across multiple lines.
        if last.has_interior_split:
          penalty += style.Get('SPLIT_PENALTY_COMPREHENSION')

        return penalty

      # Any token placed on a new line before the closing bracket marks the
      # comprehension as having an interior split.
      if newline:
        top_of_stack.has_interior_split = True

    # The first token carrying COMP_EXPR (its predecessor does not carry it)
    # starts a new comprehension; track it and stop here with the penalty
    # accumulated so far.
    if (subtypes.COMP_EXPR in current.subtypes and
        subtypes.COMP_EXPR not in previous.subtypes):
      self.comp_stack.append(object_state.ComprehensionState(current))
      return penalty

    # NOTE(review): top_of_stack is dereferenced below without a None check.
    # This presumably relies on a COMP_EXPR token having pushed a state before
    # any COMP_FOR / COMP_IF token is seen -- confirm.
    if current.value == 'for' and subtypes.COMP_FOR in current.subtypes:
      if top_of_stack.for_token is not None:
        # Treat nested comprehensions like normal comp_if expressions.
        # Example:
        #     my_comp = [
        #         a.qux + b.qux
        #         for a in foo
        #   -->   for b in bar   <--
        #         if a.zut + b.zut
        #     ]
        if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
            top_of_stack.has_split_at_for != newline and
            (top_of_stack.has_split_at_for or
             not top_of_stack.HasTrivialExpr())):
          penalty += split_penalty.UNBREAKABLE
      else:
        # First 'for' of this comprehension: remember the token and whether
        # it was placed on a new line.
        top_of_stack.for_token = current
        top_of_stack.has_split_at_for = newline

        # Try to keep trivial expressions on the same line as the comp_for.
        if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and newline and
            top_of_stack.HasTrivialExpr()):
          penalty += split_penalty.CONNECTED

    # Only the first token of a comp_if clause is examined (its predecessor
    # does not carry COMP_IF).
    if (subtypes.COMP_IF in current.subtypes and
        subtypes.COMP_IF not in previous.subtypes):
      # Penalize breaking at comp_if when it doesn't match the newline structure
      # in the rest of the comprehension.
      if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
          top_of_stack.has_split_at_for != newline and
          (top_of_stack.has_split_at_for or not top_of_stack.HasTrivialExpr())):
        penalty += split_penalty.UNBREAKABLE

    return penalty

  def _PushParameterListState(self, newline):
    """Start tracking a parameter list when a function definition opens.

    Nothing happens unless the token preceding the next token is the opening
    bracket of a function definition. In that case a ParameterListState is
    appended to self.param_list_stack.

    Args:
      newline: Whether the current token is to be added on a newline.
    """
    opening = self.next_token.previous_token
    if not _IsFunctionDefinition(opening):
      return

    # Column of the first parameter: the length up to and including the
    # opening bracket plus the indent of the enclosing paren state.
    first_column = opening.total_length + self.stack[-2].indent
    param_state = object_state.ParameterListState(opening, newline,
                                                  first_column)
    self.param_list_stack.append(param_state)

  def _CalculateParameterListState(self, newline):
    """Makes required changes to parameter list state.

    The top entry of self.param_list_stack is popped when the next token is
    that parameter list's closing bracket. No other state is modified here.

    Args:
      newline: Whether the current token is to be added on a newline.

    Returns:
      The penalty for the token-newline combination given the current
      parameter state.
    """
    current = self.next_token
    previous = current.previous_token
    penalty = 0

    # Case 1: the next token directly follows the opening bracket of a
    # function definition.
    if _IsFunctionDefinition(previous):
      # Only used by the newline branch further down.
      first_param_column = previous.total_length + self.stack[-2].indent
      if not newline:
        # NOTE(review): assumes a state for this definition is already on
        # param_list_stack (see _PushParameterListState) -- confirm ordering
        # at the call site.
        param_list = self.param_list_stack[-1]
        if param_list.parameters and param_list.has_typed_return:
          last_param = param_list.parameters[-1].first_token
          last_token = _LastTokenInLine(previous.matching_bracket)
          # Length from the start of the last parameter to the last token of
          # the line.
          total_length = last_token.total_length
          total_length -= last_param.total_length - len(last_param.value)
          if total_length + self.column > self.column_limit:
            # If we need to split before the trailing code of a function
            # definition with return types, then also split before the opening
            # parameter so that the trailing bit isn't indented on a line by
            # itself:
            #
            #   def rrrrrrrrrrrrrrrrrrrrrr(ccccccccccccccccccccccc: Tuple[Text]
            #                              ) -> List[Tuple[Text, Text]]:
            #       pass
            penalty += split_penalty.VERY_STRONGLY_CONNECTED
        return penalty

      if first_param_column <= self.column:
        # Make sure we don't split after the opening bracket if the
        # continuation indent is greater than the opening bracket:
        #
        #   a(
        #       b=1,
        #       c=2)
        penalty += split_penalty.VERY_STRONGLY_CONNECTED
      return penalty

    # Not inside any tracked parameter list: nothing to adjust.
    if not self.param_list_stack:
      return penalty

    # Case 2: the next token closes the innermost tracked parameter list.
    param_list = self.param_list_stack[-1]
    if current == self.param_list_stack[-1].closing_bracket:
      self.param_list_stack.pop()  # We're done with this state.
      if newline and param_list.has_typed_return:
        if param_list.split_before_closing_bracket:
          # Reward (negative penalty) splitting before the closing bracket
          # when the state asks for it.
          penalty -= split_penalty.STRONGLY_CONNECTED
        elif param_list.LastParamFitsOnLine(self.column):
          penalty += split_penalty.STRONGLY_CONNECTED

      if (not newline and param_list.has_typed_return and
          param_list.has_split_before_first_param):
        # Prefer splitting before the closing bracket if there's a return type
        # and we've already split before the first parameter.
        penalty += split_penalty.STRONGLY_CONNECTED

      return penalty

    # Case 3: a token somewhere inside the parameter list.
    if not param_list.parameters:
      return penalty

    if newline:
      # Discourage a split when everything from the first parameter to the
      # end of the line would fit on the current line.
      if self._FitsOnLine(param_list.parameters[0].first_token,
                          _LastTokenInLine(param_list.closing_bracket)):
        penalty += split_penalty.STRONGLY_CONNECTED

    if (not newline and style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and
        param_list.has_default_values and
        current != param_list.parameters[0].first_token and
        current != param_list.closing_bracket and
        subtypes.PARAMETER_START in current.subtypes):
      # If we want to split before parameters when there are named assigns,
      # then add a penalty for not splitting.
      penalty += split_penalty.STRONGLY_CONNECTED

    return penalty

  def _IndentWithContinuationAlignStyle(self, column):
    """Adjust a continuation column per the CONTINUATION_ALIGN_STYLE setting.

    Args:
      column: The candidate continuation column.

    Returns:
      column unchanged when it is zero or the style is neither 'FIXED' nor
      'VALIGN-RIGHT'. For 'FIXED', the line depth times INDENT_WIDTH plus
      CONTINUATION_INDENT_WIDTH. For 'VALIGN-RIGHT', column rounded up to a
      multiple of INDENT_WIDTH.
    """
    if column != 0:
      mode = style.Get('CONTINUATION_ALIGN_STYLE')
      if mode == 'FIXED':
        block_indent = self.line.depth * style.Get('INDENT_WIDTH')
        return block_indent + style.Get('CONTINUATION_INDENT_WIDTH')
      if mode == 'VALIGN-RIGHT':
        width = style.Get('INDENT_WIDTH')
        # Round up to the next multiple of the indent width.
        return width * int((column + width - 1) / width)
    return column

  def _GetNewlineColumn(self):
    """Return the new column on the newline.

    The checks below are tried in order and the first one that applies
    decides the result.

    Returns:
      The column at which the next token would start if placed on a new line.
    """
    current = self.next_token
    previous = current.previous_token
    top_of_stack = self.stack[-1]

    if isinstance(current.spaces_required_before, list):
      # Don't set the value here, as we need to look at the lines near
      # this one to determine the actual horizontal alignment value.
      return 0
    elif current.spaces_required_before > 2 or self.line.disable:
      # Use the token's own required spacing as the column.
      return current.spaces_required_before

    # Default answer: the indent of the innermost paren state, adjusted for
    # the configured continuation alignment style.
    cont_aligned_indent = self._IndentWithContinuationAlignStyle(
        top_of_stack.indent)

    if current.OpensScope():
      return cont_aligned_indent if self.paren_level else self.first_indent

    if current.ClosesScope():
      # A closing bracket that directly follows its opening bracket (possibly
      # with one comment in between) is pulled back by one continuation
      # indent, never below column 0.
      # NOTE(review): `previous` is used here without a None check, unlike
      # the checks further down -- presumably a closing bracket is never the
      # first token; confirm.
      if (previous.OpensScope() or
          (previous.is_comment and previous.previous_token is not None and
           previous.previous_token.OpensScope())):
        return max(0,
                   top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH'))
      return top_of_stack.closing_scope_indent

    # Consecutive strings in a dictionary value line up with the previous
    # string.
    if (previous and previous.is_string and current.is_string and
        subtypes.DICTIONARY_VALUE in current.subtypes):
      return previous.column

    if style.Get('INDENT_DICTIONARY_VALUE'):
      if previous and (previous.value == ':' or previous.is_pseudo):
        if subtypes.DICTIONARY_VALUE in current.subtypes:
          return top_of_stack.indent

    # In a compound statement outside a parameter list, if the current indent
    # equals the statement's own indent plus INDENT_WIDTH, add one more
    # continuation indent.
    if (not self.param_list_stack and _IsCompoundStatement(self.line.first) and
        (not (style.Get('DEDENT_CLOSING_BRACKETS') or
              style.Get('INDENT_CLOSING_BRACKETS')) or
         style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))):
      token_indent = (
          len(self.line.first.whitespace_prefix.split('\n')[-1]) +
          style.Get('INDENT_WIDTH'))
      if token_indent == top_of_stack.indent:
        return token_indent + style.Get('CONTINUATION_INDENT_WIDTH')

    # Inside a parameter list that does not split before its closing bracket
    # and whose indent is (line depth + 1) * INDENT_WIDTH, push parameter
    # starts one continuation indent further.
    if (self.param_list_stack and
        not self.param_list_stack[-1].SplitBeforeClosingBracket(
            top_of_stack.indent) and top_of_stack.indent
        == ((self.line.depth + 1) * style.Get('INDENT_WIDTH'))):
      if (subtypes.PARAMETER_START in current.subtypes or
          (previous.is_comment and
           subtypes.PARAMETER_START in previous.subtypes)):
        return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH')

    return cont_aligned_indent

  def _FitsOnLine(self, start, end):
    """Check whether the tokens from start through end fit on this line.

    Args:
      start: The first token of the span.
      end: The last token of the span.

    Returns:
      True if the span's length, added to the current column, stays within
      the column limit.
    """
    span = end.total_length - start.total_length
    # The difference above excludes the start token's own text, so add it
    # back unless the start token is a pseudo token.
    start_width = 0 if start.is_pseudo else len(start.value)
    return span + start_width + self.column <= self.column_limit

  def _EachDictEntryFitsOnOneLine(self, opening):
    """Determine if each dict element can fit on one line.

    Args:
      opening: The opening bracket token of the dictionary.

    Returns:
      False as soon as one measured entry is too long for the column limit
      when placed at the indent of self.stack[-2]; otherwise whether the last
      entry fits. Entries whose value is a container are not measured.
    """

    def PreviousNonCommentToken(tok):
      """Return the closest token before tok that is not a comment."""
      tok = tok.previous_token
      while tok.is_comment:
        tok = tok.previous_token
      return tok

    def ImplicitStringConcatenation(tok):
      """Return true if more than one string token follows in a row."""
      num_strings = 0
      if tok.is_pseudo:
        tok = tok.next_token
      while tok.is_string:
        num_strings += 1
        tok = tok.next_token
      return num_strings > 1

    def DictValueIsContainer(opening, closing):
      """Return true if the dictionary value is a container."""
      if not opening or not closing:
        return False
      # Walk back over pseudo tokens to the token before the container.
      colon = opening.previous_token
      while colon:
        if not colon.is_pseudo:
          break
        colon = colon.previous_token
      if not colon or colon.value != ':':
        return False
      key = colon.previous_token
      if not key:
        return False
      return subtypes.DICTIONARY_KEY_PART in key.subtypes

    closing = opening.matching_bracket
    entry_start = opening.next_token
    current = opening.next_token.next_token

    while current and current != closing:
      if subtypes.DICTIONARY_KEY in current.subtypes:
        # A new key starts here, so the previous entry ends just before it
        # (ignoring comments and the separating comma).
        prev = PreviousNonCommentToken(current)
        if prev.value == ',':
          prev = PreviousNonCommentToken(prev.previous_token)
        if not DictValueIsContainer(prev.matching_bracket, prev):
          length = prev.total_length - entry_start.total_length
          length += len(entry_start.value)
          # NOTE(review): an entry that lands exactly on the column limit is
          # rejected here (>=), while the final entry below is accepted at
          # the limit (<=) -- confirm which is intended.
          if length + self.stack[-2].indent >= self.column_limit:
            return False
        entry_start = current
      if current.OpensScope():
        # NOTE(review): `and` binds tighter than `or`, so this reads as
        # value == '{' or ((pseudo and next is '{') and DICTIONARY_VALUE) or
        # implicit string concatenation -- confirm the grouping is intended.
        if ((current.value == '{' or
             (current.is_pseudo and current.next_token.value == '{') and
             subtypes.DICTIONARY_VALUE in current.subtypes) or
            ImplicitStringConcatenation(current)):
          # A dictionary entry that cannot fit on a single line shouldn't matter
          # to this calculation. If it can't fit on a single line, then the
          # opening should be on the same line as the key and the rest on
          # newlines after it. But the other entries should be on single lines
          # if possible.
          if current.matching_bracket:
            current = current.matching_bracket
          # Scan forward to the next key, or finish if the dictionary ends
          # first.
          while current:
            if current == closing:
              return True
            if subtypes.DICTIONARY_KEY in current.subtypes:
              entry_start = current
              break
            current = current.next_token
        else:
          # Any other nested scope is skipped as a unit.
          current = current.matching_bracket
      else:
        current = current.next_token

    # At this point, current is the closing bracket. Go back one to get the end
    # of the dictionary entry.
    # NOTE(review): the loop above can also exit with current being None;
    # presumably that does not happen for well-formed input -- confirm.
    current = PreviousNonCommentToken(current)
    length = current.total_length - entry_start.total_length
    length += len(entry_start.value)
    return length + self.stack[-2].indent <= self.column_limit

  def _ArgumentListHasDictionaryEntry(self, token):
    """Check for an oversized dictionary in a function's argument list.

    Args:
      token: The token at which to start scanning.

    Returns:
      False when token is not an argument to a function, or when no '{' is
      met before the enclosing scope closes. Otherwise, for the first '{'
      found, whether its length plus the indent of self.stack[-2] exceeds the
      column limit.
    """
    if not _IsArgumentToFunction(token):
      return False

    cursor = token
    while cursor:
      if cursor.value == '{':
        closer = cursor.matching_bracket
        dict_length = closer.total_length - cursor.total_length
        return dict_length + self.stack[-2].indent > self.column_limit
      if cursor.ClosesScope():
        return False
      # Step past a nested scope in one go; otherwise advance by one token.
      if cursor.OpensScope():
        cursor = cursor.matching_bracket
      cursor = cursor.next_token
    return False

  def _ContainerFitsOnStartLine(self, opening):
    """Check if the container can fit on its starting line.

    Args:
      opening: The opening bracket token of the container.

    Returns:
      True if the length between the opening bracket and its matching
      bracket, added to the indent of the innermost paren state, stays within
      the column limit.
    """
    closer = opening.matching_bracket
    container_length = closer.total_length - opening.total_length
    return container_length + self.stack[-1].indent <= self.column_limit


# Keywords that _IsCompoundStatement treats as starting a compound statement.
_COMPOUND_STMTS = frozenset(
    ('for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'))


def _IsCompoundStatement(token):
  """Check whether token starts a compound statement.

  A leading 'async' is skipped, so the token after it is the one tested.
  """
  keyword = token.next_token if token.value == 'async' else token
  return keyword.value in _COMPOUND_STMTS


def _IsFunctionDef(token):
  """Check whether token is 'def', optionally preceded by 'async'."""
  keyword = token.next_token if token.value == 'async' else token
  return keyword.value == 'def'


def _IsFunctionCallWithArguments(token):
  """Check whether token begins a call that has at least one argument.

  Tokens named NAME, DOT or EQUAL are skipped until a '(' is found. The
  result is then decided by the token after that parenthesis: falsy when
  there is none or when it is ')'. Any other token name met before a '('
  ends the scan with False.
  """
  cursor = token
  while cursor:
    if cursor.value == '(':
      first_inside = cursor.next_token
      return first_inside and first_inside.value != ')'
    if cursor.name not in ('NAME', 'DOT', 'EQUAL'):
      return False
    cursor = cursor.next_token
  return False


def _IsArgumentToFunction(token):
  """Check whether token sits in parentheses that directly follow a name.

  Returns False when logical_line.IsSurroundedByBrackets reports no bracket
  or a bracket other than '('. Otherwise the result is truthy only if the
  token before that parenthesis exists and is a name.
  """
  enclosing = logical_line.IsSurroundedByBrackets(token)
  if enclosing and enclosing.value == '(':
    callee = enclosing.previous_token
    return callee and callee.is_name
  return False


def _GetOpeningBracket(current):
  """Get the opening bracket containing the current token.

  Args:
    current: The token to start from.

  Returns:
    If current is itself a non-pseudo bracket with a match, the opening
    bracket of that pair. Otherwise the nearest enclosing opening bracket
    found by walking backwards, or None when the walk runs out of tokens.
  """
  if current.matching_bracket and not current.is_pseudo:
    return current if current.OpensScope() else current.matching_bracket

  while current:
    if current.ClosesScope():
      # Jump over a whole nested scope to its opening bracket.
      current = current.matching_bracket
    elif current.is_pseudo:
      current = current.previous_token
    elif current.OpensScope():
      return current
    # Either jump above may have run off the token list (no matching bracket
    # or no previous token). Report "not found" instead of dereferencing
    # None.
    if not current:
      return None
    current = current.previous_token
  return None


def _LastTokenInLine(current):
  """Walk forward from current to the end of the line.

  Returns the first comment token reached (current itself if it is a
  comment), or the final token when there is no comment on the way.
  """
  while True:
    if current.is_comment or not current.next_token:
      return current
    current = current.next_token


def _IsFunctionDefinition(current):
  """Check whether current is the '(' opening a function definition.

  The result is truthy only when current is '(' and the token before it
  exists and carries the FUNC_DEF subtype.
  """
  before = current.previous_token
  if current.value != '(':
    return False
  return before and subtypes.FUNC_DEF in before.subtypes


def _IsLastScopeInLine(current):
  """Check that no scope opens after current's matching bracket.

  Returns True when no token after the matching bracket opens a scope, which
  includes the case where current has no matching bracket at all.
  """
  closer = current.matching_bracket
  if not closer:
    return True
  follower = closer.next_token
  while follower:
    if follower.OpensScope():
      return False
    follower = follower.next_token
  return True


def _IsSingleElementTuple(token):
  """Check if it's a single-element tuple.

  Counts the commas directly inside the bracket pair opened by token; commas
  inside nested scopes are skipped. Returns True when exactly one comma is
  found. Note that two elements without a trailing comma also contain
  exactly one comma.
  """
  closer = token.matching_bracket
  cursor = token.next_token
  commas = 0
  while cursor != closer:
    if cursor.value == ',':
      commas += 1
    if cursor.OpensScope():
      # Jump to the end of the nested scope so its commas are not counted.
      cursor = cursor.matching_bracket
    else:
      cursor = cursor.next_token
  return commas == 1


def _ScopeHasNoCommas(token):
  """Check if the scope has no commas.

  Only commas directly inside the bracket pair opened by token count; nested
  scopes are skipped as a unit.
  """
  closer = token.matching_bracket
  cursor = token.next_token
  while cursor != closer:
    if cursor.value == ',':
      return False
    if cursor.OpensScope():
      cursor = cursor.matching_bracket
    else:
      cursor = cursor.next_token
  return True


class _ParenState(object):
  """Maintains the state of the bracket enclosures.

  A stack of _ParenState objects are kept so that we know how to indent relative
  to the brackets.

  Two states compare equal when all five attributes below are equal; the hash
  is computed from the same five values.

  Attributes:
    indent: The column position to which a specified parenthesis level needs to
      be indented.
    last_space: The column position of the last space on each level.
    closing_scope_indent: The column position of the closing indentation.
    split_before_closing_bracket: Whether a newline needs to be inserted before
      the closing bracket. We only want to insert a newline before the closing
      bracket if there also was a newline after the beginning left bracket.
    num_line_splits: Number of line splits this _ParenState contains already.
      Each subsequent line split gets an increasing penalty.
  """

  # TODO(morbo): This doesn't track "bin packing."

  def __init__(self, indent, last_space):
    self.indent = indent
    self.last_space = last_space
    self.closing_scope_indent = 0
    self.split_before_closing_bracket = False
    self.num_line_splits = 0

  def Clone(self):
    """Return a new _ParenState carrying the same attribute values."""
    state = _ParenState(self.indent, self.last_space)
    state.closing_scope_indent = self.closing_scope_indent
    state.split_before_closing_bracket = self.split_before_closing_bracket
    state.num_line_splits = self.num_line_splits
    return state

  def __repr__(self):
    return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % (
        self.indent, self.last_space, self.closing_scope_indent)

  def _Key(self):
    """Return the tuple of values that defines equality and hashing."""
    return (self.indent, self.last_space, self.closing_scope_indent,
            self.split_before_closing_bracket, self.num_line_splits)

  def __eq__(self, other):
    # Compare the actual fields. Comparing hashes would also report equality
    # on a hash collision, or for an unrelated object that happens to hash to
    # the same value.
    if not isinstance(other, _ParenState):
      return NotImplemented
    return self._Key() == other._Key()

  def __ne__(self, other):
    return not self == other

  def __hash__(self, *args, **kwargs):
    return hash(self._Key())