1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""Implements a format decision state object that manages whitespace decisions. 15 16Each token is processed one at a time, at which point its whitespace formatting 17decisions are made. A graph of potential whitespace formattings is created, 18where each node in the graph is a format decision state object. The heuristic 19tries formatting the token with and without a newline before it to determine 20which one has the least penalty. Therefore, the format decision state object for 21each decision needs to be its own unique copy. 22 23Once the heuristic determines the best formatting, it makes a non-dry run pass 24through the code to commit the whitespace formatting. 25 26 FormatDecisionState: main class exported by this module. 27""" 28 29from yapf.yapflib import format_token 30from yapf.yapflib import logical_line 31from yapf.yapflib import object_state 32from yapf.yapflib import split_penalty 33from yapf.yapflib import style 34from yapf.yapflib import subtypes 35 36 37class FormatDecisionState(object): 38 """The current state when indenting a logical line. 39 40 The FormatDecisionState object is meant to be copied instead of referenced. 41 42 Attributes: 43 first_indent: The indent of the first token. 44 column: The number of used columns in the current line. 45 line: The logical line we're currently processing. 46 next_token: The next token to be formatted. 47 paren_level: The level of nesting inside (), [], and {}. 48 lowest_level_on_line: The lowest paren_level on the current line. 49 stack: A stack (of _ParenState) keeping track of properties applying to 50 parenthesis levels. 51 comp_stack: A stack (of ComprehensionState) keeping track of properties 52 applying to comprehensions. 53 param_list_stack: A stack (of ParameterListState) keeping track of 54 properties applying to function parameter lists. 55 ignore_stack_for_comparison: Ignore the stack of _ParenState for state 56 comparison. 57 column_limit: The column limit specified by the style. 58 """ 59 60 def __init__(self, line, first_indent): 61 """Initializer. 62 63 Initializes to the state after placing the first token from 'line' at 64 'first_indent'. 65 66 Arguments: 67 line: (LogicalLine) The logical line we're currently processing. 68 first_indent: (int) The indent of the first token. 69 """ 70 self.next_token = line.first 71 self.column = first_indent 72 self.line = line 73 self.paren_level = 0 74 self.lowest_level_on_line = 0 75 self.ignore_stack_for_comparison = False 76 self.stack = [_ParenState(first_indent, first_indent)] 77 self.comp_stack = [] 78 self.param_list_stack = [] 79 self.first_indent = first_indent 80 self.column_limit = style.Get('COLUMN_LIMIT') 81 82 def Clone(self): 83 """Clones a FormatDecisionState object.""" 84 new = FormatDecisionState(self.line, self.first_indent) 85 new.next_token = self.next_token 86 new.column = self.column 87 new.line = self.line 88 new.paren_level = self.paren_level 89 new.line.depth = self.line.depth 90 new.lowest_level_on_line = self.lowest_level_on_line 91 new.ignore_stack_for_comparison = self.ignore_stack_for_comparison 92 new.first_indent = self.first_indent 93 new.stack = [state.Clone() for state in self.stack] 94 new.comp_stack = [state.Clone() for state in self.comp_stack] 95 new.param_list_stack = [state.Clone() for state in self.param_list_stack] 96 return new 97 98 def __eq__(self, other): 99 # Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous', 100 # because it shouldn't have a bearing on this comparison. (I.e., it will 101 # report equal if 'next_token' does.) 102 return (self.next_token == other.next_token and 103 self.column == other.column and 104 self.paren_level == other.paren_level and 105 self.line.depth == other.line.depth and 106 self.lowest_level_on_line == other.lowest_level_on_line and 107 (self.ignore_stack_for_comparison or 108 other.ignore_stack_for_comparison or self.stack == other.stack and 109 self.comp_stack == other.comp_stack and 110 self.param_list_stack == other.param_list_stack)) 111 112 def __ne__(self, other): 113 return not self == other 114 115 def __hash__(self): 116 return hash((self.next_token, self.column, self.paren_level, 117 self.line.depth, self.lowest_level_on_line)) 118 119 def __repr__(self): 120 return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' % 121 (self.column, repr(self.next_token), self.paren_level, 122 '\n\t'.join(repr(s) for s in self.stack) + ']')) 123 124 def CanSplit(self, must_split): 125 """Determine if we can split before the next token. 126 127 Arguments: 128 must_split: (bool) A newline was required before this token. 129 130 Returns: 131 True if the line can be split before the next token. 132 """ 133 current = self.next_token 134 previous = current.previous_token 135 136 if current.is_pseudo: 137 return False 138 139 if (not must_split and subtypes.DICTIONARY_KEY_PART in current.subtypes and 140 subtypes.DICTIONARY_KEY not in current.subtypes and 141 not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')): 142 # In some situations, a dictionary may be multiline, but pylint doesn't 143 # like it. So don't allow it unless forced to. 144 return False 145 146 if (not must_split and subtypes.DICTIONARY_VALUE in current.subtypes and 147 not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')): 148 return False 149 150 if previous and previous.value == '(' and current.value == ')': 151 # Don't split an empty function call list if we aren't splitting before 152 # dict values. 153 token = previous.previous_token 154 while token: 155 prev = token.previous_token 156 if not prev or prev.name not in {'NAME', 'DOT'}: 157 break 158 token = token.previous_token 159 if token and subtypes.DICTIONARY_VALUE in token.subtypes: 160 if not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE'): 161 return False 162 163 if previous and previous.value == '.' and current.value == '.': 164 return False 165 166 return current.can_break_before 167 168 def MustSplit(self): 169 """Returns True if the line must split before the next token.""" 170 current = self.next_token 171 previous = current.previous_token 172 173 if current.is_pseudo: 174 return False 175 176 if current.must_break_before: 177 return True 178 179 if not previous: 180 return False 181 182 if style.Get('SPLIT_ALL_COMMA_SEPARATED_VALUES') and previous.value == ',': 183 return True 184 185 if (style.Get('SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES') and 186 previous.value == ','): 187 # Avoid breaking in a container that fits in the current line if possible 188 opening = _GetOpeningBracket(current) 189 190 # Can't find opening bracket, behave the same way as 191 # SPLIT_ALL_COMMA_SEPARATED_VALUES. 192 if not opening: 193 return True 194 195 if current.is_comment: 196 # Don't require splitting before a comment, since it may be related to 197 # the current line. 198 return False 199 200 # Allow the fallthrough code to handle the closing bracket. 201 if current != opening.matching_bracket: 202 # If the container doesn't fit in the current line, must split 203 return not self._ContainerFitsOnStartLine(opening) 204 205 if (self.stack[-1].split_before_closing_bracket and 206 (current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET') or 207 current.value in '}])' and style.Get('INDENT_CLOSING_BRACKETS'))): 208 # Split before the closing bracket if we can. 209 if subtypes.SUBSCRIPT_BRACKET not in current.subtypes: 210 return current.node_split_penalty != split_penalty.UNBREAKABLE 211 212 if (current.value == ')' and previous.value == ',' and 213 not _IsSingleElementTuple(current.matching_bracket)): 214 return True 215 216 # Prevent splitting before the first argument in compound statements 217 # with the exception of function declarations. 218 if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and 219 _IsCompoundStatement(self.line.first) and 220 not _IsFunctionDef(self.line.first)): 221 return False 222 223 ########################################################################### 224 # List Splitting 225 if (style.Get('DEDENT_CLOSING_BRACKETS') or 226 style.Get('INDENT_CLOSING_BRACKETS') or 227 style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): 228 bracket = current if current.ClosesScope() else previous 229 if subtypes.SUBSCRIPT_BRACKET not in bracket.subtypes: 230 if bracket.OpensScope(): 231 if style.Get('COALESCE_BRACKETS'): 232 if current.OpensScope(): 233 # Prefer to keep all opening brackets together. 234 return False 235 236 if (not _IsLastScopeInLine(bracket) or 237 logical_line.IsSurroundedByBrackets(bracket)): 238 last_token = bracket.matching_bracket 239 else: 240 last_token = _LastTokenInLine(bracket.matching_bracket) 241 242 if not self._FitsOnLine(bracket, last_token): 243 # Split before the first element if the whole list can't fit on a 244 # single line. 245 self.stack[-1].split_before_closing_bracket = True 246 return True 247 248 elif (style.Get('DEDENT_CLOSING_BRACKETS') or 249 style.Get('INDENT_CLOSING_BRACKETS')) and current.ClosesScope(): 250 # Split before and dedent the closing bracket. 251 return self.stack[-1].split_before_closing_bracket 252 253 if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and 254 current.is_name): 255 # An expression that's surrounded by parens gets split after the opening 256 # parenthesis. 257 def SurroundedByParens(token): 258 """Check if it's an expression surrounded by parentheses.""" 259 while token: 260 if token.value == ',': 261 return False 262 if token.value == ')': 263 return not token.next_token 264 if token.OpensScope(): 265 token = token.matching_bracket.next_token 266 else: 267 token = token.next_token 268 return False 269 270 if (previous.value == '(' and not previous.is_pseudo and 271 not logical_line.IsSurroundedByBrackets(previous)): 272 pptoken = previous.previous_token 273 if (pptoken and not pptoken.is_name and not pptoken.is_keyword and 274 SurroundedByParens(current)): 275 return True 276 277 if (current.is_name or current.is_string) and previous.value == ',': 278 # If the list has function calls in it and the full list itself cannot 279 # fit on the line, then we want to split. Otherwise, we'll get something 280 # like this: 281 # 282 # X = [ 283 # Bar(xxx='some string', 284 # yyy='another long string', 285 # zzz='a third long string'), Bar( 286 # xxx='some string', 287 # yyy='another long string', 288 # zzz='a third long string') 289 # ] 290 # 291 # or when a string formatting syntax. 292 func_call_or_string_format = False 293 tok = current.next_token 294 if current.is_name: 295 while tok and (tok.is_name or tok.value == '.'): 296 tok = tok.next_token 297 func_call_or_string_format = tok and tok.value == '(' 298 elif current.is_string: 299 while tok and tok.is_string: 300 tok = tok.next_token 301 func_call_or_string_format = tok and tok.value == '%' 302 if func_call_or_string_format: 303 open_bracket = logical_line.IsSurroundedByBrackets(current) 304 if open_bracket: 305 if open_bracket.value in '[{': 306 if not self._FitsOnLine(open_bracket, 307 open_bracket.matching_bracket): 308 return True 309 elif tok.value == '(': 310 if not self._FitsOnLine(current, tok.matching_bracket): 311 return True 312 313 if (current.OpensScope() and previous.value == ',' and 314 subtypes.DICTIONARY_KEY not in current.next_token.subtypes): 315 # If we have a list of tuples, then we can get a similar look as above. If 316 # the full list cannot fit on the line, then we want a split. 317 open_bracket = logical_line.IsSurroundedByBrackets(current) 318 if (open_bracket and open_bracket.value in '[{' and 319 subtypes.SUBSCRIPT_BRACKET not in open_bracket.subtypes): 320 if not self._FitsOnLine(current, current.matching_bracket): 321 return True 322 323 ########################################################################### 324 # Dict/Set Splitting 325 if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and 326 subtypes.DICTIONARY_KEY in current.subtypes and not current.is_comment): 327 # Place each dictionary entry onto its own line. 328 if previous.value == '{' and previous.previous_token: 329 opening = _GetOpeningBracket(previous.previous_token) 330 if (opening and opening.value == '(' and opening.previous_token and 331 opening.previous_token.is_name): 332 # This is a dictionary that's an argument to a function. 333 if (self._FitsOnLine(previous, previous.matching_bracket) and 334 previous.matching_bracket.next_token and 335 (not opening.matching_bracket.next_token or 336 opening.matching_bracket.next_token.value != '.') and 337 _ScopeHasNoCommas(previous)): 338 # Don't split before the key if: 339 # - The dictionary fits on a line, and 340 # - The function call isn't part of a builder-style call and 341 # - The dictionary has one entry and no trailing comma 342 return False 343 return True 344 345 if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and 346 subtypes.DICT_SET_GENERATOR in current.subtypes): 347 # Split before a dict/set generator. 348 return True 349 350 if (subtypes.DICTIONARY_VALUE in current.subtypes or 351 (previous.is_pseudo and previous.value == '(' and 352 not current.is_comment)): 353 # Split before the dictionary value if we can't fit every dictionary 354 # entry on its own line. 355 if not current.OpensScope(): 356 opening = _GetOpeningBracket(current) 357 if not self._EachDictEntryFitsOnOneLine(opening): 358 return style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE') 359 360 if previous.value == '{': 361 # Split if the dict/set cannot fit on one line and ends in a comma. 362 closing = previous.matching_bracket 363 if (not self._FitsOnLine(previous, closing) and 364 closing.previous_token.value == ','): 365 self.stack[-1].split_before_closing_bracket = True 366 return True 367 368 ########################################################################### 369 # Argument List Splitting 370 if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and 371 subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in current.subtypes): 372 if (previous.value not in {'=', ':', '*', '**'} and 373 current.value not in ':=,)' and not _IsFunctionDefinition(previous)): 374 # If we're going to split the lines because of named arguments, then we 375 # want to split after the opening bracket as well. But not when this is 376 # part of a function definition. 377 if previous.value == '(': 378 # Make sure we don't split after the opening bracket if the 379 # continuation indent is greater than the opening bracket: 380 # 381 # a( 382 # b=1, 383 # c=2) 384 if (self._FitsOnLine(previous, previous.matching_bracket) and 385 logical_line.IsSurroundedByBrackets(previous)): 386 # An argument to a function is a function call with named 387 # assigns. 388 return False 389 390 # Don't split if not required 391 if (not style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and 392 not style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): 393 return False 394 395 column = self.column - self.stack[-1].last_space 396 return column > style.Get('CONTINUATION_INDENT_WIDTH') 397 398 opening = _GetOpeningBracket(current) 399 if opening: 400 return not self._ContainerFitsOnStartLine(opening) 401 402 if (current.value not in '{)' and previous.value == '(' and 403 self._ArgumentListHasDictionaryEntry(current)): 404 return True 405 406 if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'): 407 # Split before arguments in a function call or definition if the 408 # arguments are terminated by a comma. 409 opening = _GetOpeningBracket(current) 410 if opening and opening.previous_token and opening.previous_token.is_name: 411 if previous.value in '(,': 412 if opening.matching_bracket.previous_token.value == ',': 413 return True 414 415 if ((current.is_name or current.value in {'*', '**'}) and 416 previous.value == ','): 417 # If we have a function call within an argument list and it won't fit on 418 # the remaining line, but it will fit on a line by itself, then go ahead 419 # and split before the call. 420 opening = _GetOpeningBracket(current) 421 if (opening and opening.value == '(' and opening.previous_token and 422 (opening.previous_token.is_name or 423 opening.previous_token.value in {'*', '**'})): 424 is_func_call = False 425 opening = current 426 while opening: 427 if opening.value == '(': 428 is_func_call = True 429 break 430 if (not (opening.is_name or opening.value in {'*', '**'}) and 431 opening.value != '.'): 432 break 433 opening = opening.next_token 434 435 if is_func_call: 436 if (not self._FitsOnLine(current, opening.matching_bracket) or 437 (opening.matching_bracket.next_token and 438 opening.matching_bracket.next_token.value != ',' and 439 not opening.matching_bracket.next_token.ClosesScope())): 440 return True 441 442 pprevious = previous.previous_token 443 444 # A function call with a dictionary as its first argument may result in 445 # unreadable formatting if the dictionary spans multiple lines. The 446 # dictionary itself is formatted just fine, but the remaining arguments are 447 # indented too far: 448 # 449 # function_call({ 450 # KEY_1: 'value one', 451 # KEY_2: 'value two', 452 # }, 453 # default=False) 454 if (current.value == '{' and previous.value == '(' and pprevious and 455 pprevious.is_name): 456 dict_end = current.matching_bracket 457 next_token = dict_end.next_token 458 if next_token.value == ',' and not self._FitsOnLine(current, dict_end): 459 return True 460 461 if (current.is_name and pprevious and pprevious.is_name and 462 previous.value == '('): 463 464 if (not self._FitsOnLine(previous, previous.matching_bracket) and 465 _IsFunctionCallWithArguments(current)): 466 # There is a function call, with more than 1 argument, where the first 467 # argument is itself a function call with arguments that does not fit 468 # into the line. In this specific case, if we split after the first 469 # argument's opening '(', then the formatting will look bad for the 470 # rest of the arguments. E.g.: 471 # 472 # outer_function_call(inner_function_call( 473 # inner_arg1, inner_arg2), 474 # outer_arg1, outer_arg2) 475 # 476 # Instead, enforce a split before that argument to keep things looking 477 # good. 478 if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') or 479 style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): 480 return True 481 482 opening = _GetOpeningBracket(current) 483 if (opening and opening.value == '(' and opening.previous_token and 484 (opening.previous_token.is_name or 485 opening.previous_token.value in {'*', '**'})): 486 is_func_call = False 487 opening = current 488 while opening: 489 if opening.value == '(': 490 is_func_call = True 491 break 492 if (not (opening.is_name or opening.value in {'*', '**'}) and 493 opening.value != '.'): 494 break 495 opening = opening.next_token 496 497 if is_func_call: 498 if (not self._FitsOnLine(current, opening.matching_bracket) or 499 (opening.matching_bracket.next_token and 500 opening.matching_bracket.next_token.value != ',' and 501 not opening.matching_bracket.next_token.ClosesScope())): 502 return True 503 504 if (previous.OpensScope() and not current.OpensScope() and 505 not current.is_comment and 506 subtypes.SUBSCRIPT_BRACKET not in previous.subtypes): 507 if pprevious and not pprevious.is_keyword and not pprevious.is_name: 508 # We want to split if there's a comment in the container. 509 token = current 510 while token != previous.matching_bracket: 511 if token.is_comment: 512 return True 513 token = token.next_token 514 if previous.value == '(': 515 pptoken = previous.previous_token 516 if not pptoken or not pptoken.is_name: 517 # Split after the opening of a tuple if it doesn't fit on the current 518 # line and it's not a function call. 519 if self._FitsOnLine(previous, previous.matching_bracket): 520 return False 521 elif not self._FitsOnLine(previous, previous.matching_bracket): 522 if len(previous.container_elements) == 1: 523 return False 524 525 elements = previous.container_elements + [previous.matching_bracket] 526 i = 1 527 while i < len(elements): 528 if (not elements[i - 1].OpensScope() and 529 not self._FitsOnLine(elements[i - 1], elements[i])): 530 return True 531 i += 1 532 533 if (self.column_limit - self.column) / float(self.column_limit) < 0.3: 534 # Try not to squish all of the arguments off to the right. 535 return True 536 else: 537 # Split after the opening of a container if it doesn't fit on the 538 # current line. 539 if not self._FitsOnLine(previous, previous.matching_bracket): 540 return True 541 542 ########################################################################### 543 # Original Formatting Splitting 544 # These checks rely upon the original formatting. This is in order to 545 # attempt to keep hand-written code in the same condition as it was before. 546 # However, this may cause the formatter to fail to be idempotent. 547 if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and 548 previous.lineno < current.lineno): 549 # Retain the split before a bitwise operator. 550 return True 551 552 if (current.is_comment and 553 previous.lineno < current.lineno - current.value.count('\n')): 554 # If a comment comes in the middle of a logical line (like an if 555 # conditional with comments interspersed), then we want to split if the 556 # original comments were on a separate line. 557 return True 558 559 return False 560 561 def AddTokenToState(self, newline, dry_run, must_split=False): 562 """Add a token to the format decision state. 563 564 Allow the heuristic to try out adding the token with and without a newline. 565 Later on, the algorithm will determine which one has the lowest penalty. 566 567 Arguments: 568 newline: (bool) Add the token on a new line if True. 569 dry_run: (bool) Don't commit whitespace changes to the FormatToken if 570 True. 571 must_split: (bool) A newline was required before this token. 572 573 Returns: 574 The penalty of splitting after the current token. 575 """ 576 self._PushParameterListState(newline) 577 578 penalty = 0 579 if newline: 580 penalty = self._AddTokenOnNewline(dry_run, must_split) 581 else: 582 self._AddTokenOnCurrentLine(dry_run) 583 584 penalty += self._CalculateComprehensionState(newline) 585 penalty += self._CalculateParameterListState(newline) 586 587 return self.MoveStateToNextToken() + penalty 588 589 def _AddTokenOnCurrentLine(self, dry_run): 590 """Puts the token on the current line. 591 592 Appends the next token to the state and updates information necessary for 593 indentation. 594 595 Arguments: 596 dry_run: (bool) Commit whitespace changes to the FormatToken if True. 597 """ 598 current = self.next_token 599 previous = current.previous_token 600 601 spaces = current.spaces_required_before 602 if isinstance(spaces, list): 603 # Don't set the value here, as we need to look at the lines near 604 # this one to determine the actual horizontal alignment value. 605 spaces = 0 606 607 if not dry_run: 608 current.AddWhitespacePrefix(newlines_before=0, spaces=spaces) 609 610 if previous.OpensScope(): 611 if not current.is_comment: 612 # Align closing scopes that are on a newline with the opening scope: 613 # 614 # foo = [a, 615 # b, 616 # ] 617 self.stack[-1].closing_scope_indent = self.column - 1 618 if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'): 619 self.stack[-1].closing_scope_indent += 1 620 self.stack[-1].indent = self.column + spaces 621 else: 622 self.stack[-1].closing_scope_indent = ( 623 self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH')) 624 625 self.column += spaces 626 627 def _AddTokenOnNewline(self, dry_run, must_split): 628 """Adds a line break and necessary indentation. 629 630 Appends the next token to the state and updates information necessary for 631 indentation. 632 633 Arguments: 634 dry_run: (bool) Don't commit whitespace changes to the FormatToken if 635 True. 636 must_split: (bool) A newline was required before this token. 637 638 Returns: 639 The split penalty for splitting after the current state. 640 """ 641 current = self.next_token 642 previous = current.previous_token 643 644 self.column = self._GetNewlineColumn() 645 646 if not dry_run: 647 indent_level = self.line.depth 648 spaces = self.column 649 if spaces: 650 spaces -= indent_level * style.Get('INDENT_WIDTH') 651 current.AddWhitespacePrefix( 652 newlines_before=1, spaces=spaces, indent_level=indent_level) 653 654 if not current.is_comment: 655 self.stack[-1].last_space = self.column 656 self.lowest_level_on_line = self.paren_level 657 658 if (previous.OpensScope() or 659 (previous.is_comment and previous.previous_token is not None and 660 previous.previous_token.OpensScope())): 661 dedent = (style.Get('CONTINUATION_INDENT_WIDTH'), 662 0)[style.Get('INDENT_CLOSING_BRACKETS')] 663 self.stack[-1].closing_scope_indent = ( 664 max(0, self.stack[-1].indent - dedent)) 665 self.stack[-1].split_before_closing_bracket = True 666 667 # Calculate the split penalty. 668 penalty = current.split_penalty 669 670 if must_split: 671 # Don't penalize for a must split. 672 return penalty 673 674 if previous.is_pseudo and previous.value == '(': 675 # Small penalty for splitting after a pseudo paren. 676 penalty += 50 677 678 # Add a penalty for each increasing newline we add, but don't penalize for 679 # splitting before an if-expression or list comprehension. 680 if current.value not in {'if', 'for'}: 681 last = self.stack[-1] 682 last.num_line_splits += 1 683 penalty += ( 684 style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') * 685 last.num_line_splits) 686 687 if current.OpensScope() and previous.OpensScope(): 688 # Prefer to keep opening brackets coalesced (unless it's at the beginning 689 # of a function call). 690 pprev = previous.previous_token 691 if not pprev or not pprev.is_name: 692 penalty += 10 693 694 return penalty + 10 695 696 def MoveStateToNextToken(self): 697 """Calculate format decision state information and move onto the next token. 698 699 Before moving onto the next token, we first calculate the format decision 700 state given the current token and its formatting decisions. Then the format 701 decision state is set up so that the next token can be added. 702 703 Returns: 704 The penalty for the number of characters over the column limit. 705 """ 706 current = self.next_token 707 if not current.OpensScope() and not current.ClosesScope(): 708 self.lowest_level_on_line = min(self.lowest_level_on_line, 709 self.paren_level) 710 711 # If we encounter an opening bracket, we add a level to our stack to prepare 712 # for the subsequent tokens. 713 if current.OpensScope(): 714 last = self.stack[-1] 715 new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space 716 717 self.stack.append(_ParenState(new_indent, self.stack[-1].last_space)) 718 self.paren_level += 1 719 720 # If we encounter a closing bracket, we can remove a level from our 721 # parenthesis stack. 722 if len(self.stack) > 1 and current.ClosesScope(): 723 if subtypes.DICTIONARY_KEY_PART in current.subtypes: 724 self.stack[-2].last_space = self.stack[-2].indent 725 else: 726 self.stack[-2].last_space = self.stack[-1].last_space 727 self.stack.pop() 728 self.paren_level -= 1 729 730 is_multiline_string = current.is_string and '\n' in current.value 731 if is_multiline_string: 732 # This is a multiline string. Only look at the first line. 733 self.column += len(current.value.split('\n')[0]) 734 elif not current.is_pseudo: 735 self.column += len(current.value) 736 737 self.next_token = self.next_token.next_token 738 739 # Calculate the penalty for overflowing the column limit. 740 penalty = 0 741 if (not current.is_pylint_comment and not current.is_pytype_comment and 742 not current.is_copybara_comment and self.column > self.column_limit): 743 excess_characters = self.column - self.column_limit 744 penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters 745 746 if is_multiline_string: 747 # If this is a multiline string, the column is actually the 748 # end of the last line in the string. 749 self.column = len(current.value.split('\n')[-1]) 750 751 return penalty 752 753 def _CalculateComprehensionState(self, newline): 754 """Makes required changes to comprehension state. 755 756 Args: 757 newline: Whether the current token is to be added on a newline. 758 759 Returns: 760 The penalty for the token-newline combination given the current 761 comprehension state. 762 """ 763 current = self.next_token 764 previous = current.previous_token 765 top_of_stack = self.comp_stack[-1] if self.comp_stack else None 766 penalty = 0 767 768 if top_of_stack is not None: 769 # Check if the token terminates the current comprehension. 770 if current == top_of_stack.closing_bracket: 771 last = self.comp_stack.pop() 772 # Lightly penalize comprehensions that are split across multiple lines. 773 if last.has_interior_split: 774 penalty += style.Get('SPLIT_PENALTY_COMPREHENSION') 775 776 return penalty 777 778 if newline: 779 top_of_stack.has_interior_split = True 780 781 if (subtypes.COMP_EXPR in current.subtypes and 782 subtypes.COMP_EXPR not in previous.subtypes): 783 self.comp_stack.append(object_state.ComprehensionState(current)) 784 return penalty 785 786 if current.value == 'for' and subtypes.COMP_FOR in current.subtypes: 787 if top_of_stack.for_token is not None: 788 # Treat nested comprehensions like normal comp_if expressions. 789 # Example: 790 # my_comp = [ 791 # a.qux + b.qux 792 # for a in foo 793 # --> for b in bar <-- 794 # if a.zut + b.zut 795 # ] 796 if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and 797 top_of_stack.has_split_at_for != newline and 798 (top_of_stack.has_split_at_for or 799 not top_of_stack.HasTrivialExpr())): 800 penalty += split_penalty.UNBREAKABLE 801 else: 802 top_of_stack.for_token = current 803 top_of_stack.has_split_at_for = newline 804 805 # Try to keep trivial expressions on the same line as the comp_for. 806 if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and newline and 807 top_of_stack.HasTrivialExpr()): 808 penalty += split_penalty.CONNECTED 809 810 if (subtypes.COMP_IF in current.subtypes and 811 subtypes.COMP_IF not in previous.subtypes): 812 # Penalize breaking at comp_if when it doesn't match the newline structure 813 # in the rest of the comprehension. 814 if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and 815 top_of_stack.has_split_at_for != newline and 816 (top_of_stack.has_split_at_for or not top_of_stack.HasTrivialExpr())): 817 penalty += split_penalty.UNBREAKABLE 818 819 return penalty 820 821 def _PushParameterListState(self, newline): 822 """Push a new parameter list state for a function definition. 823 824 Args: 825 newline: Whether the current token is to be added on a newline. 826 """ 827 current = self.next_token 828 previous = current.previous_token 829 830 if _IsFunctionDefinition(previous): 831 first_param_column = previous.total_length + self.stack[-2].indent 832 self.param_list_stack.append( 833 object_state.ParameterListState(previous, newline, 834 first_param_column)) 835 836 def _CalculateParameterListState(self, newline): 837 """Makes required changes to parameter list state. 838 839 Args: 840 newline: Whether the current token is to be added on a newline. 841 842 Returns: 843 The penalty for the token-newline combination given the current 844 parameter state. 845 """ 846 current = self.next_token 847 previous = current.previous_token 848 penalty = 0 849 850 if _IsFunctionDefinition(previous): 851 first_param_column = previous.total_length + self.stack[-2].indent 852 if not newline: 853 param_list = self.param_list_stack[-1] 854 if param_list.parameters and param_list.has_typed_return: 855 last_param = param_list.parameters[-1].first_token 856 last_token = _LastTokenInLine(previous.matching_bracket) 857 total_length = last_token.total_length 858 total_length -= last_param.total_length - len(last_param.value) 859 if total_length + self.column > self.column_limit: 860 # If we need to split before the trailing code of a function 861 # definition with return types, then also split before the opening 862 # parameter so that the trailing bit isn't indented on a line by 863 # itself: 864 # 865 # def rrrrrrrrrrrrrrrrrrrrrr(ccccccccccccccccccccccc: Tuple[Text] 866 # ) -> List[Tuple[Text, Text]]: 867 # pass 868 penalty += split_penalty.VERY_STRONGLY_CONNECTED 869 return penalty 870 871 if first_param_column <= self.column: 872 # Make sure we don't split after the opening bracket if the 873 # continuation indent is greater than the opening bracket: 874 # 875 # a( 876 # b=1, 877 # c=2) 878 penalty += split_penalty.VERY_STRONGLY_CONNECTED 879 return penalty 880 881 if not self.param_list_stack: 882 return penalty 883 884 param_list = self.param_list_stack[-1] 885 if current == self.param_list_stack[-1].closing_bracket: 886 self.param_list_stack.pop() # We're done with this state. 887 if newline and param_list.has_typed_return: 888 if param_list.split_before_closing_bracket: 889 penalty -= split_penalty.STRONGLY_CONNECTED 890 elif param_list.LastParamFitsOnLine(self.column): 891 penalty += split_penalty.STRONGLY_CONNECTED 892 893 if (not newline and param_list.has_typed_return and 894 param_list.has_split_before_first_param): 895 # Prefer splitting before the closing bracket if there's a return type 896 # and we've already split before the first parameter. 897 penalty += split_penalty.STRONGLY_CONNECTED 898 899 return penalty 900 901 if not param_list.parameters: 902 return penalty 903 904 if newline: 905 if self._FitsOnLine(param_list.parameters[0].first_token, 906 _LastTokenInLine(param_list.closing_bracket)): 907 penalty += split_penalty.STRONGLY_CONNECTED 908 909 if (not newline and style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and 910 param_list.has_default_values and 911 current != param_list.parameters[0].first_token and 912 current != param_list.closing_bracket and 913 subtypes.PARAMETER_START in current.subtypes): 914 # If we want to split before parameters when there are named assigns, 915 # then add a penalty for not splitting. 916 penalty += split_penalty.STRONGLY_CONNECTED 917 918 return penalty 919 920 def _IndentWithContinuationAlignStyle(self, column): 921 if column == 0: 922 return column 923 align_style = style.Get('CONTINUATION_ALIGN_STYLE') 924 if align_style == 'FIXED': 925 return ((self.line.depth * style.Get('INDENT_WIDTH')) + 926 style.Get('CONTINUATION_INDENT_WIDTH')) 927 if align_style == 'VALIGN-RIGHT': 928 indent_width = style.Get('INDENT_WIDTH') 929 return indent_width * int((column + indent_width - 1) / indent_width) 930 return column 931 932 def _GetNewlineColumn(self): 933 """Return the new column on the newline.""" 934 current = self.next_token 935 previous = current.previous_token 936 top_of_stack = self.stack[-1] 937 938 if isinstance(current.spaces_required_before, list): 939 # Don't set the value here, as we need to look at the lines near 940 # this one to determine the actual horizontal alignment value. 941 return 0 942 elif current.spaces_required_before > 2 or self.line.disable: 943 return current.spaces_required_before 944 945 cont_aligned_indent = self._IndentWithContinuationAlignStyle( 946 top_of_stack.indent) 947 948 if current.OpensScope(): 949 return cont_aligned_indent if self.paren_level else self.first_indent 950 951 if current.ClosesScope(): 952 if (previous.OpensScope() or 953 (previous.is_comment and previous.previous_token is not None and 954 previous.previous_token.OpensScope())): 955 return max(0, 956 top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH')) 957 return top_of_stack.closing_scope_indent 958 959 if (previous and previous.is_string and current.is_string and 960 subtypes.DICTIONARY_VALUE in current.subtypes): 961 return previous.column 962 963 if style.Get('INDENT_DICTIONARY_VALUE'): 964 if previous and (previous.value == ':' or previous.is_pseudo): 965 if subtypes.DICTIONARY_VALUE in current.subtypes: 966 return top_of_stack.indent 967 968 if (not self.param_list_stack and _IsCompoundStatement(self.line.first) and 969 (not (style.Get('DEDENT_CLOSING_BRACKETS') or 970 style.Get('INDENT_CLOSING_BRACKETS')) or 971 style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))): 972 token_indent = ( 973 len(self.line.first.whitespace_prefix.split('\n')[-1]) + 974 style.Get('INDENT_WIDTH')) 975 if token_indent == top_of_stack.indent: 976 return token_indent + style.Get('CONTINUATION_INDENT_WIDTH') 977 978 if (self.param_list_stack and 979 not self.param_list_stack[-1].SplitBeforeClosingBracket( 980 top_of_stack.indent) and top_of_stack.indent 981 == ((self.line.depth + 1) * style.Get('INDENT_WIDTH'))): 982 if (subtypes.PARAMETER_START in current.subtypes or 983 (previous.is_comment and 984 subtypes.PARAMETER_START in previous.subtypes)): 985 return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH') 986 987 return cont_aligned_indent 988 989 def _FitsOnLine(self, start, end): 990 """Determines if line between start and end can fit on the current line.""" 991 length = end.total_length - start.total_length 992 if not start.is_pseudo: 993 length += len(start.value) 994 return length + self.column <= self.column_limit 995 996 def _EachDictEntryFitsOnOneLine(self, opening): 997 """Determine if each dict elems can fit on one line.""" 998 999 def PreviousNonCommentToken(tok): 1000 tok = tok.previous_token 1001 while tok.is_comment: 1002 tok = tok.previous_token 1003 return tok 1004 1005 def ImplicitStringConcatenation(tok): 1006 num_strings = 0 1007 if tok.is_pseudo: 1008 tok = tok.next_token 1009 while tok.is_string: 1010 num_strings += 1 1011 tok = tok.next_token 1012 return num_strings > 1 1013 1014 def DictValueIsContainer(opening, closing): 1015 """Return true if the dictionary value is a container.""" 1016 if not opening or not closing: 1017 return False 1018 colon = opening.previous_token 1019 while colon: 1020 if not colon.is_pseudo: 1021 break 1022 colon = colon.previous_token 1023 if not colon or colon.value != ':': 1024 return False 1025 key = colon.previous_token 1026 if not key: 1027 return False 1028 return subtypes.DICTIONARY_KEY_PART in key.subtypes 1029 1030 closing = opening.matching_bracket 1031 entry_start = opening.next_token 1032 current = opening.next_token.next_token 1033 1034 while current and current != closing: 1035 if subtypes.DICTIONARY_KEY in current.subtypes: 1036 prev = PreviousNonCommentToken(current) 1037 if prev.value == ',': 1038 prev = PreviousNonCommentToken(prev.previous_token) 1039 if not DictValueIsContainer(prev.matching_bracket, prev): 1040 length = prev.total_length - entry_start.total_length 1041 length += len(entry_start.value) 1042 if length + self.stack[-2].indent >= self.column_limit: 1043 return False 1044 entry_start = current 1045 if current.OpensScope(): 1046 if ((current.value == '{' or 1047 (current.is_pseudo and current.next_token.value == '{') and 1048 subtypes.DICTIONARY_VALUE in current.subtypes) or 1049 ImplicitStringConcatenation(current)): 1050 # A dictionary entry that cannot fit on a single line shouldn't matter 1051 # to this calculation. If it can't fit on a single line, then the 1052 # opening should be on the same line as the key and the rest on 1053 # newlines after it. But the other entries should be on single lines 1054 # if possible. 1055 if current.matching_bracket: 1056 current = current.matching_bracket 1057 while current: 1058 if current == closing: 1059 return True 1060 if subtypes.DICTIONARY_KEY in current.subtypes: 1061 entry_start = current 1062 break 1063 current = current.next_token 1064 else: 1065 current = current.matching_bracket 1066 else: 1067 current = current.next_token 1068 1069 # At this point, current is the closing bracket. Go back one to get the end 1070 # of the dictionary entry. 1071 current = PreviousNonCommentToken(current) 1072 length = current.total_length - entry_start.total_length 1073 length += len(entry_start.value) 1074 return length + self.stack[-2].indent <= self.column_limit 1075 1076 def _ArgumentListHasDictionaryEntry(self, token): 1077 """Check if the function argument list has a dictionary as an arg.""" 1078 if _IsArgumentToFunction(token): 1079 while token: 1080 if token.value == '{': 1081 length = token.matching_bracket.total_length - token.total_length 1082 return length + self.stack[-2].indent > self.column_limit 1083 if token.ClosesScope(): 1084 break 1085 if token.OpensScope(): 1086 token = token.matching_bracket 1087 token = token.next_token 1088 return False 1089 1090 def _ContainerFitsOnStartLine(self, opening): 1091 """Check if the container can fit on its starting line.""" 1092 return (opening.matching_bracket.total_length - opening.total_length + 1093 self.stack[-1].indent) <= self.column_limit 1094 1095 1096_COMPOUND_STMTS = frozenset( 1097 {'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'}) 1098 1099 1100def _IsCompoundStatement(token): 1101 if token.value == 'async': 1102 token = token.next_token 1103 return token.value in _COMPOUND_STMTS 1104 1105 1106def _IsFunctionDef(token): 1107 if token.value == 'async': 1108 token = token.next_token 1109 return token.value == 'def' 1110 1111 1112def _IsFunctionCallWithArguments(token): 1113 while token: 1114 if token.value == '(': 1115 token = token.next_token 1116 return token and token.value != ')' 1117 elif token.name not in {'NAME', 'DOT', 'EQUAL'}: 1118 break 1119 token = token.next_token 1120 return False 1121 1122 1123def _IsArgumentToFunction(token): 1124 bracket = logical_line.IsSurroundedByBrackets(token) 1125 if not bracket or bracket.value != '(': 1126 return False 1127 previous = bracket.previous_token 1128 return previous and previous.is_name 1129 1130 1131def _GetOpeningBracket(current): 1132 """Get the opening bracket containing the current token.""" 1133 if current.matching_bracket and not current.is_pseudo: 1134 return current if current.OpensScope() else current.matching_bracket 1135 1136 while current: 1137 if current.ClosesScope(): 1138 current = current.matching_bracket 1139 elif current.is_pseudo: 1140 current = current.previous_token 1141 elif current.OpensScope(): 1142 return current 1143 current = current.previous_token 1144 return None 1145 1146 1147def _LastTokenInLine(current): 1148 while not current.is_comment and current.next_token: 1149 current = current.next_token 1150 return current 1151 1152 1153def _IsFunctionDefinition(current): 1154 prev = current.previous_token 1155 return current.value == '(' and prev and subtypes.FUNC_DEF in prev.subtypes 1156 1157 1158def _IsLastScopeInLine(current): 1159 current = current.matching_bracket 1160 while current: 1161 current = current.next_token 1162 if current and current.OpensScope(): 1163 return False 1164 return True 1165 1166 1167def _IsSingleElementTuple(token): 1168 """Check if it's a single-element tuple.""" 1169 close = token.matching_bracket 1170 token = token.next_token 1171 num_commas = 0 1172 while token != close: 1173 if token.value == ',': 1174 num_commas += 1 1175 token = token.matching_bracket if token.OpensScope() else token.next_token 1176 return num_commas == 1 1177 1178 1179def _ScopeHasNoCommas(token): 1180 """Check if the scope has no commas.""" 1181 close = token.matching_bracket 1182 token = token.next_token 1183 while token != close: 1184 if token.value == ',': 1185 return False 1186 token = token.matching_bracket if token.OpensScope() else token.next_token 1187 return True 1188 1189 1190class _ParenState(object): 1191 """Maintains the state of the bracket enclosures. 1192 1193 A stack of _ParenState objects are kept so that we know how to indent relative 1194 to the brackets. 1195 1196 Attributes: 1197 indent: The column position to which a specified parenthesis level needs to 1198 be indented. 1199 last_space: The column position of the last space on each level. 1200 closing_scope_indent: The column position of the closing indentation. 1201 split_before_closing_bracket: Whether a newline needs to be inserted before 1202 the closing bracket. We only want to insert a newline before the closing 1203 bracket if there also was a newline after the beginning left bracket. 1204 num_line_splits: Number of line splits this _ParenState contains already. 1205 Each subsequent line split gets an increasing penalty. 1206 """ 1207 1208 # TODO(morbo): This doesn't track "bin packing." 1209 1210 def __init__(self, indent, last_space): 1211 self.indent = indent 1212 self.last_space = last_space 1213 self.closing_scope_indent = 0 1214 self.split_before_closing_bracket = False 1215 self.num_line_splits = 0 1216 1217 def Clone(self): 1218 state = _ParenState(self.indent, self.last_space) 1219 state.closing_scope_indent = self.closing_scope_indent 1220 state.split_before_closing_bracket = self.split_before_closing_bracket 1221 state.num_line_splits = self.num_line_splits 1222 return state 1223 1224 def __repr__(self): 1225 return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % ( 1226 self.indent, self.last_space, self.closing_scope_indent) 1227 1228 def __eq__(self, other): 1229 return hash(self) == hash(other) 1230 1231 def __ne__(self, other): 1232 return not self == other 1233 1234 def __hash__(self, *args, **kwargs): 1235 return hash((self.indent, self.last_space, self.closing_scope_indent, 1236 self.split_before_closing_bracket, self.num_line_splits)) 1237