#!/usr/bin/env python3
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 3.6 or later
"""

from __future__ import absolute_import, division, print_function
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = ('git clang-format [OPTIONS] [<commit>] [<commit>|--staged] '
         '[--] [<file>...]')

desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory, or in the stage/index.

Examples:
  To format staged changes, i.e everything that's been `git add`ed:
    git clang-format

  To also format everything touched in the most recent commit:
    git clang-format HEAD~1

  If you're on a branch off main, to format everything touched on your branch:
    git clang-format main

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive lines beginning at the 1-based line `start`.
Range = collections.namedtuple('Range', 'start, count')


def _default_extensions():
    """Return the built-in comma-separated list of extensions to format."""
    return ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', 'inc',  # C++
        'ccm', 'cppm', 'cxxm', 'c++m',  # C++ Modules
        'cu', 'cuh',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
        'json',  # Json
        'sv', 'svh', 'v', 'vh',  # Verilog
    ])


def _build_arg_parser(config):
    """Build the command-line parser, taking defaults from git `config`.

    `config` is the dictionary returned by load_git_config(); its
    `clangformat.*` entries provide the defaults of the matching options.
    """
    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--diffstat', action='store_true',
                   help='print a diffstat instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      _default_extensions()),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--staged', '--cached', action='store_true',
                   help='format lines in the stage instead of the working dir')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    p.add_argument('--diff_from_common_commit', action='store_true',
                   help=('diff from the last common commit for commits in '
                         'separate branches rather than the exact point of the '
                         'commits'))
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these files')
    return p


def main():
    """Entry point: format the changed lines and report or apply the result.

    Returns the process exit status: 0 when there is nothing to format or
    clang-format changed nothing, 1 after changes were applied, or the exit
    status of `git diff` when --diff/--diffstat is used. Errors terminate the
    process with status 2 via die()/run().
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    opts = _build_arg_parser(config).parse_args(argv)

    # Fold --quiet into --verbose so a single signed level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) == 2:
        if opts.staged:
            die('--staged is not allowed when two commits are given')
        if not opts.diff:
            die('--diff is required when two commits are given')
    elif opts.diff_from_common_commit:
        die('--diff_from_common_commit is only allowed when two commits are '
            'given')

    # A binary given with a directory component must survive the chdir done
    # by cd_to_toplevel() below, so make it absolute now.
    if os.path.dirname(opts.binary):
        opts.binary = os.path.abspath(opts.binary)

    changed_lines = compute_diff_and_extract_lines(
        commits, files, opts.staged, opts.diff_from_common_commit)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    filter_symlinks(changed_lines)
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files '
                  '(wrong extension or symlink):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)

    if not changed_lines:
        if opts.verbose >= 0:
            print('no modified files to format')
        return 0

    # `revision` selects where clang-format reads the files from: a commit
    # name, '' for the index, or None for the working directory.
    if len(commits) > 1:
        old_tree = commits[1]
        revision = old_tree
    elif opts.staged:
        old_tree = create_tree_from_index(changed_lines)
        revision = ''
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        revision = None
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision,
                                                 binary=opts.binary,
                                                 style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)

    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
        return 0

    if opts.diff:
        return print_diff(old_tree, new_tree)
    if opts.diffstat:
        return print_diffstat(old_tree, new_tree)

    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
        print('changed files:')
        for filename in changed_files:
            print('    %s' % filename)

    return 1


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless in `non_string_options`,
    which is a dictionary mapping option name (in lower case) to either
    "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        if '\n' in entry:
            name, value = entry.split('\n', 1)
        else:
            # A setting with no '=' ('\n' with --null) is implicitly 'true'
            name = entry
            value = 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits. Otherwise, the arguments are
    checked from left to right if they are commits or files. If commits are
    not given, a list with `default_commit` is used.

    Note that leading commits are popped off `args` in place when no "--" is
    present."""
    if dash_dash:
        if len(args) == 0:
            commits = [default_commit]
        else:
            commits = args
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        files = dash_dash[1:]
    elif args:
        commits = []
        while args:
            if not disambiguate_revision(args[0]):
                break
            commits.append(args.pop(0))
        if not commits:
            commits = [default_commit]
        files = args
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep git's complaint off the terminal.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files, staged, diff_common_commit):
    """Calls compute_diff() followed by extract_lines().

    Exits with status 2 if the git diff command fails."""
    diff_process = compute_diff(commits, files, staged, diff_common_commit)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files, staged, diff_common_commit):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory (or stage if --staged is used)
    and the first commit if a single one was specified, or the difference
    between both specified commits, filtered on `files` (if non-empty).
    Zero context lines are used in the patch."""
    git_tool = 'diff-index'
    extra_args = []
    if len(commits) == 2:
        git_tool = 'diff-tree'
        if diff_common_commit:
            commits = [f'{commits[0]}...{commits[1]}']
    elif staged:
        extra_args += ['--cached']

    cmd = ['git', git_tool, '-p', '-U0'] + extra_args + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    The input must have been produced with ``-U0``, meaning unidiff format
    with zero lines of context. The return value is a dict mapping filename
    to a list of line `Range`s."""
    file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
    hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    for line in patch_file:
        line = convert_string(line)
        match = file_header.search(line)
        if match:
            filename = match.group(1).rstrip('\r\n\t')
        match = hunk_header.search(line)
        if match:
            start_line = int(match.group(1))
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            # A pure deletion reports zero new lines; one line at the
            # reported position is recorded instead.
            if line_count == 0:
                line_count = 1
            # A hunk whose new-file start is 0 has no line to record.
            if start_line == 0:
                continue
            matches.setdefault(filename, []).append(
                Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. An empty-string entry allows names without a
    period."""
    allowed_extensions = frozenset(allowed_extensions)
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 and '' in allowed_extensions:
            continue
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def filter_symlinks(dictionary):
    """Delete every key in `dictionary` that is a symlink."""
    for filename in list(dictionary.keys()):
        if os.path.islink(filename):
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def create_tree_from_index(filenames):
    """Create a new git tree with the given files as recorded in the index.

    Returns the object ID of the created tree."""
    # Copy the environment, because the files have to be read from the original
    # index. create_tree() points GIT_INDEX_FILE at a temporary index while it
    # consumes the generator below.
    env = os.environ.copy()

    def index_contents_generator():
        for filename in filenames:
            git_ls_files_cmd = ['git', 'ls-files', '--stage', '-z', '--',
                                filename]
            git_ls_files = subprocess.Popen(git_ls_files_cmd, env=env,
                                            stdin=subprocess.PIPE,
                                            stdout=subprocess.PIPE)
            stdout = git_ls_files.communicate()[0]
            yield convert_string(stdout.split(b'\0')[0])

    return create_tree(index_contents_generator(), '--index-info')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the line `Range`s to format. `revision`
    is a commit name, '' for the index, or None for the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    # Copy the environment when formatting the files in the index, because the
    # files have to be read from the original index.
    env = os.environ.copy() if revision == '' else None

    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            if revision is not None:
                if len(revision) > 0:
                    git_metadata_cmd = [
                        'git', 'ls-tree',
                        '%s:%s' % (revision, os.path.dirname(filename)),
                        os.path.basename(filename)]
                else:
                    git_metadata_cmd = ['git', 'ls-files', '--stage', '--',
                                        filename]
                git_metadata = subprocess.Popen(git_metadata_cmd, env=env,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = git_metadata.communicate()[0]
                fields = stdout.split()
                if not fields:
                    # Without this check the failure would surface as a bare
                    # IndexError with no hint of which command was at fault.
                    die('`%s` produced no output' % ' '.join(git_metadata_cmd))
                mode = oct(int(fields[0], 8))
            else:
                mode = oct(os.stat(filename).st_mode)
            # Adjust python3 octal format so that it matches what git expects
            if mode.startswith('0o'):
                mode = '0' + mode[2:]
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style,
                                           env=env)
            yield '%s %s\t%s' % (mode, blob_id, filename)

    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames. If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other
    mode is invalid."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None, env=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None. Revision can be set to an empty string to
    run clang-format on the file in the index.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(['-style=' + style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    if revision is not None:
        # The content is piped in from git, so clang-format has to be told
        # the file name explicitly.
        clang_format_cmd.extend(['-assume-filename=' + filename])
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, env=env,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of clang-format's input: either the read end of the
    # git pipe or the unused stdin pipe.
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in. Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout.

    Returns the exit status of `git diff --exit-code`."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    return subprocess.run(['git', 'diff', '--diff-filter=M',
                           '--exit-code', old_tree, new_tree]).returncode


def print_diffstat(old_tree, new_tree):
    """Print the diffstat between the two trees to stdout.

    Returns the exit status of `git diff --exit-code`."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    return subprocess.run(['git', 'diff', '--diff-filter=M', '--exit-code',
                           '--stat', old_tree, new_tree]).returncode


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`. If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of modified file names."""
    diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree, new_tree)
    # Drop empty entries: splitting empty output would otherwise yield [''],
    # and an empty pathspec is rejected by the git commands below.
    changed_files = [name for name in diff_output.split('\0') if name]
    if not changed_files:
        return changed_files
    if not force:
        # '--' keeps file names from being parsed as options or revisions.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.run(['git', 'checkout', '--patch', new_tree],
                           check=True)
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-f', '--', *changed_files)
    return changed_files


def run(*args, stdin=b'', verbose=True, strip=True):
    """Run the command `args` and return its standard output as a string.

    `stdin` is the data fed to the command; text is encoded as UTF-8. If
    `strip` is true, trailing newlines are removed from the output. If
    `verbose` is true, anything the command printed to stderr, and a failing
    exit status, are reported on stderr.

    Exits with status 2 if the command returns a non-zero status."""
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so text input has to be encoded first.
    stdin_data = None if stdin is None else to_bytes(stdin)
    stdout, stderr = p.communicate(input=stdin_data)

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)


def die(message):
    """Print `message` to stderr as an error and exit with status 2."""
    print('error:', message, file=sys.stderr)
    sys.exit(2)


def to_bytes(str_input):
    """Return `str_input` as bytes, encoding text as UTF-8."""
    # Encode to UTF-8 to get binary data.
    if isinstance(str_input, bytes):
        return str_input
    return str_input.encode('utf-8')


def to_string(bytes_input):
    """Return `bytes_input` as a string, decoding bytes as UTF-8."""
    if isinstance(bytes_input, str):
        return bytes_input
    return bytes_input.decode('utf-8')


def convert_string(bytes_input):
    """Return `bytes_input` as a string.

    Bytes are decoded as UTF-8. Input that has no `decode` method or is not
    valid UTF-8 is converted with str() instead."""
    try:
        return to_string(bytes_input.decode('utf-8'))
    except AttributeError:  # 'str' object has no attribute 'decode'.
        return str(bytes_input)
    except UnicodeError:
        return str(bytes_input)


if __name__ == '__main__':
    sys.exit(main())