1#!/usr/bin/env python3
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===------------------------------------------------------------------------===#
10
11r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
Requires Python 3.6 or later
23"""
24
25from __future__ import absolute_import, division, print_function
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# One-line synopsis passed to argparse in place of its auto-generated usage.
usage = ('git clang-format [OPTIONS] [<commit>] [<commit>|--staged] '
         '[--] [<file>...]')

# Long description shown in the help output.  main() builds its parser with
# argparse.RawDescriptionHelpFormatter, so the layout below is printed as is.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory, or in the stage/index.

Examples:
  To format staged changes, i.e everything that's been `git add`ed:
    git clang-format

  To also format everything touched in the most recent commit:
    git clang-format HEAD~1

  If you're on a branch off main, to format everything touched on your branch:
    git clang-format main

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which the output of clang-format is
# saved.  This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the first line number taken from a diff
# hunk header and `count` is the number of lines in the run.
Range = collections.namedtuple('Range', 'start, count')
69
70
def main():
  """Entry point: run clang-format on the lines changed relative to a commit.

  Parses the command line (option defaults come from the git configuration),
  computes the changed line ranges per file, runs clang-format over them into a
  new git tree, and then either prints a diff or diffstat between the old and
  new trees or applies the new tree to the working directory.

  Returns the exit status for the process: 0 when there is nothing to format
  or clang-format changed nothing, the exit status of `git diff` for --diff
  and --diffstat, and 1 after changes have been applied.  Usage errors and
  failing subcommands end the process with status 2 through die() and run()."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', 'inc',  # C++
      'ccm', 'cppm', 'cxxm', 'c++m',  # C++ Modules
      'cu', 'cuh',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      'json',  # Json
      'sv', 'svh', 'v', 'vh', # Verilog
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--diffstat', action='store_true',
                 help='print a diffstat instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--staged', '--cached', action='store_true',
                 help='format lines in the stage instead of the working dir')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  p.add_argument('--diff_from_common_commit', action='store_true',
                 help=('diff from the last common commit for commits in '
                      'separate branches rather than the exact point of the '
                      'commits'))
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) == 2:
    if opts.staged:
      die('--staged is not allowed when two commits are given')
    if not opts.diff:
      die('--diff is required when two commits are given')
  elif opts.diff_from_common_commit:
    die('--diff_from_common_commit is only allowed when two commits are given')

  # A binary given with a directory part is made absolute here because the
  # working directory changes (cd_to_toplevel) before clang-format is run.
  if os.path.dirname(opts.binary):
    opts.binary = os.path.abspath(opts.binary)

  changed_lines = compute_diff_and_extract_lines(commits,
                                                 files,
                                                 opts.staged,
                                                 opts.diff_from_common_commit)
  if opts.verbose >= 1:
    # Remember every file in the diff so the ones filtered out can be listed.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  filter_symlinks(changed_lines)
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print(
        'Ignoring changes in the following files (wrong extension or symlink):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)

  if not changed_lines:
    if opts.verbose >= 0:
      print('no modified files to format')
    return 0

  # `revision` tells the formatting step where to read file contents from:
  # a commit name, '' for the index, or None for the working directory.
  if len(commits) > 1:
    old_tree = commits[1]
    revision = old_tree
  elif opts.staged:
    old_tree = create_tree_from_index(changed_lines)
    revision = ''
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    revision = None
  new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                               revision,
                                               binary=opts.binary,
                                               style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)

  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
    return 0

  if opts.diff:
    return print_diff(old_tree, new_tree)
  if opts.diffstat:
    return print_diffstat(old_tree, new_tree)

  changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                patch_mode=opts.patch)
  if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    print('changed files:')
    for filename in changed_files:
      print('    %s' % filename)

  # Changes were applied; this is reported with exit status 1.
  return 1
228
229
def load_git_config(non_string_options=None):
  """Read every git configuration entry into a dictionary keyed by name.

  Values are returned as strings.  `non_string_options` may be a dictionary
  mapping an option name (in lower case) to either "--bool" or "--int"; the
  value of such an option is obtained by asking `git config` again with that
  flag."""
  typed_options = {} if non_string_options is None else non_string_options
  config = {}
  listing = run('git', 'config', '--list', '--null')
  for record in listing.split('\0'):
    if not record:
      continue
    # With --null the name and value are separated by a newline.  A record
    # without one is a setting given without a value, which means 'true'.
    name, separator, value = record.partition('\n')
    if not separator:
      value = 'true'
    if name in typed_options:
      value = run('git', 'config', typed_options[name], name)
    config[name] = value
  return config
251
252
def interpret_args(args, dash_dash, default_commit):
  """Split the positional arguments into commits and files.

  `args` holds the positionals found before any "--"; `dash_dash` holds the
  "--" itself and everything after it (or is empty if there was no "--").

  When "--" was given, everything in `args` must be a commit and the files are
  whatever followed the "--".  Without it, leading arguments are consumed as
  commits for as long as they name a revision, and the rest are files.  In
  either case `[default_commit]` is used when no commit was given.

  Returns the pair (commits, files)."""
  if dash_dash:
    commits = args if args else [default_commit]
    for commit in commits:
      kind = get_object_type(commit)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % commit)
      else:
        die("'%s' is a %s, but a commit was expected" % (commit, kind))
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  # Peel revisions off the front of `args`; what remains are the files.
  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
289
290
def disambiguate_revision(value):
  """Decide whether a positional argument is a revision or a file name.

  Returns True if `value` names a commit or tag and False if it is not a git
  object at all.  The process exits if `git rev-parse` rejects `value` or if
  it names some other kind of object."""
  # run() exits with git's own error message when rev-parse fails, which is
  # what happens for a value that is neither a revision nor a file.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
303
304
def get_object_type(value):
  """Return the type git reports for `value` (such as 'commit' or 'tag').

  Returns None if `git cat-file -t` exits with a nonzero status."""
  query = subprocess.Popen(['git', 'cat-file', '-t', value],
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured but not used, so git's complaint is not shown.
  output, _ = query.communicate()
  return None if query.returncode != 0 else convert_string(output.strip())
314
315
def compute_diff_and_extract_lines(commits, files, staged, diff_common_commit):
  """Run the diff started by compute_diff() through extract_lines().

  Returns the dictionary built by extract_lines().  Exits with status 2 if
  the diff process reports failure."""
  diff = compute_diff(commits, files, staged, diff_common_commit)
  line_ranges = extract_lines(diff.stdout)
  diff.stdout.close()
  if diff.wait() != 0:
    # git is assumed to have printed its error to stderr already.
    sys.exit(2)
  return line_ranges
326
327
def compute_diff(commits, files, staged, diff_common_commit):
  """Start the git process that diffs `commits` and return it.

  The patch is read from the returned process's `stdout`.  With a single
  commit, `git diff-index` compares it with the working directory (or with
  the stage if `staged`).  With two commits, `git diff-tree` compares them;
  if `diff_common_commit` is set they are passed as `<first>...<second>`.
  The diff is restricted to `files` when that is non-empty, and zero context
  lines are used in the patch.  The process's stdin is closed before it is
  returned."""
  if len(commits) == 2:
    tool = 'diff-tree'
    options = []
    if diff_common_commit:
      commits = [f'{commits[0]}...{commits[1]}']
  else:
    tool = 'diff-index'
    options = ['--cached'] if staged else []

  cmd = ['git', tool, '-p', '-U0'] + options + commits + ['--']
  cmd += files
  diff = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  diff.stdin.close()
  return diff
350
351
def extract_lines(patch_file):
  """Collect the changed line ranges of each file named in `patch_file`.

  `patch_file` is iterated line by line and must contain a patch produced with
  ``-U0``, meaning unidiff format with zero lines of context.

  Returns a dict mapping filename to a list of line `Range`s."""
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    header = re.search(r'^\+\+\+\ [^/]+/(.*)', text)
    if header:
      # Remember which file the following hunks belong to.
      filename = header.group(1).rstrip('\r\n\t')
    hunk = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', text)
    if not hunk:
      continue
    first_line = int(hunk.group(1))
    if first_line == 0:
      continue
    # The count is optional in a hunk header and defaults to one line; a
    # count of zero is also widened to one line.
    count = int(hunk.group(3)) if hunk.group(3) else 1
    ranges_by_file.setdefault(filename, []).append(
        Range(first_line, max(count, 1)))
  return ranges_by_file
379
380
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is keyed by file path and is modified in place.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  Include the empty string to keep files whose name has
  no extension.  The comparison ignores the case of the file's extension.

  The extension is taken from the last path component only, so a period in a
  directory name is never mistaken for the start of an extension."""
  allowed_extensions = frozenset(allowed_extensions)
  for filename in list(dictionary):
    # Paths here come from git's diff output, where '/' separates components
    # (extract_lines() relies on the same thing).
    basename = filename.rsplit('/', 1)[-1]
    extension = basename.rsplit('.', 1)[1] if '.' in basename else ''
    if extension.lower() not in allowed_extensions:
      del dictionary[filename]
393
394
def filter_symlinks(dictionary):
  """Remove, in place, every key of `dictionary` that is a path to a symlink."""
  symlinks = [path for path in dictionary if os.path.islink(path)]
  for path in symlinks:
    del dictionary[path]
400
401
def cd_to_toplevel():
  """Make the git repository's top-level directory the working directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
406
407
def create_tree_from_workdir(filenames):
  """Build a git tree out of the working-directory copies of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
413
414
def create_tree_from_index(filenames):
  """Build a git tree out of the index entries of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  # Snapshot the environment now.  The entries below are produced while
  # create_tree() has GIT_INDEX_FILE pointing at its temporary index, and
  # `git ls-files` has to read the original index instead.
  original_env = os.environ.copy()

  def staged_entries():
    for path in filenames:
      lister = subprocess.Popen(
          ['git', 'ls-files', '--stage', '-z', '--', path],
          env=original_env,
          stdin=subprocess.PIPE,
          stdout=subprocess.PIPE)
      listing = lister.communicate()[0]
      # Only the first NUL-terminated entry is used.
      first_entry = listing.split(b'\0')[0]
      yield convert_string(first_entry)

  return create_tree(staged_entries(), '--index-info')
428
429
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results as a git tree.

  `changed_lines` maps filename to the line ranges to format.  File contents
  and modes are read from `revision`: a commit name, the empty string for the
  index, or None for the working directory.  `binary` and `style` are handed
  on to clang_format_to_blob().

  Returns the object ID (SHA-1) of the created tree."""
  # When formatting files from the index, keep a copy of the environment:
  # the files have to be read from the original index.
  index_env = os.environ.copy() if revision == '' else None

  def recorded_mode(filename):
    """Return the mode git has for `filename`, as produced by oct()."""
    if revision:
      query = ['git', 'ls-tree',
               '%s:%s' % (revision, os.path.dirname(filename)),
               os.path.basename(filename)]
    else:
      query = ['git', 'ls-files', '--stage', '--', filename]
    lookup = subprocess.Popen(query, env=index_env,
                              stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    output = lookup.communicate()[0]
    # The mode is the first whitespace-separated field of the output.
    return oct(int(output.split()[0], 8))

  def index_entries():
    try:
      pairs = changed_lines.iteritems()  # Python 2
    except AttributeError:
      pairs = changed_lines.items()  # Python 3
    for filename, line_ranges in pairs:
      if revision is None:
        mode = oct(os.stat(filename).st_mode)
      else:
        mode = recorded_mode(filename)
      # Python 3 spells octal as '0o...'; git expects a plain leading '0'.
      if mode.startswith('0o'):
        mode = '0' + mode[2:]
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style,
                                     env=index_env)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_entries(), '--index-info')
469
470
def create_tree(input_lines, mode):
  """Create a tree object from the given input and return its object ID.

  `mode` selects how `git update-index` reads `input_lines`.  With '--stdin'
  each item must be a filename.  With '--index-info' each item must be a
  value suitable for "git update-index --index-info", such as
  "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid.

  The tree is built in a temporary index file.  The process exits if
  `git update-index` fails."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for entry in input_lines:
      # -z makes update-index expect NUL-terminated records.
      updater.stdin.write(to_bytes('%s\0' % entry))
    updater.stdin.close()
    status = updater.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(cmd))
    return run('git', 'write-tree')
489
490
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None, env=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None. Revision can be set to an empty string to run
  clang-format on the file in the index.

  `line_ranges` is an iterable of (start_line, line_count) pairs; each one is
  passed to clang-format as a `-lines=` option.  `binary` is the clang-format
  executable and `style`, if set, is passed as `-style=`.  `env` is the
  environment used for the `git cat-file` process that reads `revision`.

  The process exits (through die()) if the executable cannot be found or if
  any of the commands in the pipeline fails.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  # Turn each (start, count) pair into an inclusive first:last line range.
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision is not None:
    # The contents come from git over a pipe, so the file name is supplied
    # separately with -assume-filename.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, env=env, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    # clang-format reads the working-directory file itself.
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close this process's handle on clang-format's input; nothing is written
  # to it from here.
  clang_format_stdin.close()
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  # hash-object now reads clang-format's output; this process's handle on
  # that pipe is no longer needed.
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  # Check every stage of the pipeline, last stage first.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')
540
541
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index file.

  The index is created by create_temporary_index(tree).  On exit the previous
  value of GIT_INDEX_FILE is restored (or the variable is removed if it was
  not set) and the temporary file is deleted."""
  index_path = create_temporary_index(tree)
  saved_index = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved_index is not None:
      os.environ['GIT_INDEX_FILE'] = saved_index
    else:
      os.environ.pop('GIT_INDEX_FILE')
    os.remove(index_path)
557
558
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory; return its path.

  The index is filled from `tree` if that is not None, and is created empty
  otherwise."""
  git_dir = run('git', 'rev-parse', '--git-dir')
  index_path = os.path.join(git_dir, temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output=' + index_path, source)
  return index_path
570
571
def print_diff(old_tree, new_tree):
  """Print the diff between the two trees to stdout.

  Returns the exit status of `git diff --exit-code`."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # the output is meant to be read by the user, and only the former does nice
  # things like color and pagination.
  #
  # Only modified files are shown: `new_tree` contains just the files that
  # were modified, so without the filter every other file would show up as
  # deleted.
  cmd = ['git', 'diff', '--diff-filter=M', '--exit-code', old_tree, new_tree]
  completed = subprocess.run(cmd)
  return completed.returncode
583
def print_diffstat(old_tree, new_tree):
  """Print the diffstat between the two trees to stdout.

  Returns the exit status of `git diff --exit-code`."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # the output is meant to be read by the user, and only the former does nice
  # things like color and pagination.
  #
  # Only modified files are shown: `new_tree` contains just the files that
  # were modified, so without the filter every other file would show up as
  # deleted.
  cmd = ['git', 'diff', '--diff-filter=M', '--exit-code', '--stat',
         old_tree, new_tree]
  completed = subprocess.run(cmd)
  return completed.returncode
595
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Only files that are modified between `old_tree` and `new_tree` are touched.

  Bails (exit status 2) if there are local unstaged changes in those files and
  not `force`.  If `patch_mode`, runs `git checkout --patch` to select hunks
  interactively; subprocess.CalledProcessError is raised if that command fails.

  Returns the list of files that differ between the two trees."""
  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # '--' keeps a path that begins with '-' from being read as an option,
    # matching the checkout-index call below.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
                'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.run(['git', 'checkout', '--patch', new_tree], check=True)
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-f', '--', *changed_files)
  return changed_files
626
627
def run(*args, **kwargs):
  """Run a command and return what it wrote to stdout, as a string.

  The positional arguments form the command line.  Keyword arguments:
    stdin:   data to send to the command's standard input (default: none).
             Text is encoded as UTF-8; bytes are sent unchanged.
    verbose: if true (the default), name the command when reporting its
             stderr output or its failure.
    strip:   if true (the default), strip trailing CR/LF from the result.

  Anything the command writes to stderr is echoed to stderr.  If the command
  exits with a nonzero status, the process exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  # The pipes below are binary, so text input has to be encoded first.
  if stdin is not None and not isinstance(stdin, bytes):
    stdin = stdin.encode('utf-8')
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  stdout, stderr = p.communicate(input=stdin)

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
654
655
def die(message):
  """Print `message` to stderr after 'error:' and exit with status 2."""
  print('error:', message, file=sys.stderr)
  raise SystemExit(2)
659
660
def to_bytes(str_input):
    """Return `str_input` as bytes, encoding it as UTF-8 unless it already is."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
666
667
def to_string(bytes_input):
    """Return `bytes_input` as a native `str`.

    A `str` is returned unchanged.  Bytes are decoded as UTF-8.  Anything else
    (a Python 2 `unicode` object) is encoded as UTF-8, which yields a Python 2
    `str`."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes is a type distinct from str
        # and has no encode() method.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
672
673
def convert_string(bytes_input):
    """Return `bytes_input` as a string, decoding it as UTF-8 when possible.

    Falls back to `str(bytes_input)` when the input has no decode() method or
    is not valid UTF-8."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: e.g. 'str' object has no attribute 'decode'.
        # UnicodeError: the bytes could not be decoded as UTF-8.
        return str(bytes_input)
681
# When executed as a script, run main() and use its return value as the exit
# status of the process.
if __name__ == '__main__':
  sys.exit(main())
684