1# Copyright 2017 The Abseil Authors.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Internal helper functions for Abseil Python flags library."""
16
17import os
18import re
19import struct
20import sys
21import textwrap
22import types
23from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Sequence, Set
24from xml.dom import minidom
25# pylint: disable=g-import-not-at-top
26try:
27  import fcntl
28except ImportError:
29  fcntl = None
30try:
31  # Importing termios will fail on non-unix platforms.
32  import termios
33except ImportError:
34  termios = None
35# pylint: enable=g-import-not-at-top
36
37
_DEFAULT_HELP_WIDTH = 80  # Default width of help output.
# Minimal "sane" width of help output. We assume that any value below 40 is
# unreasonable.
_MIN_HELP_WIDTH = 40

# Define the allowed error rate in an input string to get suggestions.
#
# We lean towards a high threshold because we tend to be matching a phrase,
# and the simple algorithm used here is geared towards correcting word
# spellings.
#
# For manual testing, consider "<command> --list" which produced a large number
# of spurious suggestions when we used "least_errors > 0.5" instead of
# "least_errors >= 0.5".
_SUGGESTION_ERROR_RATE_THRESHOLD = 0.50

# Characters that cannot appear or are highly discouraged in an XML 1.0
# document. (See http://www.w3.org/TR/REC-xml/#charsets or
# https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0)
_ILLEGAL_XML_CHARS_REGEX = re.compile(
    u'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]')

# This is a set of module ids for the modules that disclaim key flags.
# This module is explicitly added to this set so that we never consider it to
# define key flag.
disclaim_module_ids: Set[int] = {id(sys.modules[__name__])}


# Define special flags here so that help may be generated for them.
# NOTE: Please do NOT use SPECIAL_FLAGS from outside flags module.
# Initialized inside flagvalues.py.
# NOTE: This cannot be annotated as its actual FlagValues type since this would
# create a circular dependency.
SPECIAL_FLAGS: Any = None


# This points to the flags module, initialized in flags/__init__.py.
# This should only be used in adopt_module_key_flags to take SPECIAL_FLAGS into
# account.
# It is None until that initialization has happened, hence the Optional.
FLAGS_MODULE: Optional[types.ModuleType] = None
78
79
class _ModuleObjectAndName(NamedTuple):
  """Module object and name.

  Immutable (module, module_name) pair; being a NamedTuple it can be unpacked
  positionally or read by attribute.

  Fields:
  - module: object, module object. None when the name could not be resolved
    through sys.modules.
  - module_name: str, module name.
  """
  # The module object as found in sys.modules, or None if it was not found.
  module: Optional[types.ModuleType]
  # The name associated with the module.
  module_name: str
89
90
def get_module_object_and_name(
    globals_dict: Dict[str, Any]
) -> _ModuleObjectAndName:
  """Looks up the module owning a globals dictionary, together with its name.

  The module is resolved by reading '__name__' from the dictionary and looking
  that name up in sys.modules.

  Args:
    globals_dict: A dictionary that should correspond to an environment
      providing the values of the globals.

  Returns:
    _ModuleObjectAndName - pair of module object & module name. The module is
    None when the name is not registered in sys.modules, and the name is None
    when the dictionary has no '__name__' entry. For the main module the name
    is replaced by sys.argv[0].
  """
  module_name = globals_dict.get('__name__')
  module = sys.modules.get(module_name)
  if module_name == '__main__':
    # '__main__' says little; the script path is a more informative name.
    module_name = sys.argv[0]
  return _ModuleObjectAndName(module, module_name)
109
110
def get_calling_module_object_and_name() -> _ModuleObjectAndName:
  """Returns the module that's calling into this module.

  We generally use this function to get the name of the module calling a
  DEFINE_foo... function.

  The call stack is walked outwards, starting at the caller of this function.
  The first frame whose module id is not in disclaim_module_ids and whose
  module name is known is the one reported.

  Returns:
    _ModuleObjectAndName - the module object and module name of the first
    qualifying frame.

  Raises:
    AssertionError: Raised when no calling module could be identified.
  """
  # sys._getframe is the right thing to use here, as it's the best
  # way to walk up the call stack. Following f_back visits each frame exactly
  # once and ends with None at the outermost frame, so running off the top of
  # the stack yields the documented AssertionError instead of the ValueError
  # that sys._getframe(depth) raises for a too-large depth.
  frame = sys._getframe().f_back  # pylint: disable=protected-access
  while frame is not None:
    module, module_name = get_module_object_and_name(frame.f_globals)
    if id(module) not in disclaim_module_ids and module_name is not None:
      return _ModuleObjectAndName(module, module_name)
    frame = frame.f_back
  raise AssertionError('No module was found')
131
132
def get_calling_module() -> str:
  """Returns only the name part of get_calling_module_object_and_name()."""
  _, calling_module_name = get_calling_module_object_and_name()
  return calling_module_name
136
137
def create_xml_dom_element(
    doc: minidom.Document, name: str, value: Any
) -> minidom.Element:
  """Builds an XML DOM element whose single child is a text node.

  Args:
    doc: minidom.Document, the DOM document used as node factory.
    name: str, the tag of the XML element.
    value: object, converted with str() to obtain the element text. Booleans
        are written in lower case. Characters matched by
        _ILLEGAL_XML_CHARS_REGEX are removed from the text.

  Returns:
    An instance of minidom.Element.
  """
  if isinstance(value, bool):
    # Display boolean values as the C++ flag library does: no caps.
    text = 'true' if value else 'false'
  else:
    text = str(value)
  sanitized = _ILLEGAL_XML_CHARS_REGEX.sub(u'', text)

  element = doc.createElement(name)
  text_node = doc.createTextNode(sanitized)
  element.appendChild(text_node)
  return element
163
164
def get_help_width() -> int:
  """Returns the integer width of help lines that is used in TextWrap.

  The width is taken from the terminal attached to sys.stdout when stdout is a
  tty and both the fcntl and termios modules are available. If the terminal
  reports fewer than _MIN_HELP_WIDTH columns, the COLUMNS environment variable
  is used instead. Whenever the width cannot be determined, including when
  COLUMNS is not a valid integer, _DEFAULT_HELP_WIDTH is returned.

  Returns:
    int, the number of columns to wrap help text to.
  """
  if not sys.stdout.isatty() or termios is None or fcntl is None:
    return _DEFAULT_HELP_WIDTH
  try:
    data = fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, b'1234')
    # The second of the two unpacked shorts is used as the column count.
    columns = struct.unpack('hh', data)[1]
    # Emacs mode returns 0.
    # Here we assume that any value below 40 is unreasonable.
    if columns >= _MIN_HELP_WIDTH:
      return columns
    # Returning an int as default is fine, int(int) just return the int.
    return int(os.getenv('COLUMNS', _DEFAULT_HELP_WIDTH))

  except (TypeError, ValueError, IOError, struct.error):
    # ValueError covers a COLUMNS value that is not an integer (e.g. empty or
    # garbage); help output should still work in that case.
    return _DEFAULT_HELP_WIDTH
181
182
def get_flag_suggestions(
    attempt: Optional[str], longopt_list: Sequence[str]
) -> List[str]:
  """Returns helpful similar matches for an invalid flag.

  Args:
    attempt: str or None, the flag name that failed to match. None and
        strings of two characters or fewer never produce suggestions.
    longopt_list: sequence of str, the known long options. Anything from the
        first '=' onwards in an entry is ignored.

  Returns:
    A list of the option names whose prefix (of the same length as attempt)
    has the smallest edit distance to attempt, in sorted order. The list is
    empty when even the best match reaches the
    _SUGGESTION_ERROR_RATE_THRESHOLD error rate.
  """
  # Don't suggest on missing or very short strings, or if no longopts are
  # specified.
  if attempt is None or len(attempt) <= 2 or not longopt_list:
    return []

  option_names = [v.split('=')[0] for v in longopt_list]

  # Find close approximations in flag prefixes.
  # This also handles the case where the flag is spelled right but ambiguous.
  distances = [(_damerau_levenshtein(attempt, option[0:len(attempt)]), option)
               for option in option_names]
  # t[0] is distance, and sorting by t[1] allows us to have stable output.
  distances.sort()

  least_errors, _ = distances[0]
  # Don't suggest excessively bad matches.
  if least_errors >= _SUGGESTION_ERROR_RATE_THRESHOLD * len(attempt):
    return []

  # The list is sorted by distance, so the best matches form its head and
  # keep their sorted order here.
  return [name for errors, name in distances if errors == least_errors]
212
213
def _damerau_levenshtein(a, b):
  """Returns Damerau-Levenshtein edit distance from a to b.

  The allowed edits, each costing 1, are: dropping a character of a, dropping
  a character of b, replacing a character, and swapping two adjacent
  characters (after which both characters count as consumed).

  The distance is computed bottom-up over suffixes of a and b, so the call
  depth does not grow with the input length and long inputs cannot exhaust
  the interpreter's recursion limit.

  Args:
    a: str, the first string.
    b: str, the second string.

  Returns:
    int, the minimum number of edits.
  """
  len_a, len_b = len(a), len(b)
  # below[j] is the distance between a[i + 1:] and b[j:]. Initially i + 1 ==
  # len_a, i.e. the suffix of a is empty and all of b[j:] is left over.
  below = list(range(len_b, -1, -1))
  # two_below[j] is the distance between a[i + 2:] and b[j:]. It is only read
  # once a has at least two characters left.
  two_below = below
  for i in range(len_a - 1, -1, -1):
    current = [0] * (len_b + 1)
    # The suffix of b is empty: all of a[i:] is left over.
    current[len_b] = len_a - i
    for j in range(len_b - 1, -1, -1):
      d = min(
          below[j] + 1,  # correct an insertion error
          current[j + 1] + 1,  # correct a deletion error
          below[j + 1] + (a[i] != b[j]))  # correct a wrong character
      if (i + 1 < len_a and j + 1 < len_b and
          a[i] == b[j + 1] and a[i + 1] == b[j]):
        # Correct a transposition.
        d = min(d, two_below[j + 2] + 1)
      current[j] = d
    two_below, below = below, current
  return below[0]
240
241
def text_wrap(
    text: str,
    length: Optional[int] = None,
    indent: str = '',
    firstline_indent: Optional[str] = None,
) -> str:
  """Re-flows text so that no line is longer than a given length.

  Every input line is treated as its own paragraph: it is stripped, wrapped
  and indented. Lines containing only whitespace become empty lines, line
  breaks of the input are kept, and tabs are expanded with a tab size of 4.

  Args:
    text: str, text to wrap.
    length: int, maximum length of a line, includes indentation.
        If this is None then use get_help_width()
    indent: str, indent for all but first line.
    firstline_indent: str, indent for first line; if None, fall back to indent.

  Returns:
    str, the wrapped text.

  Raises:
    ValueError: Raised if indent or firstline_indent not shorter than length.
  """
  # Fill in defaults where the caller passed None.
  if length is None:
    length = get_help_width()
  indent = '' if indent is None else indent
  firstline_indent = indent if firstline_indent is None else firstline_indent

  if len(indent) >= length:
    raise ValueError('Length of indent exceeds length')
  if len(firstline_indent) >= length:
    raise ValueError('Length of first line indent exceeds length')

  # textwrap has no special treatment for newlines; its documentation
  # recommends splitting the text into paragraphs that are wrapped separately,
  # which is what the loop below does for every input line.
  wrapped_lines = []
  for position, raw_line in enumerate(text.expandtabs(4).splitlines()):
    paragraph = raw_line.strip()
    if not paragraph:
      wrapped_lines.append('')  # Keep empty lines.
      continue
    # Only the very first input line starts with the first-line indent.
    leading = firstline_indent if position == 0 else indent
    wrapped_lines.extend(
        textwrap.wrap(
            paragraph,
            width=length,
            initial_indent=leading,
            subsequent_indent=indent))

  return '\n'.join(wrapped_lines)
302
303
def flag_dict_to_args(
    flag_map: Dict[str, Any], multi_flags: Optional[Set[str]] = None
) -> Iterable[str]:
  """Turns a mapping of flag values into command-line arguments.

  The generated arguments are meant for a binary that parses its arguments
  using this module.

  Args:
    flag_map: dict, a mapping where the keys are flag names (strings).
        values are treated according to their type:

        * If value is ``None``, then only the name is emitted.
        * If value is ``True``, then only the name is emitted.
        * If value is ``False``, then only the name prepended with 'no' is
          emitted.
        * If value is a string then ``--name=value`` is emitted.
        * If value is a collection, this will emit
          ``--name=value1,value2,value3``, unless the flag name is in
          ``multi_flags``, in which case this will emit
          ``--name=value1 --name=value2 --name=value3``.
        * Everything else is converted to string and passed as such.

    multi_flags: set, names (strings) of flags that should be treated as
        multi-flags.
  Yields:
    sequence of string suitable for a subprocess execution.
  """
  for name, val in flag_map.items():
    if isinstance(val, bool):
      yield ('--%s' if val else '--no%s') % name
      continue
    if val is None:
      yield '--%s' % name
      continue
    if isinstance(val, (bytes, str)):
      # Strings are iterable, but must not be treated as collections.
      yield '--%s=%s' % (name, val)
      continue
    # Anything else is first tried as a collection; a TypeError (e.g. the
    # value is not iterable) means it is a plain scalar.
    try:
      if multi_flags and name in multi_flags:
        for element in val:
          yield '--%s=%s' % (name, str(element))
      else:
        yield '--%s=%s' % (name, ','.join(map(str, val)))
    except TypeError:
      # Default case.
      yield '--%s=%s' % (name, val)
354
355
def trim_docstring(docstring: str) -> str:
  """Strips the common indentation from a triple-quoted string.

  This is the function specified in PEP 257 to handle docstrings:
  https://www.python.org/dev/peps/pep-0257/.

  Args:
    docstring: str, a python docstring.

  Returns:
    str, docstring with indentation removed. Leading and trailing blank lines
    are dropped as well; an empty or None docstring gives ''.
  """
  if not docstring:
    return ''

  # If you've got a line longer than this you have other problems...
  max_indent = 1 << 29

  # Tabs become spaces (following the normal Python rules) before the text is
  # cut into lines.
  first_line, *other_lines = docstring.expandtabs().splitlines()

  # The smallest indentation among the non-blank lines after the first one;
  # stays at max_indent when there is no such line.
  indent = min(
      [max_indent] +
      [len(line) - len(line.lstrip()) for line in other_lines if line.strip()])

  # The first line is special: it is simply stripped on both sides.
  trimmed = [first_line.strip()]
  if indent < max_indent:
    trimmed.extend(line[indent:].rstrip() for line in other_lines)

  # Narrow the window to skip blank lines at the end and at the beginning.
  start, stop = 0, len(trimmed)
  while stop > start and not trimmed[stop - 1]:
    stop -= 1
  while start < stop and not trimmed[start]:
    start += 1
  return '\n'.join(trimmed[start:stop])
396
397
def doc_to_help(doc: str) -> str:
  """Takes a __doc__ string and reformats it as help.

  Args:
    doc: str, the documentation string to convert.

  Returns:
    str, the text with surrounding whitespace removed, common indentation cut
    off, and single line breaks between non-whitespace characters replaced by
    a space.
  """
  # Remove whitespace around the whole text, then turn lines consisting only
  # of spaces and tabs into truly empty lines.
  blanked = re.sub('^[ \t]+$', '', doc.strip(), flags=re.M)

  # Cut out common space at line beginnings.
  dedented = trim_docstring(blanked)

  # Docstring lines tend to be aligned, i.e. they all start with the same
  # amount of white space. The rules are:
  # 1) keep double new lines;
  # 2) keep ws after new lines if not empty line;
  # 3) all other new lines shall be changed to a space;
  # Solution: Match new lines between non white space and replace with space.
  return re.sub(r'(?<=\S)\n(?=\S)', ' ', dedented, flags=re.M)
422