# Copyright 2017 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Internal helper functions for Abseil Python flags library."""

import os
import re
import struct
import sys
import textwrap
import types
from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Sequence, Set
from xml.dom import minidom
# pylint: disable=g-import-not-at-top
try:
  import fcntl
except ImportError:
  fcntl = None
try:
  # Importing termios will fail on non-unix platforms.
  import termios
except ImportError:
  termios = None
# pylint: enable=g-import-not-at-top


_DEFAULT_HELP_WIDTH = 80  # Default width of help output.
# Minimal "sane" width of help output. We assume that any value below 40 is
# unreasonable.
_MIN_HELP_WIDTH = 40

# Define the allowed error rate in an input string to get suggestions.
#
# We lean towards a high threshold because we tend to be matching a phrase,
# and the simple algorithm used here is geared towards correcting word
# spellings.
#
# For manual testing, consider "<command> --list" which produced a large number
# of spurious suggestions when we used "least_errors > 0.5" instead of
# "least_errors >= 0.5".
_SUGGESTION_ERROR_RATE_THRESHOLD = 0.50

# Characters that cannot appear or are highly discouraged in an XML 1.0
# document. (See http://www.w3.org/TR/REC-xml/#charsets or
# https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0)
_ILLEGAL_XML_CHARS_REGEX = re.compile(
    u'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]')

# This is a set of module ids for the modules that disclaim key flags.
# This module is explicitly added to this set so that we never consider it to
# define key flag.
disclaim_module_ids: Set[int] = {id(sys.modules[__name__])}


# Define special flags here so that help may be generated for them.
# NOTE: Please do NOT use SPECIAL_FLAGS from outside flags module.
# Initialized inside flagvalues.py.
# NOTE: This cannot be annotated as its actual FlagValues type since this would
# create a circular dependency.
SPECIAL_FLAGS: Any = None


# This points to the flags module, initialized in flags/__init__.py.
# This should only be used in adopt_module_key_flags to take SPECIAL_FLAGS into
# account.
FLAGS_MODULE: Optional[types.ModuleType] = None


class _ModuleObjectAndName(NamedTuple):
  """Module object and name.

  Fields:
  - module: object, module object.
  - module_name: str, module name.
  """
  module: types.ModuleType
  module_name: str


def get_module_object_and_name(
    globals_dict: Dict[str, Any]
) -> _ModuleObjectAndName:
  """Returns the module that defines a global environment, and its name.

  Args:
    globals_dict: A dictionary that should correspond to an environment
      providing the values of the globals.

  Returns:
    _ModuleObjectAndName - pair of module object & module name.
    Returns (None, None) if the module could not be identified.
  """
  name = globals_dict.get('__name__', None)
  module = sys.modules.get(name, None)
  # Pick a more informative name for the main module.
  module_name = sys.argv[0] if name == '__main__' else name
  return _ModuleObjectAndName(module, module_name)


def get_calling_module_object_and_name() -> _ModuleObjectAndName:
  """Returns the module that's calling into this module.

  We generally use this function to get the name of the module calling a
  DEFINE_foo... function.

  Returns:
    The module object that called into this one.

  Raises:
    AssertionError: Raised when no calling module could be identified.
  """
  for depth in range(1, sys.getrecursionlimit()):
    # sys._getframe is the right thing to use here, as it's the best
    # way to walk up the call stack.
    try:
      frame = sys._getframe(depth)  # pylint: disable=protected-access
    except ValueError:
      # sys._getframe raises ValueError once depth runs past the top of the
      # call stack; stop walking so the documented AssertionError is raised.
      break
    module, module_name = get_module_object_and_name(frame.f_globals)
    if id(module) not in disclaim_module_ids and module_name is not None:
      return _ModuleObjectAndName(module, module_name)
  raise AssertionError('No module was found')


def get_calling_module() -> str:
  """Returns the name of the module that's calling into this module."""
  return get_calling_module_object_and_name().module_name


def create_xml_dom_element(
    doc: minidom.Document, name: str, value: Any
) -> minidom.Element:
  """Returns an XML DOM element with name and text value.

  Args:
    doc: minidom.Document, the DOM document it should create nodes from.
    name: str, the tag of XML element.
    value: object, whose string representation will be used
      as the value of the XML element. Illegal or highly discouraged xml 1.0
      characters are stripped.

  Returns:
    An instance of minidom.Element.
  """
  s = str(value)
  if isinstance(value, bool):
    # Display boolean values as the C++ flag library does: no caps.
    s = s.lower()
  # Remove illegal xml characters.
  s = _ILLEGAL_XML_CHARS_REGEX.sub(u'', s)

  e = doc.createElement(name)
  e.appendChild(doc.createTextNode(s))
  return e


def get_help_width() -> int:
  """Returns the integer width of help lines that is used in TextWrap.

  Falls back to _DEFAULT_HELP_WIDTH when stdout is not a terminal, when
  fcntl/termios are unavailable, or when the width cannot be determined.
  """
  if not sys.stdout.isatty() or termios is None or fcntl is None:
    return _DEFAULT_HELP_WIDTH
  try:
    data = fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, b'1234')
    columns = struct.unpack('hh', data)[1]
    # Emacs mode returns 0.
    # Here we assume that any value below 40 is unreasonable.
    if columns >= _MIN_HELP_WIDTH:
      return columns
    # Returning an int as default is fine, int(int) just return the int.
    return int(os.getenv('COLUMNS', _DEFAULT_HELP_WIDTH))

  except (TypeError, ValueError, IOError, struct.error):
    # ValueError covers a COLUMNS environment value that is not an integer.
    return _DEFAULT_HELP_WIDTH


def get_flag_suggestions(
    attempt: Optional[str], longopt_list: Sequence[str]
) -> List[str]:
  """Returns helpful similar matches for an invalid flag.

  Args:
    attempt: str or None, the flag name the user typed.
    longopt_list: sequence of str, known long option names, each optionally
      followed by '=' and further text, which is ignored.

  Returns:
    A list of the option names closest to `attempt`; empty when `attempt` is
    None, is two characters or shorter, when there are no options, or when
    the best match is too far off.
  """
  # Don't suggest on missing or very short strings, or if no longopts are
  # specified.
  if not attempt or len(attempt) <= 2 or not longopt_list:
    return []

  option_names = [v.split('=')[0] for v in longopt_list]

  # Find close approximations in flag prefixes.
  # This also handles the case where the flag is spelled right but ambiguous.
  distances = [(_damerau_levenshtein(attempt, option[0:len(attempt)]), option)
               for option in option_names]
  # t[0] is distance, and sorting by t[1] allows us to have stable output.
  distances.sort()

  least_errors, _ = distances[0]
  # Don't suggest excessively bad matches.
  if least_errors >= _SUGGESTION_ERROR_RATE_THRESHOLD * len(attempt):
    return []

  # The list is sorted by distance, so every best match sits at the front.
  return [name for errors, name in distances if errors == least_errors]


def _damerau_levenshtein(a, b):
  """Returns Damerau-Levenshtein edit distance from a to b.

  Args:
    a: str, the source string.
    b: str, the target string.

  Returns:
    int, the number of single-character insertions, deletions, substitutions
    and adjacent transpositions counted by the recursion below.
  """
  memo = {}

  def distance(x, y):
    """Recursively defined string distance with memoization."""
    if (x, y) in memo:
      return memo[x, y]
    if not x:
      d = len(y)
    elif not y:
      d = len(x)
    else:
      d = min(
          distance(x[1:], y) + 1,  # correct an insertion error
          distance(x, y[1:]) + 1,  # correct a deletion error
          distance(x[1:], y[1:]) + (x[0] != y[0]))  # correct a wrong character
      if len(x) >= 2 and len(y) >= 2 and x[0] == y[1] and x[1] == y[0]:
        # Correct a transposition.
        t = distance(x[2:], y[2:]) + 1
        if d > t:
          d = t

    memo[x, y] = d
    return d
  return distance(a, b)


def text_wrap(
    text: str,
    length: Optional[int] = None,
    indent: str = '',
    firstline_indent: Optional[str] = None,
) -> str:
  """Wraps a given text to a maximum line length and returns it.

  It turns lines that only contain whitespace into empty lines, keeps new lines,
  and expands tabs using 4 spaces.

  Args:
    text: str, text to wrap.
    length: int, maximum length of a line, includes indentation.
      If this is None then use get_help_width()
    indent: str, indent for all but first line.
    firstline_indent: str, indent for first line; if None, fall back to indent.

  Returns:
    str, the wrapped text.

  Raises:
    ValueError: Raised if indent or firstline_indent not shorter than length.
  """
  # Get defaults where the caller used None.
  if length is None:
    length = get_help_width()
  if indent is None:
    indent = ''
  if firstline_indent is None:
    firstline_indent = indent

  if len(indent) >= length:
    raise ValueError('Length of indent exceeds length')
  if len(firstline_indent) >= length:
    raise ValueError('Length of first line indent exceeds length')

  text = text.expandtabs(4)

  result = []
  # Create one wrapper for the first paragraph and one for subsequent
  # paragraphs that does not have the initial wrapping.
  wrapper = textwrap.TextWrapper(
      width=length, initial_indent=firstline_indent, subsequent_indent=indent)
  subsequent_wrapper = textwrap.TextWrapper(
      width=length, initial_indent=indent, subsequent_indent=indent)

  # textwrap does not have any special treatment for newlines. From the docs:
  # "...newlines may appear in the middle of a line and cause strange output.
  # For this reason, text should be split into paragraphs (using
  # str.splitlines() or similar) which are wrapped separately."
  for paragraph in (p.strip() for p in text.splitlines()):
    if paragraph:
      result.extend(wrapper.wrap(paragraph))
    else:
      result.append('')  # Keep empty lines.
    # Replace initial wrapper with wrapper for subsequent paragraphs.
    wrapper = subsequent_wrapper

  return '\n'.join(result)


def flag_dict_to_args(
    flag_map: Dict[str, Any], multi_flags: Optional[Set[str]] = None
) -> Iterable[str]:
  """Convert a dict of values into process call parameters.

  This method is used to convert a dictionary into a sequence of parameters
  for a binary that parses arguments using this module.

  Args:
    flag_map: dict, a mapping where the keys are flag names (strings).
      values are treated according to their type:

      * If value is ``None``, then only the name is emitted.
      * If value is ``True``, then only the name is emitted.
      * If value is ``False``, then only the name prepended with 'no' is
        emitted.
      * If value is a string then ``--name=value`` is emitted.
      * If value is a collection, this will emit
        ``--name=value1,value2,value3``, unless the flag name is in
        ``multi_flags``, in which case this will emit
        ``--name=value1 --name=value2 --name=value3``.
      * Everything else is converted to string and passed as such.

    multi_flags: set, names (strings) of flags that should be treated as
      multi-flags.
  Yields:
    sequence of string suitable for a subprocess execution.
  """
  for key, value in flag_map.items():
    if value is None:
      yield '--%s' % key
    elif isinstance(value, bool):
      if value:
        yield '--%s' % key
      else:
        yield '--no%s' % key
    elif isinstance(value, (bytes, str)):
      # We don't want strings to be handled like python collections.
      yield '--%s=%s' % (key, value)
    else:
      # Now we attempt to deal with collections.
      try:
        if multi_flags and key in multi_flags:
          for item in value:
            yield '--%s=%s' % (key, str(item))
        else:
          yield '--%s=%s' % (key, ','.join(str(item) for item in value))
      except TypeError:
        # Default case: the value is not iterable.
        yield '--%s=%s' % (key, value)


def trim_docstring(docstring: str) -> str:
  """Removes indentation from triple-quoted strings.

  This is the function specified in PEP 257 to handle docstrings:
  https://www.python.org/dev/peps/pep-0257/.

  Args:
    docstring: str, a python docstring.

  Returns:
    str, docstring with indentation removed.
  """
  if not docstring:
    return ''

  # If you've got a line longer than this you have other problems...
  max_indent = 1 << 29

  # Convert tabs to spaces (following the normal Python rules)
  # and split into a list of lines:
  lines = docstring.expandtabs().splitlines()

  # Determine minimum indentation (first line doesn't count):
  indent = max_indent
  for line in lines[1:]:
    stripped = line.lstrip()
    if stripped:
      indent = min(indent, len(line) - len(stripped))
  # Remove indentation (first line is special):
  trimmed = [lines[0].strip()]
  if indent < max_indent:
    for line in lines[1:]:
      trimmed.append(line[indent:].rstrip())
  # Strip off trailing and leading blank lines:
  while trimmed and not trimmed[-1]:
    trimmed.pop()
  while trimmed and not trimmed[0]:
    trimmed.pop(0)
  # Return a single string:
  return '\n'.join(trimmed)


def doc_to_help(doc: str) -> str:
  """Takes a __doc__ string and reformats it as help.

  Args:
    doc: str, a docstring.

  Returns:
    str, the text with surrounding whitespace and common indentation removed,
    and single newlines between non-whitespace characters joined by a space.
  """

  # Get rid of starting and ending white space.
  doc = doc.strip()

  # Turn whitespace-only lines into truly empty lines.
  whitespace_only_line = re.compile('^[ \t]+$', re.M)
  doc = whitespace_only_line.sub('', doc)

  # Cut out common space at line beginnings.
  doc = trim_docstring(doc)

  # Just like this module's comment, comments tend to be aligned somehow.
  # In other words they all start with the same amount of white space.
  # 1) keep double new lines;
  # 2) keep ws after new lines if not empty line;
  # 3) all other new lines shall be changed to a space;
  # Solution: Match new lines between non white space and replace with space.
  doc = re.sub(r'(?<=\S)\n(?=\S)', ' ', doc, flags=re.M)

  return doc