# module pyparsing.py
#
# Copyright (c) 2003-2018 Paul T. McGuire
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#

__doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars
=============================================================================

The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.

Here is a program to parse "Hello, World!" (or any greeting of the form
C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
L{Literal} expressions)::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word(alphas) + "," + Word(alphas) + "!"

    hello = "Hello, World!"
    print (hello, "->", greet.parseString(hello))

The program outputs the following::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.

The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
 - quoted strings
 - embedded comments


Getting Started -
-----------------
Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
classes inherit from. Use the docstrings for examples of how to:
 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
 - construct character word-group expressions using the L{Word} class
 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
 - associate names with your parsed results using L{ParserElement.setResultsName}
 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
 - find more useful common expressions in the L{pyparsing_common} namespace class
"""

__version__ = "2.2.1"
__versionTime__ = "18 Sep 2018 00:49 UTC"
__author__ = "Paul McGuire <[email protected]>"

import string
from weakref import ref as wkref
import copy
import sys
import warnings
import re
import sre_constants
import collections
import pprint
import traceback
import types
from datetime import datetime

try:
    from _thread import RLock
except ImportError:
    from threading import RLock

try:
    # Python 3
    from collections.abc import Iterable
    from collections.abc import MutableMapping
except ImportError:
    # Python 2.7
    from collections import Iterable
    from collections import MutableMapping

try:
    from collections import OrderedDict as _OrderedDict
except ImportError:
    try:
        from ordereddict import OrderedDict as _OrderedDict
    except ImportError:
        _OrderedDict = None

#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]

# Interpreter detection; everything below that differs between Python 2 and 3
# is bound once here so the rest of the module can use a single spelling.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # single-argument builtins that may be used directly as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj) on Python 2.

        A unicode object is returned untouched.  Otherwise str(obj) is tried
        first so existing behaviour is kept; only when that raises
        UnicodeEncodeError is the value encoded with XML character references,
        which are then rewritten as backslash-u escapes.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # Same result as str(obj) whenever str(obj) works at all.
            return str(obj)
        except UnicodeEncodeError:
            # Fall back to an encodable representation of the text.
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            charref = Regex(r'&#\d+;')
            charref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return charref.transformString(encoded)

    # single-argument builtins usable as parse actions; names missing from
    # this interpreter's __builtin__ module are simply left out
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        if hasattr(__builtin__, fname):
            singleArgBuiltins.append(getattr(__builtin__, fname))

_generatorType = type((y for y in range(1)))

def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data."""

    # The ampersand pair is first on purpose: replacing it later would
    # re-escape the '&' introduced by the other entities.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw, entity in replacements:
        data = data.replace(raw, entity)
    return data

class _Constants(object):
    pass

alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
printables = "".join(c for c in string.printable if c not in string.whitespace)

class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: a great many of these are constructed, so the
    # constructor is kept as small and fast as possible.
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is None:
            # single-argument form: the one string given is the message
            self.msg, self.pstr = pstr, ""
        else:
            self.msg, self.pstr = msg, pstr
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        return "%s (at char %d), (line:%d, col:%d)" % (
            self.msg, self.loc, self.lineno, self.column)

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        marked = self.line
        insert_at = self.column - 1
        if markerString:
            marked = marked[:insert_at] + markerString + marked[insert_at:]
        return marked.strip()

    def __dir__(self):
        return ["lineno", "col", "line"] + dir(type(self))

class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match class;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
    pass

class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
    pass

class ParseSyntaxException(ParseFatalException):
    """just like L{ParseFatalException}, but thrown internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
       immediately because an unbacktrackable syntax error has been found"""
    pass

#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
    #~ pyparsing to reparse the input string:
    #~ - with a modified input string, and/or
    #~ - with a modified start location
    #~ Set the values of the ReparseException in the constructor, and raise the
    #~ exception in a parse action to cause pyparsing to use the new string/location.
    #~ Setting the values as None causes no change to be made.
301 #~ """ 302 #~ def __init_( self, newstring, restartLoc ): 303 #~ self.newParseText = newstring 304 #~ self.reparseLoc = restartLoc 305 306class RecursiveGrammarException(Exception): 307 """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive""" 308 def __init__( self, parseElementList ): 309 self.parseElementTrace = parseElementList 310 311 def __str__( self ): 312 return "RecursiveGrammarException: %s" % self.parseElementTrace 313 314class _ParseResultsWithOffset(object): 315 def __init__(self,p1,p2): 316 self.tup = (p1,p2) 317 def __getitem__(self,i): 318 return self.tup[i] 319 def __repr__(self): 320 return repr(self.tup[0]) 321 def setOffset(self,i): 322 self.tup = (self.tup[0],i) 323 324class ParseResults(object): 325 """ 326 Structured parse results, to provide multiple means of access to the parsed data: 327 - as a list (C{len(results)}) 328 - by list index (C{results[0], results[1]}, etc.) 329 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 330 331 Example:: 332 integer = Word(nums) 333 date_str = (integer.setResultsName("year") + '/' 334 + integer.setResultsName("month") + '/' 335 + integer.setResultsName("day")) 336 # equivalent form: 337 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 338 339 # parseString returns a ParseResults object 340 result = date_str.parseString("1999/12/31") 341 342 def test(s, fn=repr): 343 print("%s -> %s" % (s, fn(eval(s)))) 344 test("list(result)") 345 test("result[0]") 346 test("result['month']") 347 test("result.day") 348 test("'month' in result") 349 test("'minutes' in result") 350 test("result.dump()", str) 351 prints:: 352 list(result) -> ['1999', '/', '12', '/', '31'] 353 result[0] -> '1999' 354 result['month'] -> '12' 355 result.day -> '31' 356 'month' in result -> True 357 'minutes' in result -> False 358 result.dump() -> ['1999', '/', '12', '/', '31'] 359 - day: 31 360 - month: 12 361 - year: 1999 362 """ 363 def 
__new__(cls, toklist=None, name=None, asList=True, modal=True ): 364 if isinstance(toklist, cls): 365 return toklist 366 retobj = object.__new__(cls) 367 retobj.__doinit = True 368 return retobj 369 370 # Performance tuning: we construct a *lot* of these, so keep this 371 # constructor as small and fast as possible 372 def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ): 373 if self.__doinit: 374 self.__doinit = False 375 self.__name = None 376 self.__parent = None 377 self.__accumNames = {} 378 self.__asList = asList 379 self.__modal = modal 380 if toklist is None: 381 toklist = [] 382 if isinstance(toklist, list): 383 self.__toklist = toklist[:] 384 elif isinstance(toklist, _generatorType): 385 self.__toklist = list(toklist) 386 else: 387 self.__toklist = [toklist] 388 self.__tokdict = dict() 389 390 if name is not None and name: 391 if not modal: 392 self.__accumNames[name] = 0 393 if isinstance(name,int): 394 name = _ustr(name) # will always return a str, but use _ustr for consistency 395 self.__name = name 396 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): 397 if isinstance(toklist,basestring): 398 toklist = [ toklist ] 399 if asList: 400 if isinstance(toklist,ParseResults): 401 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 402 else: 403 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 404 self[name].__name = name 405 else: 406 try: 407 self[name] = toklist[0] 408 except (KeyError,TypeError,IndexError): 409 self[name] = toklist 410 411 def __getitem__( self, i ): 412 if isinstance( i, (int,slice) ): 413 return self.__toklist[i] 414 else: 415 if i not in self.__accumNames: 416 return self.__tokdict[i][-1][0] 417 else: 418 return ParseResults([ v[0] for v in self.__tokdict[i] ]) 419 420 def __setitem__( self, k, v, isinstance=isinstance ): 421 if isinstance(v,_ParseResultsWithOffset): 422 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 423 sub = 
v[0] 424 elif isinstance(k,(int,slice)): 425 self.__toklist[k] = v 426 sub = v 427 else: 428 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 429 sub = v 430 if isinstance(sub,ParseResults): 431 sub.__parent = wkref(self) 432 433 def __delitem__( self, i ): 434 if isinstance(i,(int,slice)): 435 mylen = len( self.__toklist ) 436 del self.__toklist[i] 437 438 # convert int to slice 439 if isinstance(i, int): 440 if i < 0: 441 i += mylen 442 i = slice(i, i+1) 443 # get removed indices 444 removed = list(range(*i.indices(mylen))) 445 removed.reverse() 446 # fixup indices in token dictionary 447 for name,occurrences in self.__tokdict.items(): 448 for j in removed: 449 for k, (value, position) in enumerate(occurrences): 450 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 451 else: 452 del self.__tokdict[i] 453 454 def __contains__( self, k ): 455 return k in self.__tokdict 456 457 def __len__( self ): return len( self.__toklist ) 458 def __bool__(self): return ( not not self.__toklist ) 459 __nonzero__ = __bool__ 460 def __iter__( self ): return iter( self.__toklist ) 461 def __reversed__( self ): return iter( self.__toklist[::-1] ) 462 def _iterkeys( self ): 463 if hasattr(self.__tokdict, "iterkeys"): 464 return self.__tokdict.iterkeys() 465 else: 466 return iter(self.__tokdict) 467 468 def _itervalues( self ): 469 return (self[k] for k in self._iterkeys()) 470 471 def _iteritems( self ): 472 return ((k, self[k]) for k in self._iterkeys()) 473 474 if PY_3: 475 keys = _iterkeys 476 """Returns an iterator of all named result keys (Python 3.x only).""" 477 478 values = _itervalues 479 """Returns an iterator of all named result values (Python 3.x only).""" 480 481 items = _iteritems 482 """Returns an iterator of all named result key-value tuples (Python 3.x only).""" 483 484 else: 485 iterkeys = _iterkeys 486 """Returns an iterator of all named result keys (Python 2.x only).""" 487 488 itervalues = _itervalues 489 
"""Returns an iterator of all named result values (Python 2.x only).""" 490 491 iteritems = _iteritems 492 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 493 494 def keys( self ): 495 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 496 return list(self.iterkeys()) 497 498 def values( self ): 499 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 500 return list(self.itervalues()) 501 502 def items( self ): 503 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 504 return list(self.iteritems()) 505 506 def haskeys( self ): 507 """Since keys() returns an iterator, this method is helpful in bypassing 508 code that looks for the existence of any defined results names.""" 509 return bool(self.__tokdict) 510 511 def pop( self, *args, **kwargs): 512 """ 513 Removes and returns item at specified index (default=C{last}). 514 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no 515 argument or an integer argument, it will use C{list} semantics 516 and pop tokens from the list of parsed tokens. If passed a 517 non-integer argument (most likely a string), it will use C{dict} 518 semantics and pop the corresponding value from any defined 519 results names. A second default return value argument is 520 supported, just as in C{dict.pop()}. 
521 522 Example:: 523 def remove_first(tokens): 524 tokens.pop(0) 525 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 526 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 527 528 label = Word(alphas) 529 patt = label("LABEL") + OneOrMore(Word(nums)) 530 print(patt.parseString("AAB 123 321").dump()) 531 532 # Use pop() in a parse action to remove named result (note that corresponding value is not 533 # removed from list form of results) 534 def remove_LABEL(tokens): 535 tokens.pop("LABEL") 536 return tokens 537 patt.addParseAction(remove_LABEL) 538 print(patt.parseString("AAB 123 321").dump()) 539 prints:: 540 ['AAB', '123', '321'] 541 - LABEL: AAB 542 543 ['AAB', '123', '321'] 544 """ 545 if not args: 546 args = [-1] 547 for k,v in kwargs.items(): 548 if k == 'default': 549 args = (args[0], v) 550 else: 551 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 552 if (isinstance(args[0], int) or 553 len(args) == 1 or 554 args[0] in self): 555 index = args[0] 556 ret = self[index] 557 del self[index] 558 return ret 559 else: 560 defaultvalue = args[1] 561 return defaultvalue 562 563 def get(self, key, defaultValue=None): 564 """ 565 Returns named result matching the given key, or if there is no 566 such name, then returns the given C{defaultValue} or C{None} if no 567 C{defaultValue} is specified. 568 569 Similar to C{dict.get()}. 570 571 Example:: 572 integer = Word(nums) 573 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 574 575 result = date_str.parseString("1999/12/31") 576 print(result.get("year")) # -> '1999' 577 print(result.get("hour", "not specified")) # -> 'not specified' 578 print(result.get("hour")) # -> None 579 """ 580 if key in self: 581 return self[key] 582 else: 583 return defaultValue 584 585 def insert( self, index, insStr ): 586 """ 587 Inserts new element at location index in the list of parsed tokens. 
588 589 Similar to C{list.insert()}. 590 591 Example:: 592 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 593 594 # use a parse action to insert the parse location in the front of the parsed results 595 def insert_locn(locn, tokens): 596 tokens.insert(0, locn) 597 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 598 """ 599 self.__toklist.insert(index, insStr) 600 # fixup indices in token dictionary 601 for name,occurrences in self.__tokdict.items(): 602 for k, (value, position) in enumerate(occurrences): 603 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 604 605 def append( self, item ): 606 """ 607 Add single element to end of ParseResults list of elements. 608 609 Example:: 610 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 611 612 # use a parse action to compute the sum of the parsed integers, and add it to the end 613 def append_sum(tokens): 614 tokens.append(sum(map(int, tokens))) 615 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 616 """ 617 self.__toklist.append(item) 618 619 def extend( self, itemseq ): 620 """ 621 Add sequence of elements to end of ParseResults list of elements. 622 623 Example:: 624 patt = OneOrMore(Word(alphas)) 625 626 # use a parse action to append the reverse of the matched strings, to make a palindrome 627 def make_palindrome(tokens): 628 tokens.extend(reversed([t[::-1] for t in tokens])) 629 return ''.join(tokens) 630 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 631 """ 632 if isinstance(itemseq, ParseResults): 633 self += itemseq 634 else: 635 self.__toklist.extend(itemseq) 636 637 def clear( self ): 638 """ 639 Clear all elements and results names. 
640 """ 641 del self.__toklist[:] 642 self.__tokdict.clear() 643 644 def __getattr__( self, name ): 645 try: 646 return self[name] 647 except KeyError: 648 return "" 649 650 if name in self.__tokdict: 651 if name not in self.__accumNames: 652 return self.__tokdict[name][-1][0] 653 else: 654 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 655 else: 656 return "" 657 658 def __add__( self, other ): 659 ret = self.copy() 660 ret += other 661 return ret 662 663 def __iadd__( self, other ): 664 if other.__tokdict: 665 offset = len(self.__toklist) 666 addoffset = lambda a: offset if a<0 else a+offset 667 otheritems = other.__tokdict.items() 668 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 669 for (k,vlist) in otheritems for v in vlist] 670 for k,v in otherdictitems: 671 self[k] = v 672 if isinstance(v[0],ParseResults): 673 v[0].__parent = wkref(self) 674 675 self.__toklist += other.__toklist 676 self.__accumNames.update( other.__accumNames ) 677 return self 678 679 def __radd__(self, other): 680 if isinstance(other,int) and other == 0: 681 # useful for merging many ParseResults using sum() builtin 682 return self.copy() 683 else: 684 # this may raise a TypeError - so be it 685 return other + self 686 687 def __repr__( self ): 688 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 689 690 def __str__( self ): 691 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' 692 693 def _asStringList( self, sep='' ): 694 out = [] 695 for item in self.__toklist: 696 if out and sep: 697 out.append(sep) 698 if isinstance( item, ParseResults ): 699 out += item._asStringList() 700 else: 701 out.append( _ustr(item) ) 702 return out 703 704 def asList( self ): 705 """ 706 Returns the parse results as a nested list of matching tokens, all converted to strings. 
707 708 Example:: 709 patt = OneOrMore(Word(alphas)) 710 result = patt.parseString("sldkj lsdkj sldkj") 711 # even though the result prints in string-like form, it is actually a pyparsing ParseResults 712 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 713 714 # Use asList() to create an actual list 715 result_list = result.asList() 716 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 717 """ 718 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] 719 720 def asDict( self ): 721 """ 722 Returns the named parse results as a nested dictionary. 723 724 Example:: 725 integer = Word(nums) 726 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 727 728 result = date_str.parseString('12/31/1999') 729 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 730 731 result_dict = result.asDict() 732 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 733 734 # even though a ParseResults supports dict-like access, sometime you just need to have a dict 735 import json 736 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 737 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 738 """ 739 if PY_3: 740 item_fn = self.items 741 else: 742 item_fn = self.iteritems 743 744 def toItem(obj): 745 if isinstance(obj, ParseResults): 746 if obj.haskeys(): 747 return obj.asDict() 748 else: 749 return [toItem(v) for v in obj] 750 else: 751 return obj 752 753 return dict((k,toItem(v)) for k,v in item_fn()) 754 755 def copy( self ): 756 """ 757 Returns a new copy of a C{ParseResults} object. 
758 """ 759 ret = ParseResults( self.__toklist ) 760 ret.__tokdict = self.__tokdict.copy() 761 ret.__parent = self.__parent 762 ret.__accumNames.update( self.__accumNames ) 763 ret.__name = self.__name 764 return ret 765 766 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 767 """ 768 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 769 """ 770 nl = "\n" 771 out = [] 772 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 773 for v in vlist) 774 nextLevelIndent = indent + " " 775 776 # collapse out indents if formatting is not desired 777 if not formatted: 778 indent = "" 779 nextLevelIndent = "" 780 nl = "" 781 782 selfTag = None 783 if doctag is not None: 784 selfTag = doctag 785 else: 786 if self.__name: 787 selfTag = self.__name 788 789 if not selfTag: 790 if namedItemsOnly: 791 return "" 792 else: 793 selfTag = "ITEM" 794 795 out += [ nl, indent, "<", selfTag, ">" ] 796 797 for i,res in enumerate(self.__toklist): 798 if isinstance(res,ParseResults): 799 if i in namedItems: 800 out += [ res.asXML(namedItems[i], 801 namedItemsOnly and doctag is None, 802 nextLevelIndent, 803 formatted)] 804 else: 805 out += [ res.asXML(None, 806 namedItemsOnly and doctag is None, 807 nextLevelIndent, 808 formatted)] 809 else: 810 # individual token, see if there is a name for it 811 resTag = None 812 if i in namedItems: 813 resTag = namedItems[i] 814 if not resTag: 815 if namedItemsOnly: 816 continue 817 else: 818 resTag = "ITEM" 819 xmlBodyText = _xml_escape(_ustr(res)) 820 out += [ nl, nextLevelIndent, "<", resTag, ">", 821 xmlBodyText, 822 "</", resTag, ">" ] 823 824 out += [ nl, indent, "</", selfTag, ">" ] 825 return "".join(out) 826 827 def __lookup(self,sub): 828 for k,vlist in self.__tokdict.items(): 829 for v,loc in vlist: 830 if sub is v: 831 return k 832 return None 833 834 def getName(self): 835 r""" 836 Returns the results name for this token 
expression. Useful when several 837 different expressions might match at a particular location. 838 839 Example:: 840 integer = Word(nums) 841 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 842 house_number_expr = Suppress('#') + Word(nums, alphanums) 843 user_data = (Group(house_number_expr)("house_number") 844 | Group(ssn_expr)("ssn") 845 | Group(integer)("age")) 846 user_info = OneOrMore(user_data) 847 848 result = user_info.parseString("22 111-22-3333 #221B") 849 for item in result: 850 print(item.getName(), ':', item[0]) 851 prints:: 852 age : 22 853 ssn : 111-22-3333 854 house_number : 221B 855 """ 856 if self.__name: 857 return self.__name 858 elif self.__parent: 859 par = self.__parent() 860 if par: 861 return par.__lookup(self) 862 else: 863 return None 864 elif (len(self) == 1 and 865 len(self.__tokdict) == 1 and 866 next(iter(self.__tokdict.values()))[0][1] in (0,-1)): 867 return next(iter(self.__tokdict.keys())) 868 else: 869 return None 870 871 def dump(self, indent='', depth=0, full=True): 872 """ 873 Diagnostic method for listing out the contents of a C{ParseResults}. 874 Accepts an optional C{indent} argument so that this string can be embedded 875 in a nested display of other data. 
876 877 Example:: 878 integer = Word(nums) 879 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 880 881 result = date_str.parseString('12/31/1999') 882 print(result.dump()) 883 prints:: 884 ['12', '/', '31', '/', '1999'] 885 - day: 1999 886 - month: 31 887 - year: 12 888 """ 889 out = [] 890 NL = '\n' 891 out.append( indent+_ustr(self.asList()) ) 892 if full: 893 if self.haskeys(): 894 items = sorted((str(k), v) for k,v in self.items()) 895 for k,v in items: 896 if out: 897 out.append(NL) 898 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 899 if isinstance(v,ParseResults): 900 if v: 901 out.append( v.dump(indent,depth+1) ) 902 else: 903 out.append(_ustr(v)) 904 else: 905 out.append(repr(v)) 906 elif any(isinstance(vv,ParseResults) for vv in self): 907 v = self 908 for i,vv in enumerate(v): 909 if isinstance(vv,ParseResults): 910 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) 911 else: 912 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) 913 914 return "".join(out) 915 916 def pprint(self, *args, **kwargs): 917 """ 918 Pretty-printer for parsed results as a list, using the C{pprint} module. 919 Accepts additional positional or keyword args as defined for the 920 C{pprint.pprint} method. 
(U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) 921 922 Example:: 923 ident = Word(alphas, alphanums) 924 num = Word(nums) 925 func = Forward() 926 term = ident | num | Group('(' + func + ')') 927 func <<= ident + Group(Optional(delimitedList(term))) 928 result = func.parseString("fna a,b,(fnb c,d,200),100") 929 result.pprint(width=40) 930 prints:: 931 ['fna', 932 ['a', 933 'b', 934 ['(', 'fnb', ['c', 'd', '200'], ')'], 935 '100']] 936 """ 937 pprint.pprint(self.asList(), *args, **kwargs) 938 939 # add support for pickle protocol 940 def __getstate__(self): 941 return ( self.__toklist, 942 ( self.__tokdict.copy(), 943 self.__parent is not None and self.__parent() or None, 944 self.__accumNames, 945 self.__name ) ) 946 947 def __setstate__(self,state): 948 self.__toklist = state[0] 949 (self.__tokdict, 950 par, 951 inAccumNames, 952 self.__name) = state[1] 953 self.__accumNames = {} 954 self.__accumNames.update(inAccumNames) 955 if par is not None: 956 self.__parent = wkref(par) 957 else: 958 self.__parent = None 959 960 def __getnewargs__(self): 961 return self.__toklist, self.__name, self.__asList, self.__modal 962 963 def __dir__(self): 964 return (dir(type(self)) + list(self.keys())) 965 966MutableMapping.register(ParseResults) 967 968def col (loc,strg): 969 """Returns current column within a string, counting newlines as line separators. 970 The first column is number 1. 971 972 Note: the default parsing behavior is to expand tabs in the input string 973 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 974 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 975 consistent view of the parsed string, the parse location, and line and column 976 positions within the parsed string. 
def col (loc,strg):
    """Return the 1-based column of position C{loc} inside C{strg}.

    Newlines are the only line separators recognised; the character right
    after a newline is column 1.

    Note: by default tabs in the input string are expanded before parsing
    starts.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for
    ways to keep the parsed string, the parse location, and the line and column
    positions consistent when the input contains C{<TAB>}s.
    """
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # rfind() gives -1 when there is no earlier newline, so the first line
    # is numbered from 1 as well
    return loc - strg.rfind("\n", 0, loc)

def lineno(loc,strg):
    """Return the 1-based line number of position C{loc} inside C{strg}.

    Newlines are the only line separators recognised.

    Note: by default tabs in the input string are expanded before parsing
    starts.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for
    ways to keep the parsed string, the parse location, and the line and column
    positions consistent when the input contains C{<TAB>}s.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1

def line( loc, strg ):
    """Return the text of the line that contains position C{loc} in C{strg},
    without its terminating newline.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # loc is on the last line - take everything up to the end of the string
        return strg[start:]
    return strg[start:end]

def _defaultStartDebugAction( instring, loc, expr ):
    # default debug action run before an expression is tried
    pieces = ["Match ", _ustr(expr), " at loc ", _ustr(loc),
              "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )]
    print ("".join(pieces))

def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    # default debug action run after an expression matched
    pieces = ["Matched ", _ustr(expr), " -> ", str(toks.asList())]
    print ("".join(pieces))

def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    # default debug action run when an expression raised
    pieces = ["Exception raised:", _ustr(exc)]
    print ("".join(pieces))

def nullDebugAction(*args):
    """Debug action that accepts any arguments and does nothing; use it to
    silence debugging output during parsing."""
    return None

# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
#~ 'decorator to trim function calls to match the arity of the target'
#~ def _trim_arity(func, maxargs=3):
#~     if func in singleArgBuiltins:
#~         return lambda s,l,t: func(t)
#~     limit = 0
#~     foundArity = False
#~     def wrapper(*args):
#~         nonlocal limit,foundArity
#~         while 1:
#~
#~             try:
#~                 ret = func(*args[limit:])
#~                 foundArity = True
#~                 return ret
#~             except TypeError:
#~                 if limit == maxargs or foundArity:
#~                     raise
#~                 limit += 1
#~                 continue
#~     return wrapper

# this version is Python 2.x-3.x cross-compatible
def _trim_arity(func, maxargs=2):
    """
    Decorator to trim function calls to match the arity of the target.

    Returns a wrapper that first calls C{func} with all the arguments it was
    given.  Whenever that call fails with a C{TypeError} raised on the call
    line itself (i.e. an argument-count mismatch rather than an error inside
    C{func}), one more leading argument is dropped and the call is retried,
    until more than C{maxargs} leading arguments would have to be dropped.
    Once a call has succeeded, the number of dropped arguments is kept and any
    later C{TypeError} is propagated unchanged.

    Callables listed in C{singleArgBuiltins} are simply called with the third
    argument only.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists stand in for 'nonlocal', which Python 2 lacks
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper

class ParserElement(object):
    """Abstract base level parser element class."""
    DEFAULT_WHITE_CHARS = " \n\t\r"
    verbose_stacktrace = False

    @staticmethod
    def setDefaultWhitespaceChars( chars ):
        r"""
        Overrides the default whitespace chars

        Example::
            # default whitespace chars are space, <TAB>, newline and carriage return
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

            # change to just treat newline as significant
            ParserElement.setDefaultWhitespaceChars(" \t")
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
        """
        ParserElement.DEFAULT_WHITE_CHARS = chars

    @staticmethod
    def inlineLiteralsUsing(cls):
        """
        Set class to be used for inclusion of string literals into a parser.

        Example::
            # default literal class used is Literal
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


            # change to Suppress
            ParserElement.inlineLiteralsUsing(Suppress)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
        """
        ParserElement._literalStringClass = cls

    def __init__( self, savelist=False ):
        """Initialise the parsing state shared by all parser elements.

        C{savelist} is stored as C{saveAsList} and passed as C{asList} when the
        results of this element are packaged into a C{ParseResults}.
        """
        self.parseAction = list()
        self.failAction = None
        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
        self.strRepr = None
        self.resultsName = None
        self.saveAsList = savelist
        self.skipWhitespace = True
        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        self.copyDefaultWhiteChars = True
        self.mayReturnEmpty = False # used when checking for left-recursion
        self.keepTabs = False
        self.ignoreExprs = list()
        self.debug = False
        self.streamlined = False
        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
        self.errmsg = ""
        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
        self.debugActions = ( None, None, None ) #custom debug actions
        self.re = None
        self.callPreparse = True # used to avoid redundant calls to preParse
        self.callDuringTry = False

    def copy( self ):
        """
        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
        for the same parsing pattern, using copies of the original parse element.

        The copy gets its own parse-action and ignore-expression lists; if
        C{copyDefaultWhiteChars} is set, its whitespace chars are reset to the
        current class-level default.

        Example::
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
        prints::
            [5120, 100, 655360, 268435456]
        Equivalent form of C{expr.copy()} is just C{expr()}::
            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
        """
        cpy = copy.copy( self )
        cpy.parseAction = self.parseAction[:]
        cpy.ignoreExprs = self.ignoreExprs[:]
        if self.copyDefaultWhiteChars:
            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        return cpy

    def setName( self, name ):
        """
        Define name for this expression, makes debugging and exception messages clearer.

        Returns C{self}, so calls can be chained.

        Example::
            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
        """
        self.name = name
        self.errmsg = "Expected " + self.name
        if hasattr(self,"exception"):
            self.exception.msg = self.errmsg
        return self

    def setResultsName( self, name, listAllMatches=False ):
        """
        Define name for referencing matching tokens as a nested attribute
        of the returned parse results.
        NOTE: this returns a *copy* of the original C{ParserElement} object;
        this is so that the client can define a basic element, such as an
        integer, and reference it in multiple places with different names.

        A trailing C{"*"} on C{name} is stripped and has the same effect as
        passing C{listAllMatches=True}.

        You can also set results names using the abbreviated syntax,
        C{expr("name")} in place of C{expr.setResultsName("name")} -
        see L{I{__call__}<__call__>}.

        Example::
            date_str = (integer.setResultsName("year") + '/'
                        + integer.setResultsName("month") + '/'
                        + integer.setResultsName("day"))

            # equivalent form:
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
        """
        newself = self.copy()
        if name.endswith("*"):
            name = name[:-1]
            listAllMatches=True
        newself.resultsName = name
        newself.modalResults = not listAllMatches
        return newself

    def setBreak(self,breakFlag = True):
        """Method to invoke the Python pdb debugger when this element is
           about to be parsed. Set C{breakFlag} to True to enable, False to
           disable.
        """
        if breakFlag:
            _parseMethod = self._parse
            def breaker(instring, loc, doActions=True, callPreParse=True):
                import pdb
                pdb.set_trace()
                return _parseMethod( instring, loc, doActions, callPreParse )
            # remember the wrapped method so that setBreak(False) can restore it
            breaker._originalParseMethod = _parseMethod
            self._parse = breaker
        else:
            if hasattr(self._parse,"_originalParseMethod"):
                self._parse = self._parse._originalParseMethod
        return self

    def setParseAction( self, *fns, **kwargs ):
        """
        Define one or more actions to perform when successfully matching parse element definition.
        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
        C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
         - s   = the original string being parsed (see note below)
         - loc = the location of the matching substring
         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
        If the functions in fns modify the tokens, they can return them as the return
        value from fn, and the modified list of tokens will replace the original.
        Otherwise, fn does not need to return any value.

        Any parse actions set previously are replaced.

        Optional keyword arguments:
         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

        Note: the default parsing behavior is to expand tabs in the input string
        before starting the parsing process.  See L{I{parseString}<parseString>} for more information
        on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
        consistent view of the parsed string, the parse location, and line and column
        positions within the parsed string.

        Example::
            integer = Word(nums)
            date_str = integer + '/' + integer + '/' + integer

            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

            # use parse action to convert to ints at parse time
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
            date_str = integer + '/' + integer + '/' + integer

            # note that integer fields are now ints, not strings
            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
        """
        self.parseAction = [_trim_arity(fn) for fn in fns]
        self.callDuringTry = kwargs.get("callDuringTry", False)
        return self

    def addParseAction( self, *fns, **kwargs ):
        """
        Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.

        See examples in L{I{copy}<copy>}.
        """
        self.parseAction += [_trim_arity(fn) for fn in fns]
        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
        return self

    def addCondition(self, *fns, **kwargs):
        """Add a boolean predicate function to expression's list of parse actions. See
        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
        functions passed to C{addCondition} need to return boolean success/fail of the condition.

        Optional keyword arguments:
         - message = define a custom message to be used in the raised exception
         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
         - callDuringTry = if True, the conditions are also evaluated during lookaheads and alternate testing

        Example::
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
            year_int = integer.copy()
            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
            date_str = year_int + '/' + integer + '/' + integer

            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
        """
        msg = kwargs.get("message", "failed user-defined condition")
        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

        def make_condition(fn):
            # Built through a factory so that every wrapper is bound to its own
            # condition (a closure over the loop variable would make all of
            # them test the last one), and so that the arity of the condition
            # is worked out once instead of on every call.
            fn = _trim_arity(fn)
            def pa(s,l,t):
                if not bool(fn(s,l,t)):
                    raise exc_type(s,l,msg)
            return pa

        for fn in fns:
            self.parseAction.append(make_condition(fn))
        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
        return self

    def setFailAction( self, fn ):
        """Define action to perform if parsing fails at this expression.
           Fail action fn is a callable function that takes the arguments
           C{fn(s,loc,expr,err)} where:
            - s = string being parsed
            - loc = location where expression match was attempted and failed
            - expr = the parse expression that failed
            - err = the exception thrown
           The function returns no value.  It may throw C{L{ParseFatalException}}
           if it is desired to stop parsing immediately."""
        self.failAction = fn
        return self

    def _skipIgnorables( self, instring, loc ):
        """Return the location reached after repeatedly matching every
        expression in C{ignoreExprs} starting at C{loc}, until a full pass
        over them matches nothing more."""
        exprsFound = True
        while exprsFound:
            exprsFound = False
            for e in self.ignoreExprs:
                try:
                    while 1:
                        loc,dummy = e._parse( instring, loc )
                        exprsFound = True
                except ParseException:
                    pass
        return loc

    def preParse( self, instring, loc ):
        """Return C{loc} advanced past any ignorable expressions and, if
        C{skipWhitespace} is set, past any characters in C{whiteChars}."""
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )

        if self.skipWhitespace:
            wt = self.whiteChars
            instrlen = len(instring)
            while loc < instrlen and instring[loc] in wt:
                loc += 1

        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Match at C{loc} and return C{(new_loc, tokens)}; this base
        implementation consumes nothing and returns no tokens."""
        return loc, []

    def postParse( self, instring, loc, tokenlist ):
        """Hook applied to the tokens returned by C{parseImpl}; this base
        implementation returns them unchanged."""
        return tokenlist

    #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Parse C{instring} at C{loc} without memoization.

        Runs C{preParse} (when both C{callPreParse} and C{self.callPreparse}
        are set), C{parseImpl} and C{postParse}, packages the tokens in a
        C{ParseResults}, and then runs the parse actions when C{doActions} or
        C{self.callDuringTry} is set.  A non-C{None} return value from a parse
        action replaces the results.  Debug actions are invoked when
        C{self.debug} is set; the fail action is invoked when C{parseImpl}
        raises.  An C{IndexError} escaping C{parseImpl} is reported as a
        C{ParseException} located at the end of the string.

        Returns the tuple C{(loc, ParseResults)}.
        """
        debugging = ( self.debug ) #and doActions )

        if debugging or self.failAction:
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException as err:
                #~ print ("Exception raised:", err)
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            # only pay for the try/except when an IndexError is actually possible
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException as err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens

    def tryParse( self, instring, loc ):
        """Return the location after matching this element at C{loc}, with
        C{doActions=False}.  A C{ParseFatalException} is reported as an
        ordinary C{ParseException}."""
        try:
            return self._parse( instring, loc, doActions=False )[0]
        except ParseFatalException:
            raise ParseException( instring, loc, self.errmsg, self)

    def canParseNext(self, instring, loc):
        """Return True if C{tryParse} succeeds at C{loc}, False if it raises
        C{ParseException} or C{IndexError}."""
        try:
            self.tryParse(instring, loc)
        except (ParseException, IndexError):
            return False
        else:
            return True

    class _UnboundedCache(object):
        """Packrat cache without a size limit.

        C{get} returns the C{not_in_cache} sentinel for unknown keys.  The
        accessors are closures over a local dict, bound onto the instance.
        """
        def __init__(self):
            cache = {}
            self.not_in_cache = not_in_cache = object()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self._cache_len = types.MethodType(cache_len, self)

        # len() looks __len__ up on the type, never on the instance, so it has
        # to be a real method for len(cache) to work
        def __len__(self):
            return self._cache_len()

        def __bool__(self):
            # defining __len__ must not make an empty cache falsy
            return True
        __nonzero__ = __bool__  # Python 2 spelling

    if _OrderedDict is not None:
        class _FifoCache(object):
            """Packrat cache holding at most C{size} entries; the oldest
            entry is discarded first.  C{get} returns the C{not_in_cache}
            sentinel for unknown keys."""
            def __init__(self, size):
                self.not_in_cache = not_in_cache = object()

                cache = _OrderedDict()

                def get(self, key):
                    return cache.get(key, not_in_cache)

                def set(self, key, value):
                    cache[key] = value
                    while len(cache) > size:
                        try:
                            cache.popitem(False)
                        except KeyError:
                            pass

                def clear(self):
                    cache.clear()

                def cache_len(self):
                    return len(cache)

                self.get = types.MethodType(get, self)
                self.set = types.MethodType(set, self)
                self.clear = types.MethodType(clear, self)
                self._cache_len = types.MethodType(cache_len, self)

            # len() looks __len__ up on the type, never on the instance
            def __len__(self):
                return self._cache_len()

            def __bool__(self):
                # defining __len__ must not make an empty cache falsy
                return True
            __nonzero__ = __bool__  # Python 2 spelling

    else:
        class _FifoCache(object):
            """Packrat cache holding at most C{size} entries; the oldest
            entry is discarded first.  C{get} returns the C{not_in_cache}
            sentinel for unknown keys.  Used when no ordered dict is
            available."""
            def __init__(self, size):
                self.not_in_cache = not_in_cache = object()

                cache = {}
                # Keys in insertion order.  Deliberately created without a
                # maxlen: a bounded deque drops keys silently on append, which
                # would leave their values behind in `cache` forever.
                key_fifo = collections.deque()

                def get(self, key):
                    return cache.get(key, not_in_cache)

                def set(self, key, value):
                    if key not in cache:
                        key_fifo.append(key)
                    cache[key] = value
                    while len(cache) > size and key_fifo:
                        cache.pop(key_fifo.popleft(), None)

                def clear(self):
                    cache.clear()
                    key_fifo.clear()

                def cache_len(self):
                    return len(cache)

                self.get = types.MethodType(get, self)
                self.set = types.MethodType(set, self)
                self.clear = types.MethodType(clear, self)
                self._cache_len = types.MethodType(cache_len, self)

            # len() looks __len__ up on the type, never on the instance
            def __len__(self):
                return self._cache_len()

            def __bool__(self):
                # defining __len__ must not make an empty cache falsy
                return True
            __nonzero__ = __bool__  # Python 2 spelling

    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
    packrat_cache_lock = RLock()
    packrat_cache_stats = [0, 0]

    # this method gets repeatedly called during backtracking with the same arguments -
    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Memoizing front end to C{_parseNoCache}.

        Results and C{ParseBaseException}s are cached under the key
        C{(self, instring, loc, callPreParse, doActions)}.  A cached exception
        is re-raised; a cached result is returned with a copy of its
        C{ParseResults}.  Hit and miss counts are kept in
        C{packrat_cache_stats}.  The whole lookup runs under
        C{packrat_cache_lock}.
        """
        HIT, MISS = 0, 1
        lookup = (self, instring, loc, callPreParse, doActions)
        with ParserElement.packrat_cache_lock:
            cache = ParserElement.packrat_cache
            value = cache.get(lookup)
            if value is cache.not_in_cache:
                ParserElement.packrat_cache_stats[MISS] += 1
                try:
                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
                except ParseBaseException as pe:
                    # cache a copy of the exception, without the traceback
                    cache.set(lookup, pe.__class__(*pe.args))
                    raise
                else:
                    cache.set(lookup, (value[0], value[1].copy()))
                    return value
            else:
                ParserElement.packrat_cache_stats[HIT] += 1
                if isinstance(value, Exception):
                    raise value
                return (value[0], value[1].copy())

    # uncached parsing is the default; enablePackrat() swaps in _parseCache
    _parse = _parseNoCache

    @staticmethod
    def resetCache():
        """Empty the packrat cache and zero its hit/miss statistics."""
        ParserElement.packrat_cache.clear()
        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)

    _packratEnabled = False
    @staticmethod
    def enablePackrat(cache_size_limit=128):
        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
           Repeated parse attempts at the same string location (which happens
           often in many complex grammars) can immediately return a cached value,
           instead of re-executing parsing/validating code.  Memoizing is done of
           both valid results and parsing exceptions.

           Parameters:
            - cache_size_limit - (default=C{128}) - if an integer value is provided
              will limit the size of the packrat cache; if None is passed, then
              the cache size will be unbounded; if 0 is passed, the cache will
              be effectively disabled.

           This speedup may break existing programs that use parse actions that
           have side-effects.  For this reason, packrat parsing is disabled when
           you first import pyparsing.  To activate the packrat feature, your
           program must call the class method C{ParserElement.enablePackrat()}.  If
           your program uses C{psyco} to "compile as you go", you must call
           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
           Python will crash.  For best results, call C{enablePackrat()} immediately
           after importing pyparsing.

           Only the first call has any effect; later calls are ignored.

           Example::
               import pyparsing
               pyparsing.ParserElement.enablePackrat()
        """
        if not ParserElement._packratEnabled:
            ParserElement._packratEnabled = True
            if cache_size_limit is None:
                ParserElement.packrat_cache = ParserElement._UnboundedCache()
            else:
                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
            ParserElement._parse = ParserElement._parseCache

    def parseString( self, instring, parseAll=False ):
        """
        Execute the parse expression with the given string.
        This is the main interface to the client code, once the complete
        expression has been built.

        If you want the grammar to require that the entire input string be
        successfully parsed, then set C{parseAll} to True (equivalent to ending
        the grammar with C{L{StringEnd()}}).

        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
        in order to report proper column numbers in parse actions.
        If the input string contains tabs and
        the grammar uses parse actions that use the C{loc} argument to index into the
        string being parsed, you can ensure you have a consistent view of the input
        string by:
         - calling C{parseWithTabs} on your grammar before calling C{parseString}
           (see L{I{parseWithTabs}<parseWithTabs>})
         - define your parse action using the full C{(s,loc,toks)} signature, and
           reference the input string using the parse action's C{s} argument
         - explicitly expand the tabs in your input string before calling
           C{parseString}

        Example::
            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
        """
        ParserElement.resetCache()
        if not self.streamlined:
            self.streamline()
            #~ self.saveAsList = True
        for e in self.ignoreExprs:
            e.streamline()
        if not self.keepTabs:
            instring = instring.expandtabs()
        try:
            loc, tokens = self._parse( instring, 0 )
            if parseAll:
                loc = self.preParse( instring, loc )
                se = Empty() + StringEnd()
                se._parse( instring, loc )
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            else:
                # catch and re-raise exception from here, clears out pyparsing internal stack trace
                raise exc
        else:
            return tokens

    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
        """
        Scan the input string for expression matches.  Each match will return the
        matching tokens, start location, and end location.  May be called with optional
        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
        C{overlap} is specified, then overlapping matches will be reported.

        Note that the start and end locations are reported relative to the string
        being parsed.  See L{I{parseString}<parseString>} for more information on parsing
        strings with embedded tabs.

        Example::
            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
            print(source)
            for tokens,start,end in Word(alphas).scanString(source):
                print(' '*start + '^'*(end-start))
                print(' '*start + tokens[0])

        prints::

            sldjf123lsdjjkf345sldkjf879lkjsfd987
            ^^^^^
            sldjf
                    ^^^^^^^
                    lsdjjkf
                              ^^^^^^
                              sldkjf
                                       ^^^^^^
                                       lkjsfd
        """
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = _ustr(instring).expandtabs()
        instrlen = len(instring)
        loc = 0
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        try:
            while loc <= instrlen and matches < maxMatches:
                try:
                    preloc = preparseFn( instring, loc )
                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
                except ParseException:
                    loc = preloc+1
                else:
                    if nextLoc > loc:
                        matches += 1
                        yield tokens, preloc, nextLoc
                        if overlap:
                            # NOTE(review): `nextloc` (position after pre-parsing) and
                            # `nextLoc` (end of the match) differ only by case - kept
                            # exactly as in the original; confirm the intent before changing
                            nextloc = preparseFn( instring, loc )
                            if nextloc > loc:
                                loc = nextLoc
                            else:
                                loc += 1
                        else:
                            loc = nextLoc
                    else:
                        loc = preloc+1
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            else:
                # catch and re-raise exception from here, clears out pyparsing internal stack trace
                raise exc

    def transformString( self, instring ):
        """
        Extension to C{L{scanString}}, to modify matching text with modified tokens that may
        be returned from a parse action.  To use C{transformString}, define a grammar and
        attach a parse action to it that modifies the returned token list.
        Invoking C{transformString()} on a target string will then scan for matches,
        and replace the matched text patterns according to the logic in the parse
        action.  C{transformString()} returns the resulting transformed string.

        Note that calling this method sets C{keepTabs} on this expression.

        Example::
            wd = Word(alphas)
            wd.setParseAction(lambda toks: toks[0].title())

            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
        Prints::
            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
        """
        out = []
        lastE = 0
        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
        # keep string locs straight between transformString and scanString
        self.keepTabs = True
        try:
            for t,s,e in self.scanString( instring ):
                # copy the unmatched text between the previous match and this one
                out.append( instring[lastE:s] )
                if t:
                    if isinstance(t,ParseResults):
                        out += t.asList()
                    elif isinstance(t,list):
                        out += t
                    else:
                        out.append(t)
                lastE = e
            out.append(instring[lastE:])
            out = [o for o in out if o]
            return "".join(map(_ustr,_flatten(out)))
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            else:
                # catch and re-raise exception from here, clears out pyparsing internal stack trace
                raise exc

    def searchString( self, instring, maxMatches=_MAX_INT ):
        """
        Another extension to C{L{scanString}}, simplifying the access to the tokens found
        to match the given parse expression.  May be called with optional
        C{maxMatches} argument, to clip searching after 'n' matches are found.

        Example::
            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
            cap_word = Word(alphas.upper(), alphas.lower())

            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

            # the sum() builtin can be used to merge results into a single ParseResults object
            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
        prints::
            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
        """
        try:
            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            else:
                # catch and re-raise exception from here, clears out pyparsing internal stack trace
                raise exc

    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
        """
        Generator method to split a string using the given expression as a separator.
        May be called with optional C{maxsplit} argument, to limit the number of splits;
        and the optional C{includeSeparators} argument (default=C{False}), if the separating
        matching text should be included in the split results.

        Example::
            punc = oneOf(list(".,;:/-!?"))
            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
        prints::
            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
        """
        last = 0
        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
            yield instring[last:s]
            if includeSeparators:
                yield t[0]
            last = e
        # whatever follows the final separator
        yield instring[last:]

    def __add__(self, other ):
        """
        Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
        converts them to L{Literal}s by default.

        If C{other} is neither a string nor a C{ParserElement}, a
        C{SyntaxWarning} is issued and C{None} is returned.

        Example::
            greet = Word(alphas) + "," + Word(alphas) + "!"
            hello = "Hello, World!"
            print (hello, "->", greet.parseString(hello))
        Prints::
            Hello, World! -> ['Hello', ',', 'World', '!']
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            return None
        return And( [ self, other ] )

    def __radd__(self, other ):
        """
        Implementation of + operator when left operand is not a C{L{ParserElement}}
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            return None
        return other + self

    def __sub__(self, other):
        """
        Implementation of - operator, returns C{L{And}} with error stop
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            return None
        return self + And._ErrorStop() + other

    def __rsub__(self, other ):
        """
        Implementation of - operator when left operand is not a C{L{ParserElement}}
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            return None
        return other - self
in place of 1880 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1881 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1882 may also include C{None} as in: 1883 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1884 to C{expr*n + L{ZeroOrMore}(expr)} 1885 (read as "at least n instances of C{expr}") 1886 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1887 (read as "0 to n instances of C{expr}") 1888 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1889 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1890 1891 Note that C{expr*(None,n)} does not raise an exception if 1892 more than n exprs exist in the input stream; that is, 1893 C{expr*(None,n)} does not enforce a maximum number of expr 1894 occurrences. If this behavior is desired, then write 1895 C{expr*(None,n) + ~expr} 1896 """ 1897 if isinstance(other,int): 1898 minElements, optElements = other,0 1899 elif isinstance(other,tuple): 1900 other = (other + (None, None))[:2] 1901 if other[0] is None: 1902 other = (0, other[1]) 1903 if isinstance(other[0],int) and other[1] is None: 1904 if other[0] == 0: 1905 return ZeroOrMore(self) 1906 if other[0] == 1: 1907 return OneOrMore(self) 1908 else: 1909 return self*other[0] + ZeroOrMore(self) 1910 elif isinstance(other[0],int) and isinstance(other[1],int): 1911 minElements, optElements = other 1912 optElements -= minElements 1913 else: 1914 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1915 else: 1916 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1917 1918 if minElements < 0: 1919 raise ValueError("cannot multiply ParserElement by negative value") 1920 if optElements < 0: 1921 raise ValueError("second tuple value must be greater or equal to first tuple value") 1922 if minElements == optElements == 0: 1923 raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 1924 1925 if (optElements): 1926 def 
makeOptionalList(n): 1927 if n>1: 1928 return Optional(self + makeOptionalList(n-1)) 1929 else: 1930 return Optional(self) 1931 if minElements: 1932 if minElements == 1: 1933 ret = self + makeOptionalList(optElements) 1934 else: 1935 ret = And([self]*minElements) + makeOptionalList(optElements) 1936 else: 1937 ret = makeOptionalList(optElements) 1938 else: 1939 if minElements == 1: 1940 ret = self 1941 else: 1942 ret = And([self]*minElements) 1943 return ret 1944 1945 def __rmul__(self, other): 1946 return self.__mul__(other) 1947 1948 def __or__(self, other ): 1949 """ 1950 Implementation of | operator - returns C{L{MatchFirst}} 1951 """ 1952 if isinstance( other, basestring ): 1953 other = ParserElement._literalStringClass( other ) 1954 if not isinstance( other, ParserElement ): 1955 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1956 SyntaxWarning, stacklevel=2) 1957 return None 1958 return MatchFirst( [ self, other ] ) 1959 1960 def __ror__(self, other ): 1961 """ 1962 Implementation of | operator when left operand is not a C{L{ParserElement}} 1963 """ 1964 if isinstance( other, basestring ): 1965 other = ParserElement._literalStringClass( other ) 1966 if not isinstance( other, ParserElement ): 1967 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1968 SyntaxWarning, stacklevel=2) 1969 return None 1970 return other | self 1971 1972 def __xor__(self, other ): 1973 """ 1974 Implementation of ^ operator - returns C{L{Or}} 1975 """ 1976 if isinstance( other, basestring ): 1977 other = ParserElement._literalStringClass( other ) 1978 if not isinstance( other, ParserElement ): 1979 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1980 SyntaxWarning, stacklevel=2) 1981 return None 1982 return Or( [ self, other ] ) 1983 1984 def __rxor__(self, other ): 1985 """ 1986 Implementation of ^ operator when left operand is not a C{L{ParserElement}} 1987 """ 1988 if 
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}

    A string operand is first wrapped using C{ParserElement._literalStringClass}.
    If the operand is still not a C{ParserElement}, a C{SyntaxWarning} is issued
    and C{None} is returned.
    """
    if isinstance(other, basestring):
        rhs = ParserElement._literalStringClass(other)
    else:
        rhs = other

    if isinstance(rhs, ParserElement):
        return Each([self, rhs])

    # operand cannot take part in a grammar expression: warn the caller, don't raise
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars for this element.

    Stores C{chars} as this element's whitespace characters, turns whitespace
    skipping on, and clears the C{copyDefaultWhiteChars} flag.
    Returns C{self} so that calls can be chained.
    """
    # this element now carries its own whitespace setting
    self.copyDefaultWhiteChars = False
    self.whiteChars = chars
    self.skipWhitespace = True
    return self
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Each of the three actions may be passed as a false value (such as C{None})
    to use the corresponding default debug action instead.  Turns the
    C{debug} flag on and returns C{self} so that calls can be chained.
    """
    on_start = startAction if startAction else _defaultStartDebugAction
    on_success = successAction if successAction else _defaultSuccessDebugAction
    on_exception = exceptionAction if exceptionAction else _defaultExceptionDebugAction

    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted but not read by this implementation, which
    only runs C{checkRecursion} starting from an empty element list.
    """
    # default is None rather than a list literal, so no mutable object is
    # shared between calls through the default argument
    self.checkRecursion([])
def __eq__(self, other):
    """
    Compare this expression with C{other}:
     - another C{ParserElement} is equal if it is the same object, or if both
       objects have equal instance attributes (C{vars()})
     - a string is equal if C{self.matches(other)} succeeds
     - any other object is never equal
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    elif isinstance(other, basestring):
        return self.matches(other)
    else:
        # Previously "super(ParserElement,self)==other", which compared a
        # freshly created super() proxy with other rather than delegating to
        # a parent __eq__; state the intended result explicitly.
        return False
2237 2238 Parameters: 2239 - tests - a list of separate test strings, or a multiline string of test strings 2240 - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests 2241 - comment - (default=C{'#'}) - expression for indicating embedded comments in the test 2242 string; pass None to disable comment filtering 2243 - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline; 2244 if False, only dump nested list 2245 - printResults - (default=C{True}) prints test output to stdout 2246 - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing 2247 2248 Returns: a (success, results) tuple, where success indicates that all tests succeeded 2249 (or failed if C{failureTests} is True), and the results contain a list of lines of each 2250 test's output 2251 2252 Example:: 2253 number_expr = pyparsing_common.number.copy() 2254 2255 result = number_expr.runTests(''' 2256 # unsigned integer 2257 100 2258 # negative integer 2259 -100 2260 # float with scientific notation 2261 6.02e23 2262 # integer with scientific notation 2263 1e-12 2264 ''') 2265 print("Success" if result[0] else "Failed!") 2266 2267 result = number_expr.runTests(''' 2268 # stray character 2269 100Z 2270 # missing leading digit before '.' 2271 -.100 2272 # too many '.' 2273 3.14.159 2274 ''', failureTests=True) 2275 print("Success" if result[0] else "Failed!") 2276 prints:: 2277 # unsigned integer 2278 100 2279 [100] 2280 2281 # negative integer 2282 -100 2283 [-100] 2284 2285 # float with scientific notation 2286 6.02e23 2287 [6.02e+23] 2288 2289 # integer with scientific notation 2290 1e-12 2291 [1e-12] 2292 2293 Success 2294 2295 # stray character 2296 100Z 2297 ^ 2298 FAIL: Expected end of text (at char 3), (line:1, col:4) 2299 2300 # missing leading digit before '.' 
2301 -.100 2302 ^ 2303 FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) 2304 2305 # too many '.' 2306 3.14.159 2307 ^ 2308 FAIL: Expected end of text (at char 4), (line:1, col:5) 2309 2310 Success 2311 2312 Each test string must be on a single line. If you want to test a string that spans multiple 2313 lines, create a test like this:: 2314 2315 expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") 2316 2317 (Note that this is a raw string literal, you must include the leading 'r'.) 2318 """ 2319 if isinstance(tests, basestring): 2320 tests = list(map(str.strip, tests.rstrip().splitlines())) 2321 if isinstance(comment, basestring): 2322 comment = Literal(comment) 2323 allResults = [] 2324 comments = [] 2325 success = True 2326 for t in tests: 2327 if comment is not None and comment.matches(t, False) or comments and not t: 2328 comments.append(t) 2329 continue 2330 if not t: 2331 continue 2332 out = ['\n'.join(comments), t] 2333 comments = [] 2334 try: 2335 t = t.replace(r'\n','\n') 2336 result = self.parseString(t, parseAll=parseAll) 2337 out.append(result.dump(full=fullDump)) 2338 success = success and not failureTests 2339 except ParseBaseException as pe: 2340 fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" 2341 if '\n' in t: 2342 out.append(line(pe.loc, t)) 2343 out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) 2344 else: 2345 out.append(' '*pe.loc + '^' + fatal) 2346 out.append("FAIL: " + str(pe)) 2347 success = success and failureTests 2348 result = pe 2349 except Exception as exc: 2350 out.append("FAIL-EXCEPTION: " + str(exc)) 2351 success = success and failureTests 2352 result = exc 2353 2354 if printResults: 2355 if fullDump: 2356 out.append('') 2357 print('\n'.join(out)) 2358 2359 allResults.append((t, result)) 2360 2361 return success, allResults 2362 2363 2364class Token(ParserElement): 2365 """ 2366 Abstract C{ParserElement} subclass, for defining atomic 
class NoMatch(Token):
    """
    A token that can never be matched; every parse attempt fails.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.name = "NoMatch"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure, reported at the location where the match was attempted
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must not be
    immediately preceded or followed by a keyword (identifier) character.
    Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case, including the identifier characters
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # A match needs the keyword text at loc, plus a non-identifier character
        # (or the string boundary) on both sides of it.
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps the same identifier characters.
        """
        c = super(Keyword,self).copy()
        # Give the copy its own set holding this instance's identChars.  Resetting
        # it to Keyword.DEFAULT_KEYWORD_CHARS would discard identChars passed to
        # the constructor (and the upper-casing applied for caseless keywords).
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        start = loc
        maxloc = start + len(self.match_string)

        # only attempt a comparison if enough input remains for a full-length match
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], self.match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # Loop finished without exceeding the mismatch limit.  The match
                # ends at maxloc, an absolute position in instring; using the
                # index within match_string would be wrong for any start != 0.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = self.match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2627 2628 pyparsing includes helper strings for building Words: 2629 - L{alphas} 2630 - L{nums} 2631 - L{alphanums} 2632 - L{hexnums} 2633 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) 2634 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) 2635 - L{printables} (any non-whitespace character) 2636 2637 Example:: 2638 # a word composed of digits 2639 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 2640 2641 # a word with a leading capital, and zero or more lowercase 2642 capital_word = Word(alphas.upper(), alphas.lower()) 2643 2644 # hostnames are alphanumeric, with leading alpha, and '-' 2645 hostname = Word(alphas, alphanums+'-') 2646 2647 # roman numeral (not a strict parser, accepts invalid mix of characters) 2648 roman = Word("IVXLCDM") 2649 2650 # any string of non-whitespace characters, except for ',' 2651 csv_value = Word(printables, excludeChars=",") 2652 """ 2653 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): 2654 super(Word,self).__init__() 2655 if excludeChars: 2656 initChars = ''.join(c for c in initChars if c not in excludeChars) 2657 if bodyChars: 2658 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 2659 self.initCharsOrig = initChars 2660 self.initChars = set(initChars) 2661 if bodyChars : 2662 self.bodyCharsOrig = bodyChars 2663 self.bodyChars = set(bodyChars) 2664 else: 2665 self.bodyCharsOrig = initChars 2666 self.bodyChars = set(initChars) 2667 2668 self.maxSpecified = max > 0 2669 2670 if min < 1: 2671 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 2672 2673 self.minLen = min 2674 2675 if max > 0: 2676 self.maxLen = max 2677 else: 2678 self.maxLen = _MAX_INT 2679 2680 if exact > 0: 2681 self.maxLen = exact 2682 self.minLen = exact 2683 2684 self.name = 
_ustr(self) 2685 self.errmsg = "Expected " + self.name 2686 self.mayIndexError = False 2687 self.asKeyword = asKeyword 2688 2689 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 2690 if self.bodyCharsOrig == self.initCharsOrig: 2691 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 2692 elif len(self.initCharsOrig) == 1: 2693 self.reString = "%s[%s]*" % \ 2694 (re.escape(self.initCharsOrig), 2695 _escapeRegexRangeChars(self.bodyCharsOrig),) 2696 else: 2697 self.reString = "[%s][%s]*" % \ 2698 (_escapeRegexRangeChars(self.initCharsOrig), 2699 _escapeRegexRangeChars(self.bodyCharsOrig),) 2700 if self.asKeyword: 2701 self.reString = r"\b"+self.reString+r"\b" 2702 try: 2703 self.re = re.compile( self.reString ) 2704 except Exception: 2705 self.re = None 2706 2707 def parseImpl( self, instring, loc, doActions=True ): 2708 if self.re: 2709 result = self.re.match(instring,loc) 2710 if not result: 2711 raise ParseException(instring, loc, self.errmsg, self) 2712 2713 loc = result.end() 2714 return loc, result.group() 2715 2716 if not(instring[ loc ] in self.initChars): 2717 raise ParseException(instring, loc, self.errmsg, self) 2718 2719 start = loc 2720 loc += 1 2721 instrlen = len(instring) 2722 bodychars = self.bodyChars 2723 maxloc = start + self.maxLen 2724 maxloc = min( maxloc, instrlen ) 2725 while loc < maxloc and instring[loc] in bodychars: 2726 loc += 1 2727 2728 throwException = False 2729 if loc - start < self.minLen: 2730 throwException = True 2731 if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 2732 throwException = True 2733 if self.asKeyword: 2734 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 2735 throwException = True 2736 2737 if throwException: 2738 raise ParseException(instring, loc, self.errmsg, self) 2739 2740 return loc, instring[start:loc] 2741 2742 def __str__( self ): 2743 try: 2744 return 
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already-compiled RE object, which is then used directly
        (C{flags} is only recorded, not applied).  An invalid pattern string issues a
        C{SyntaxWarning} and re-raises the C{re} error; any other type of C{pattern}
        raises C{ValueError}."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the regex source text; str(pattern) would store the repr of
            # the compiled object instead of the expression itself
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        # expose named groups of the regex as named parse results
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        # use the base class string form if it is available
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        # otherwise build (and cache) a default representation from the pattern
        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """
    Build the regular expression that recognises the quoted string.

    C{quoteChar} and C{endQuoteChar} are stripped of surrounding whitespace;
    if either is empty afterwards a C{SyntaxWarning} is issued and
    C{SyntaxError} is raised.  When C{endQuoteChar} is omitted the opening
    quote is used for both ends.  The remaining arguments are stored on the
    instance and select optional parts of the generated pattern.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace in the delimiters could never match, so drop it
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # the escape character, if given, is excluded from the "ordinary character" class
    excluded_esc = ''
    if escChar is not None:
        excluded_esc = _escapeRegexRangeChars(escChar) or ''

    # single-line strings additionally exclude line breaks from that class
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        opening = r'%s(?:[^%s%s]'
    else:
        self.flags = 0
        opening = r'%s(?:[^%s\n\r%s]'

    pieces = [opening % (re.escape(quoteChar),
                         _escapeRegexRangeChars(endQuoteChar[0]),
                         excluded_esc)]

    if len(endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote inside the body
        partial_ends = ["%s[^%s]" % (re.escape(endQuoteChar[:i]),
                                     _escapeRegexRangeChars(endQuoteChar[i]))
                        for i in range(len(endQuoteChar)-1,0,-1)]
        pieces.append('|(?:' + ')|(?:'.join(partial_ends) + ')')
    if escQuote:
        pieces.append(r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        pieces.append(r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    pieces.append(r')*%s' % re.escape(endQuoteChar))

    self.pattern = "".join(pieces)

    try:
        self.re = re.compile(self.pattern, self.flags)
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise
    else:
        self.reString = self.pattern

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
class CharsNotIn(Token):
    """
    Token matching a run of characters, none of which is in a given set of
    excluded characters.  Whitespace is part of the match unless it is listed
    in the excluded set (see example), because this token does not skip
    leading whitespace.

    Defined with a string of all disallowed characters and optional C{min},
    C{max} and C{exact} lengths.  C{min} defaults to 1 and may not be less
    than 1; C{max} and C{exact} default to 0, meaning no maximum or exact
    length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        """Store the excluded characters and length limits; raise C{ValueError} if C{min} < 1."""
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Consume characters from C{loc} until an excluded character, the end
        of the input, or the maximum length is reached.

        Returns C{(end_loc, matched_text)}.  Raises C{ParseException} if the
        character at C{loc} is excluded or fewer than C{self.minLen}
        characters were matched.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__(self):
        """Return the assigned name if available, else C{!W:(...)} showing up to four excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a run of the configured whitespace characters starting at C{loc}.

    Returns C{(end_loc, matched_text)}.  At most C{self.maxLen} characters
    are consumed.  Raises C{ParseException} if the character at C{loc} is
    not in C{self.matchWhite} or the run is shorter than C{self.minLen}.
    """
    if instring[loc] not in self.matchWhite:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    limit = min(begin + self.maxLen, len(instring))
    loc = begin + 1
    while loc < limit and instring[loc] in self.matchWhite:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
class LineStart(_PositionToken):
    r"""
    Matches only when the current position is in column 1, i.e. at the
    beginning of a line within the parse string.  Returns an empty token list.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc, [])} when C{loc} is in column 1, else raise C{ParseException}."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed only when C{loc} is at or beyond the end of C{instring}.

        Returns C{(loc+1, [])} when C{loc} is exactly the end of the input
        and C{(loc, [])} when it is already past it.  Raises
        C{ParseException} when input remains at C{loc}.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            # step past the end so a repeated StringEnd does not advance again
            return loc + 1, []
        # loc > instrlen: already past the end, nothing left to consume
        return loc, []
class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Return C{(loc, [])} when C{loc} is at or past the end of the input,
        or when the character before C{loc} is a word character and the
        character at C{loc} is not.  Raises C{ParseException} otherwise.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # At loc 0 nothing precedes the position, so no word can end
            # there; without this check instring[loc-1] would wrap around
            # to the last character of the input.
            if (loc == 0 or
                    instring[loc] in self.wordChars or
                    instring[loc - 1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
def ignore(self, other):
    """
    Register C{other} as an ignorable expression on this expression and on
    every contained expression, then return C{self}.

    A C{Suppress} that is already present in C{self.ignoreExprs} is not
    registered a second time.
    """
    duplicate_suppress = isinstance(other, Suppress) and other in self.ignoreExprs
    if not duplicate_suppress:
        super(ParseExpression, self).ignore(other)
        # pass on the entry the base class just stored
        for contained in self.exprs:
            contained.ignore(self.ignoreExprs[-1])
    return self
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element, named C{'-'}; once passed, later failures are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        """Take whitespace handling from the first expression; may return empty only if every expression may."""
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        first = self.exprs[0]
        self.setWhitespaceChars(first.whiteChars)
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse each contained expression in turn, accumulating their tokens.

        Returns C{(end_loc, tokens)}.  After an C{_ErrorStop} marker has been
        seen, a failure (or C{IndexError}) in a later expression is raised as
        C{ParseSyntaxException} instead of being propagated unchanged.
        """
        # the first element is parsed without pre-parsing, since the And
        # itself has already pre-parsed the string
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        committed = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                committed = True
                continue

            if not committed:
                loc, exprtokens = expr._parse(instring, loc, doActions)
            else:
                try:
                    loc, exprtokens = expr._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append C{other} (a string is converted to the literal class) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, stopping after the first that cannot return empty."""
        seen = parseElementList + [self]
        for expr in self.exprs:
            expr.checkRecursion(seen)
            if not expr.mayReturnEmpty:
                break

    def __str__(self):
        """Return the assigned name if any, else the contained expressions joined by spaces inside braces."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ".join(parts) + "}"

        return self.strRepr
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist = False):
        """May return empty if any alternative may, or if there are no alternatives."""
        super(MatchFirst, self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Return the result of the first alternative that parses at C{loc}.

        If none match, the exception from the alternative that got furthest
        into the input is re-raised with this expression's error message; an
        C{IndexError} from an alternative counts as a failure at the end of
        the input.  With no alternatives a C{ParseException} is raised.
        """
        furthest_loc = -1
        furthest_exc = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring,len(instring),alternative.errmsg,self)
                    furthest_loc = len(instring)

        # reached only when no alternative matched
        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Append C{other} (a string is converted to the literal class) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        """Return the assigned name if any, else the alternatives joined by C{' | '} inside braces."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        seen = parseElementList + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(seen)
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match the contained expressions in whatever order they occur in the input.

    Works in two phases: a trial phase using C{tryParse} discovers the order
    in which the expressions match, then the expressions are parsed for real
    with C{_parse} in that order and their results are added together.

    Returns C{(end_loc, combined_results)}.  Raises C{ParseException} if any
    required expression was not matched during the trial phase.
    """
    # On the first call, sort the contained expressions into groups and
    # cache the groups on the instance; later calls reuse them.
    if self.initExprGroups:
        # maps id() of the expression wrapped by each Optional back to that Optional
        self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
        opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
        opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
        self.optionals = opt1 + opt2
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False
    # working copies: entries are removed as they are matched
    tmpLoc = loc
    tmpReqd = self.required[:]
    tmpOpt = self.optionals[:]
    matchOrder = []

    # Trial phase: keep making passes over all candidates until a whole
    # pass matches nothing.
    keepMatching = True
    while keepMatching:
        tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
        failed = []
        for e in tmpExprs:
            try:
                tmpLoc = e.tryParse( instring, tmpLoc )
            except ParseException:
                failed.append(e)
            else:
                # record the Optional wrapper when e is a wrapped expression
                matchOrder.append(self.opt1map.get(id(e),e))
                if e in tmpReqd:
                    tmpReqd.remove(e)
                elif e in tmpOpt:
                    tmpOpt.remove(e)
        if len(failed) == len(tmpExprs):
            keepMatching = False

    # anything still listed as required was never matched
    if tmpReqd:
        missing = ", ".join(_ustr(e) for e in tmpReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

    # Real parse: run the expressions from the original loc in the
    # discovered order, honoring doActions.
    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = sum(resultlist, ParseResults([]))
    return loc, finalResults
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored in C{self.expr}, which may be
    C{None}; every method tolerates a missing expression.
    """
    def __init__(self, expr, savelist=False):
        """
        Store C{expr} (a string is converted using the configured literal
        class) and copy its whitespace, empty-match, index-error, list-saving
        and pre-parse settings and its ignore expressions onto this element.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the contained expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        # pass the real input string so the exception carries usable context
        raise ParseException(instring, loc, self.errmsg, self)

    def leaveWhitespace(self):
        """
        Disable whitespace skipping on this element and on a private copy of
        the contained expression, if there is one.  Returns C{self}.
        """
        self.skipWhitespace = False
        # check for a missing expression before copying it
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as ignorable here and on the contained expression.
        A C{Suppress} already present in C{self.ignoreExprs} is not added
        again.  Returns C{self}.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the contained expression; returns C{self}."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise C{RecursiveGrammarException} if this element is already in C{parseElementList}, else recurse."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """Validate the contained expression, then check this element for recursion."""
        trace = [] if validateTrace is None else validateTrace[:]
        tmp = trace + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the assigned name if available, else C{ClassName:(contained expression)}."""
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # base __str__ failed; fall back to the generated representation
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
C{NotAny} 3821 does I{not} advance the parsing position within the input string, it only 3822 verifies that the specified parse expression does I{not} match at the current 3823 position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny} 3824 always returns a null token list. May be constructed using the '~' operator. 3825 3826 Example:: 3827 3828 """ 3829 def __init__( self, expr ): 3830 super(NotAny,self).__init__(expr) 3831 #~ self.leaveWhitespace() 3832 self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs 3833 self.mayReturnEmpty = True 3834 self.errmsg = "Found unwanted token, "+_ustr(self.expr) 3835 3836 def parseImpl( self, instring, loc, doActions=True ): 3837 if self.expr.canParseNext(instring, loc): 3838 raise ParseException(instring, loc, self.errmsg, self) 3839 return loc, [] 3840 3841 def __str__( self ): 3842 if hasattr(self,"name"): 3843 return self.name 3844 3845 if self.strRepr is None: 3846 self.strRepr = "~{" + _ustr(self.expr) + "}" 3847 3848 return self.strRepr 3849 3850class _MultipleMatch(ParseElementEnhance): 3851 def __init__( self, expr, stopOn=None): 3852 super(_MultipleMatch, self).__init__(expr) 3853 self.saveAsList = True 3854 ender = stopOn 3855 if isinstance(ender, basestring): 3856 ender = ParserElement._literalStringClass(ender) 3857 self.not_ender = ~ender if ender is not None else None 3858 3859 def parseImpl( self, instring, loc, doActions=True ): 3860 self_expr_parse = self.expr._parse 3861 self_skip_ignorables = self._skipIgnorables 3862 check_ender = self.not_ender is not None 3863 if check_ender: 3864 try_not_ender = self.not_ender.tryParse 3865 3866 # must be at least one (but first see if we are the stopOn sentinel; 3867 # if so, fail) 3868 if check_ender: 3869 try_not_ender(instring, loc) 3870 loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False ) 3871 try: 3872 hasIgnoreExprs = (not not self.ignoreExprs) 3873 while 1: 3874 if 
check_ender: 3875 try_not_ender(instring, loc) 3876 if hasIgnoreExprs: 3877 preloc = self_skip_ignorables( instring, loc ) 3878 else: 3879 preloc = loc 3880 loc, tmptokens = self_expr_parse( instring, preloc, doActions ) 3881 if tmptokens or tmptokens.haskeys(): 3882 tokens += tmptokens 3883 except (ParseException,IndexError): 3884 pass 3885 3886 return loc, tokens 3887 3888class OneOrMore(_MultipleMatch): 3889 """ 3890 Repetition of one or more of the given expression. 3891 3892 Parameters: 3893 - expr - expression that must match one or more times 3894 - stopOn - (default=C{None}) - expression for a terminating sentinel 3895 (only required if the sentinel would ordinarily match the repetition 3896 expression) 3897 3898 Example:: 3899 data_word = Word(alphas) 3900 label = data_word + FollowedBy(':') 3901 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) 3902 3903 text = "shape: SQUARE posn: upper left color: BLACK" 3904 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] 3905 3906 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data 3907 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 3908 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] 3909 3910 # could also be written as 3911 (attr_expr * (1,)).parseString(text).pprint() 3912 """ 3913 3914 def __str__( self ): 3915 if hasattr(self,"name"): 3916 return self.name 3917 3918 if self.strRepr is None: 3919 self.strRepr = "{" + _ustr(self.expr) + "}..." 3920 3921 return self.strRepr 3922 3923class ZeroOrMore(_MultipleMatch): 3924 """ 3925 Optional repetition of zero or more of the given expression. 
3926 3927 Parameters: 3928 - expr - expression that must match zero or more times 3929 - stopOn - (default=C{None}) - expression for a terminating sentinel 3930 (only required if the sentinel would ordinarily match the repetition 3931 expression) 3932 3933 Example: similar to L{OneOrMore} 3934 """ 3935 def __init__( self, expr, stopOn=None): 3936 super(ZeroOrMore,self).__init__(expr, stopOn=stopOn) 3937 self.mayReturnEmpty = True 3938 3939 def parseImpl( self, instring, loc, doActions=True ): 3940 try: 3941 return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) 3942 except (ParseException,IndexError): 3943 return loc, [] 3944 3945 def __str__( self ): 3946 if hasattr(self,"name"): 3947 return self.name 3948 3949 if self.strRepr is None: 3950 self.strRepr = "[" + _ustr(self.expr) + "]..." 3951 3952 return self.strRepr 3953 3954class _NullToken(object): 3955 def __bool__(self): 3956 return False 3957 __nonzero__ = __bool__ 3958 def __str__(self): 3959 return "" 3960 3961_optionalNotMatched = _NullToken() 3962class Optional(ParseElementEnhance): 3963 """ 3964 Optional matching of the given expression. 3965 3966 Parameters: 3967 - expr - expression that must match zero or more times 3968 - default (optional) - value to be returned if the optional expression is not found. 
3969 3970 Example:: 3971 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier 3972 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) 3973 zip.runTests(''' 3974 # traditional ZIP code 3975 12345 3976 3977 # ZIP+4 form 3978 12101-0001 3979 3980 # invalid ZIP 3981 98765- 3982 ''') 3983 prints:: 3984 # traditional ZIP code 3985 12345 3986 ['12345'] 3987 3988 # ZIP+4 form 3989 12101-0001 3990 ['12101-0001'] 3991 3992 # invalid ZIP 3993 98765- 3994 ^ 3995 FAIL: Expected end of text (at char 5), (line:1, col:6) 3996 """ 3997 def __init__( self, expr, default=_optionalNotMatched ): 3998 super(Optional,self).__init__( expr, savelist=False ) 3999 self.saveAsList = self.expr.saveAsList 4000 self.defaultValue = default 4001 self.mayReturnEmpty = True 4002 4003 def parseImpl( self, instring, loc, doActions=True ): 4004 try: 4005 loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 4006 except (ParseException,IndexError): 4007 if self.defaultValue is not _optionalNotMatched: 4008 if self.expr.resultsName: 4009 tokens = ParseResults([ self.defaultValue ]) 4010 tokens[self.expr.resultsName] = self.defaultValue 4011 else: 4012 tokens = [ self.defaultValue ] 4013 else: 4014 tokens = [] 4015 return loc, tokens 4016 4017 def __str__( self ): 4018 if hasattr(self,"name"): 4019 return self.name 4020 4021 if self.strRepr is None: 4022 self.strRepr = "[" + _ustr(self.expr) + "]" 4023 4024 return self.strRepr 4025 4026class SkipTo(ParseElementEnhance): 4027 """ 4028 Token for skipping over all undefined text until the matched expression is found. 4029 4030 Parameters: 4031 - expr - target expression marking the end of the data to be skipped 4032 - include - (default=C{False}) if True, the target expression is also parsed 4033 (the skipped text and target expression are returned as a 2-element list). 
4034 - ignore - (default=C{None}) used to define grammars (typically quoted strings and 4035 comments) that might contain false matches to the target expression 4036 - failOn - (default=C{None}) define expressions that are not allowed to be 4037 included in the skipped test; if found before the target expression is found, 4038 the SkipTo is not a match 4039 4040 Example:: 4041 report = ''' 4042 Outstanding Issues Report - 1 Jan 2000 4043 4044 # | Severity | Description | Days Open 4045 -----+----------+-------------------------------------------+----------- 4046 101 | Critical | Intermittent system crash | 6 4047 94 | Cosmetic | Spelling error on Login ('log|n') | 14 4048 79 | Minor | System slow when running too many reports | 47 4049 ''' 4050 integer = Word(nums) 4051 SEP = Suppress('|') 4052 # use SkipTo to simply match everything up until the next SEP 4053 # - ignore quoted strings, so that a '|' character inside a quoted string does not match 4054 # - parse action will call token.strip() for each matched token, i.e., the description body 4055 string_data = SkipTo(SEP, ignore=quotedString) 4056 string_data.setParseAction(tokenMap(str.strip)) 4057 ticket_expr = (integer("issue_num") + SEP 4058 + string_data("sev") + SEP 4059 + string_data("desc") + SEP 4060 + integer("days_open")) 4061 4062 for tkt in ticket_expr.searchString(report): 4063 print tkt.dump() 4064 prints:: 4065 ['101', 'Critical', 'Intermittent system crash', '6'] 4066 - days_open: 6 4067 - desc: Intermittent system crash 4068 - issue_num: 101 4069 - sev: Critical 4070 ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] 4071 - days_open: 14 4072 - desc: Spelling error on Login ('log|n') 4073 - issue_num: 94 4074 - sev: Cosmetic 4075 ['79', 'Minor', 'System slow when running too many reports', '47'] 4076 - days_open: 47 4077 - desc: System slow when running too many reports 4078 - issue_num: 79 4079 - sev: Minor 4080 """ 4081 def __init__( self, other, include=False, ignore=None, 
failOn=None ): 4082 super( SkipTo, self ).__init__( other ) 4083 self.ignoreExpr = ignore 4084 self.mayReturnEmpty = True 4085 self.mayIndexError = False 4086 self.includeMatch = include 4087 self.asList = False 4088 if isinstance(failOn, basestring): 4089 self.failOn = ParserElement._literalStringClass(failOn) 4090 else: 4091 self.failOn = failOn 4092 self.errmsg = "No match found for "+_ustr(self.expr) 4093 4094 def parseImpl( self, instring, loc, doActions=True ): 4095 startloc = loc 4096 instrlen = len(instring) 4097 expr = self.expr 4098 expr_parse = self.expr._parse 4099 self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None 4100 self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None 4101 4102 tmploc = loc 4103 while tmploc <= instrlen: 4104 if self_failOn_canParseNext is not None: 4105 # break if failOn expression matches 4106 if self_failOn_canParseNext(instring, tmploc): 4107 break 4108 4109 if self_ignoreExpr_tryParse is not None: 4110 # advance past ignore expressions 4111 while 1: 4112 try: 4113 tmploc = self_ignoreExpr_tryParse(instring, tmploc) 4114 except ParseBaseException: 4115 break 4116 4117 try: 4118 expr_parse(instring, tmploc, doActions=False, callPreParse=False) 4119 except (ParseException, IndexError): 4120 # no match, advance loc in string 4121 tmploc += 1 4122 else: 4123 # matched skipto expr, done 4124 break 4125 4126 else: 4127 # ran off the end of the input string without matching skipto expr, fail 4128 raise ParseException(instring, loc, self.errmsg, self) 4129 4130 # build up return values 4131 loc = tmploc 4132 skiptext = instring[startloc:loc] 4133 skipresult = ParseResults(skiptext) 4134 4135 if self.includeMatch: 4136 loc, mat = expr_parse(instring,loc,doActions,callPreParse=False) 4137 skipresult += mat 4138 4139 return loc, skipresult 4140 4141class Forward(ParseElementEnhance): 4142 """ 4143 Forward declaration of an expression to be defined later - 4144 
used for recursive grammars, such as algebraic infix notation. 4145 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. 4146 4147 Note: take care when assigning to C{Forward} not to overlook precedence of operators. 4148 Specifically, '|' has a lower precedence than '<<', so that:: 4149 fwdExpr << a | b | c 4150 will actually be evaluated as:: 4151 (fwdExpr << a) | b | c 4152 thereby leaving b and c out as parseable alternatives. It is recommended that you 4153 explicitly group the values inserted into the C{Forward}:: 4154 fwdExpr << (a | b | c) 4155 Converting to use the '<<=' operator instead will avoid this problem. 4156 4157 See L{ParseResults.pprint} for an example of a recursive parser created using 4158 C{Forward}. 4159 """ 4160 def __init__( self, other=None ): 4161 super(Forward,self).__init__( other, savelist=False ) 4162 4163 def __lshift__( self, other ): 4164 if isinstance( other, basestring ): 4165 other = ParserElement._literalStringClass(other) 4166 self.expr = other 4167 self.strRepr = None 4168 self.mayIndexError = self.expr.mayIndexError 4169 self.mayReturnEmpty = self.expr.mayReturnEmpty 4170 self.setWhitespaceChars( self.expr.whiteChars ) 4171 self.skipWhitespace = self.expr.skipWhitespace 4172 self.saveAsList = self.expr.saveAsList 4173 self.ignoreExprs.extend(self.expr.ignoreExprs) 4174 return self 4175 4176 def __ilshift__(self, other): 4177 return self << other 4178 4179 def leaveWhitespace( self ): 4180 self.skipWhitespace = False 4181 return self 4182 4183 def streamline( self ): 4184 if not self.streamlined: 4185 self.streamlined = True 4186 if self.expr is not None: 4187 self.expr.streamline() 4188 return self 4189 4190 def validate( self, validateTrace=[] ): 4191 if self not in validateTrace: 4192 tmp = validateTrace[:]+[self] 4193 if self.expr is not None: 4194 self.expr.validate(tmp) 4195 self.checkRecursion([]) 4196 4197 def __str__( self ): 4198 if hasattr(self,"name"): 4199 return 
self.name 4200 return self.__class__.__name__ + ": ..." 4201 4202 # stubbed out for now - creates awful memory and perf issues 4203 self._revertClass = self.__class__ 4204 self.__class__ = _ForwardNoRecurse 4205 try: 4206 if self.expr is not None: 4207 retString = _ustr(self.expr) 4208 else: 4209 retString = "None" 4210 finally: 4211 self.__class__ = self._revertClass 4212 return self.__class__.__name__ + ": " + retString 4213 4214 def copy(self): 4215 if self.expr is not None: 4216 return super(Forward,self).copy() 4217 else: 4218 ret = Forward() 4219 ret <<= self 4220 return ret 4221 4222class _ForwardNoRecurse(Forward): 4223 def __str__( self ): 4224 return "..." 4225 4226class TokenConverter(ParseElementEnhance): 4227 """ 4228 Abstract subclass of C{ParseExpression}, for converting parsed results. 4229 """ 4230 def __init__( self, expr, savelist=False ): 4231 super(TokenConverter,self).__init__( expr )#, savelist ) 4232 self.saveAsList = False 4233 4234class Combine(TokenConverter): 4235 """ 4236 Converter to concatenate all matching tokens to a single string. 4237 By default, the matching patterns must also be contiguous in the input string; 4238 this can be disabled by specifying C{'adjacent=False'} in the constructor. 4239 4240 Example:: 4241 real = Word(nums) + '.' + Word(nums) 4242 print(real.parseString('3.1416')) # -> ['3', '.', '1416'] 4243 # will also erroneously match the following 4244 print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] 4245 4246 real = Combine(Word(nums) + '.' + Word(nums)) 4247 print(real.parseString('3.1416')) # -> ['3.1416'] 4248 # no match when there are internal spaces 4249 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) 
4250 """ 4251 def __init__( self, expr, joinString="", adjacent=True ): 4252 super(Combine,self).__init__( expr ) 4253 # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself 4254 if adjacent: 4255 self.leaveWhitespace() 4256 self.adjacent = adjacent 4257 self.skipWhitespace = True 4258 self.joinString = joinString 4259 self.callPreparse = True 4260 4261 def ignore( self, other ): 4262 if self.adjacent: 4263 ParserElement.ignore(self, other) 4264 else: 4265 super( Combine, self).ignore( other ) 4266 return self 4267 4268 def postParse( self, instring, loc, tokenlist ): 4269 retToks = tokenlist.copy() 4270 del retToks[:] 4271 retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) 4272 4273 if self.resultsName and retToks.haskeys(): 4274 return [ retToks ] 4275 else: 4276 return retToks 4277 4278class Group(TokenConverter): 4279 """ 4280 Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. 4281 4282 Example:: 4283 ident = Word(alphas) 4284 num = Word(nums) 4285 term = ident | num 4286 func = ident + Optional(delimitedList(term)) 4287 print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100'] 4288 4289 func = ident + Group(Optional(delimitedList(term))) 4290 print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']] 4291 """ 4292 def __init__( self, expr ): 4293 super(Group,self).__init__( expr ) 4294 self.saveAsList = True 4295 4296 def postParse( self, instring, loc, tokenlist ): 4297 return [ tokenlist ] 4298 4299class Dict(TokenConverter): 4300 """ 4301 Converter to return a repetitive expression as a list, but also as a dictionary. 4302 Each element can also be referenced using the first token in the expression as its key. 4303 Useful for tabular report scraping when the first column can be used as a item key. 
4304 4305 Example:: 4306 data_word = Word(alphas) 4307 label = data_word + FollowedBy(':') 4308 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) 4309 4310 text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 4311 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 4312 4313 # print attributes as plain groups 4314 print(OneOrMore(attr_expr).parseString(text).dump()) 4315 4316 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names 4317 result = Dict(OneOrMore(Group(attr_expr))).parseString(text) 4318 print(result.dump()) 4319 4320 # access named fields as dict entries, or output as dict 4321 print(result['shape']) 4322 print(result.asDict()) 4323 prints:: 4324 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] 4325 4326 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 4327 - color: light blue 4328 - posn: upper left 4329 - shape: SQUARE 4330 - texture: burlap 4331 SQUARE 4332 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} 4333 See more examples at L{ParseResults} of accessing fields by results name. 
4334 """ 4335 def __init__( self, expr ): 4336 super(Dict,self).__init__( expr ) 4337 self.saveAsList = True 4338 4339 def postParse( self, instring, loc, tokenlist ): 4340 for i,tok in enumerate(tokenlist): 4341 if len(tok) == 0: 4342 continue 4343 ikey = tok[0] 4344 if isinstance(ikey,int): 4345 ikey = _ustr(tok[0]).strip() 4346 if len(tok)==1: 4347 tokenlist[ikey] = _ParseResultsWithOffset("",i) 4348 elif len(tok)==2 and not isinstance(tok[1],ParseResults): 4349 tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) 4350 else: 4351 dictvalue = tok.copy() #ParseResults(i) 4352 del dictvalue[0] 4353 if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): 4354 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) 4355 else: 4356 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) 4357 4358 if self.resultsName: 4359 return [ tokenlist ] 4360 else: 4361 return tokenlist 4362 4363 4364class Suppress(TokenConverter): 4365 """ 4366 Converter for ignoring the results of a parsed expression. 4367 4368 Example:: 4369 source = "a, b, c,d" 4370 wd = Word(alphas) 4371 wd_list1 = wd + ZeroOrMore(',' + wd) 4372 print(wd_list1.parseString(source)) 4373 4374 # often, delimiters that are useful during parsing are just in the 4375 # way afterward - use Suppress to keep them out of the parsed output 4376 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) 4377 print(wd_list2.parseString(source)) 4378 prints:: 4379 ['a', ',', 'b', ',', 'c', ',', 'd'] 4380 ['a', 'b', 'c', 'd'] 4381 (See also L{delimitedList}.) 4382 """ 4383 def postParse( self, instring, loc, tokenlist ): 4384 return [] 4385 4386 def suppress( self ): 4387 return self 4388 4389 4390class OnlyOnce(object): 4391 """ 4392 Wrapper for parse actions, to ensure they are only called once. 
4393 """ 4394 def __init__(self, methodCall): 4395 self.callable = _trim_arity(methodCall) 4396 self.called = False 4397 def __call__(self,s,l,t): 4398 if not self.called: 4399 results = self.callable(s,l,t) 4400 self.called = True 4401 return results 4402 raise ParseException(s,l,"") 4403 def reset(self): 4404 self.called = False 4405 4406def traceParseAction(f): 4407 """ 4408 Decorator for debugging parse actions. 4409 4410 When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} 4411 When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. 4412 4413 Example:: 4414 wd = Word(alphas) 4415 4416 @traceParseAction 4417 def remove_duplicate_chars(tokens): 4418 return ''.join(sorted(set(''.join(tokens)))) 4419 4420 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) 4421 print(wds.parseString("slkdjs sld sldd sdlf sdljf")) 4422 prints:: 4423 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) 4424 <<leaving remove_duplicate_chars (ret: 'dfjkls') 4425 ['dfjkls'] 4426 """ 4427 f = _trim_arity(f) 4428 def z(*paArgs): 4429 thisFunc = f.__name__ 4430 s,l,t = paArgs[-3:] 4431 if len(paArgs)>3: 4432 thisFunc = paArgs[0].__class__.__name__ + '.' 
+ thisFunc 4433 sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) 4434 try: 4435 ret = f(*paArgs) 4436 except Exception as exc: 4437 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) 4438 raise 4439 sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) ) 4440 return ret 4441 try: 4442 z.__name__ = f.__name__ 4443 except AttributeError: 4444 pass 4445 return z 4446 4447# 4448# global helpers 4449# 4450def delimitedList( expr, delim=",", combine=False ): 4451 """ 4452 Helper to define a delimited list of expressions - the delimiter defaults to ','. 4453 By default, the list elements and delimiters can have intervening whitespace, and 4454 comments, but this can be overridden by passing C{combine=True} in the constructor. 4455 If C{combine} is set to C{True}, the matching tokens are returned as a single token 4456 string, with the delimiters included; otherwise, the matching tokens are returned 4457 as a list of tokens, with the delimiters suppressed. 4458 4459 Example:: 4460 delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc'] 4461 delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] 4462 """ 4463 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." 4464 if combine: 4465 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) 4466 else: 4467 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) 4468 4469def countedArray( expr, intExpr=None ): 4470 """ 4471 Helper to define a counted list of expressions. 4472 This helper defines a pattern of the form:: 4473 integer expr expr expr... 4474 where the leading integer tells how many expr expressions follow. 4475 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. 4476 4477 If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. 
4478 4479 Example:: 4480 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] 4481 4482 # in this parser, the leading integer value is given in binary, 4483 # '10' indicating that 2 values are in the array 4484 binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) 4485 countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] 4486 """ 4487 arrayExpr = Forward() 4488 def countFieldParseAction(s,l,t): 4489 n = t[0] 4490 arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 4491 return [] 4492 if intExpr is None: 4493 intExpr = Word(nums).setParseAction(lambda t:int(t[0])) 4494 else: 4495 intExpr = intExpr.copy() 4496 intExpr.setName("arrayLen") 4497 intExpr.addParseAction(countFieldParseAction, callDuringTry=True) 4498 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') 4499 4500def _flatten(L): 4501 ret = [] 4502 for i in L: 4503 if isinstance(i,list): 4504 ret.extend(_flatten(i)) 4505 else: 4506 ret.append(i) 4507 return ret 4508 4509def matchPreviousLiteral(expr): 4510 """ 4511 Helper to define an expression that is indirectly defined from 4512 the tokens matched in a previous expression, that is, it looks 4513 for a 'repeat' of a previous expression. For example:: 4514 first = Word(nums) 4515 second = matchPreviousLiteral(first) 4516 matchExpr = first + ":" + second 4517 will match C{"1:1"}, but not C{"1:2"}. Because this matches a 4518 previous literal, will also match the leading C{"1:1"} in C{"1:10"}. 4519 If this is not desired, use C{matchPreviousExpr}. 4520 Do I{not} use with packrat parsing enabled. 
4521 """ 4522 rep = Forward() 4523 def copyTokenToRepeater(s,l,t): 4524 if t: 4525 if len(t) == 1: 4526 rep << t[0] 4527 else: 4528 # flatten t tokens 4529 tflat = _flatten(t.asList()) 4530 rep << And(Literal(tt) for tt in tflat) 4531 else: 4532 rep << Empty() 4533 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 4534 rep.setName('(prev) ' + _ustr(expr)) 4535 return rep 4536 4537def matchPreviousExpr(expr): 4538 """ 4539 Helper to define an expression that is indirectly defined from 4540 the tokens matched in a previous expression, that is, it looks 4541 for a 'repeat' of a previous expression. For example:: 4542 first = Word(nums) 4543 second = matchPreviousExpr(first) 4544 matchExpr = first + ":" + second 4545 will match C{"1:1"}, but not C{"1:2"}. Because this matches by 4546 expressions, will I{not} match the leading C{"1:1"} in C{"1:10"}; 4547 the expressions are evaluated first, and then compared, so 4548 C{"1"} is compared with C{"10"}. 4549 Do I{not} use with packrat parsing enabled. 
4550 """ 4551 rep = Forward() 4552 e2 = expr.copy() 4553 rep <<= e2 4554 def copyTokenToRepeater(s,l,t): 4555 matchTokens = _flatten(t.asList()) 4556 def mustMatchTheseTokens(s,l,t): 4557 theseTokens = _flatten(t.asList()) 4558 if theseTokens != matchTokens: 4559 raise ParseException("",0,"") 4560 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) 4561 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 4562 rep.setName('(prev) ' + _ustr(expr)) 4563 return rep 4564 4565def _escapeRegexRangeChars(s): 4566 #~ escape these chars: ^-] 4567 for c in r"\^-]": 4568 s = s.replace(c,_bslash+c) 4569 s = s.replace("\n",r"\n") 4570 s = s.replace("\t",r"\t") 4571 return _ustr(s) 4572 4573def oneOf( strs, caseless=False, useRegex=True ): 4574 """ 4575 Helper to quickly define a set of alternative Literals, and makes sure to do 4576 longest-first testing when there is a conflict, regardless of the input order, 4577 but returns a C{L{MatchFirst}} for best performance. 4578 4579 Parameters: 4580 - strs - a string of space-delimited literals, or a collection of string literals 4581 - caseless - (default=C{False}) - treat all literals as caseless 4582 - useRegex - (default=C{True}) - as an optimization, will generate a Regex 4583 object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or 4584 if creating a C{Regex} raises an exception) 4585 4586 Example:: 4587 comp_oper = oneOf("< = > <= >= !=") 4588 var = Word(alphas) 4589 number = Word(nums) 4590 term = var | number 4591 comparison_expr = term + comp_oper + term 4592 print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) 4593 prints:: 4594 [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] 4595 """ 4596 if caseless: 4597 isequal = ( lambda a,b: a.upper() == b.upper() ) 4598 masks = ( lambda a,b: b.upper().startswith(a.upper()) ) 4599 parseElementClass = CaselessLiteral 4600 else: 4601 isequal = ( lambda a,b: a == b ) 4602 masks = ( lambda a,b: 
b.startswith(a) ) 4603 parseElementClass = Literal 4604 4605 symbols = [] 4606 if isinstance(strs,basestring): 4607 symbols = strs.split() 4608 elif isinstance(strs, Iterable): 4609 symbols = list(strs) 4610 else: 4611 warnings.warn("Invalid argument to oneOf, expected string or iterable", 4612 SyntaxWarning, stacklevel=2) 4613 if not symbols: 4614 return NoMatch() 4615 4616 i = 0 4617 while i < len(symbols)-1: 4618 cur = symbols[i] 4619 for j,other in enumerate(symbols[i+1:]): 4620 if ( isequal(other, cur) ): 4621 del symbols[i+j+1] 4622 break 4623 elif ( masks(cur, other) ): 4624 del symbols[i+j+1] 4625 symbols.insert(i,other) 4626 cur = other 4627 break 4628 else: 4629 i += 1 4630 4631 if not caseless and useRegex: 4632 #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) 4633 try: 4634 if len(symbols)==len("".join(symbols)): 4635 return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols)) 4636 else: 4637 return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols)) 4638 except Exception: 4639 warnings.warn("Exception creating Regex for oneOf, building MatchFirst", 4640 SyntaxWarning, stacklevel=2) 4641 4642 4643 # last resort, just use MatchFirst 4644 return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) 4645 4646def dictOf( key, value ): 4647 """ 4648 Helper to easily and clearly define a dictionary by specifying the respective patterns 4649 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens 4650 in the proper order. The key pattern can include delimiting markers or punctuation, 4651 as long as they are suppressed, thereby leaving the significant key text. The value 4652 pattern can include named results, so that the C{Dict} results can include named token 4653 fields. 
4654 4655 Example:: 4656 text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 4657 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 4658 print(OneOrMore(attr_expr).parseString(text).dump()) 4659 4660 attr_label = label 4661 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) 4662 4663 # similar to Dict, but simpler call format 4664 result = dictOf(attr_label, attr_value).parseString(text) 4665 print(result.dump()) 4666 print(result['shape']) 4667 print(result.shape) # object attribute access works too 4668 print(result.asDict()) 4669 prints:: 4670 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 4671 - color: light blue 4672 - posn: upper left 4673 - shape: SQUARE 4674 - texture: burlap 4675 SQUARE 4676 SQUARE 4677 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} 4678 """ 4679 return Dict( ZeroOrMore( Group ( key + value ) ) ) 4680 4681def originalTextFor(expr, asString=True): 4682 """ 4683 Helper to return the original, untokenized text for a given expression. Useful to 4684 restore the parsed fields of an HTML start tag into the raw tag text itself, or to 4685 revert separate tokens with intervening whitespace back to the original matching 4686 input text. By default, returns astring containing the original parsed text. 4687 4688 If the optional C{asString} argument is passed as C{False}, then the return value is a 4689 C{L{ParseResults}} containing any results names that were originally matched, and a 4690 single token containing the original matched text from the input string. So if 4691 the expression passed to C{L{originalTextFor}} contains expressions with defined 4692 results names, you must set C{asString} to C{False} if you want to preserve those 4693 results name values. 
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions.

    Wraps C{expr} in a C{TokenConverter} and attaches a parse action that
    replaces the matched tokens with the first token only.
    """
    def _first_token(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.setParseAction(_first_token)
def srange(s):
    r"""
    Helper to expand a regexp-style C{'[]'} character class into the string of
    characters it denotes, for use in Word construction::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s; the return value is every
    character of the expanded set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    If the input cannot be parsed or expanded for any reason, an empty string
    is returned instead of raising.
    """
    def _expand(item):
        # a range arrives as a grouped (start, end) pair; anything else is
        # already a single character
        if isinstance(item, ParseResults):
            first, last = ord(item[0]), ord(item[1])
            return ''.join(unichr(code) for code in range(first, last + 1))
        return item

    try:
        parsed = _reBracketExpr.parseString(s)
        return "".join(_expand(item) for item in parsed.body)
    except Exception:
        return ""
def tokenMap(func, *args):
    """
    Helper to define a parse action that applies a function to every token of a
    ParseResults list.  Any additional C{args} are forwarded to the function
    after the token, as in
    C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which
    converts the parsed data to an integer using base 16.

    The returned parse action is named after C{func}: its C{__name__} if it has
    one, otherwise the name of its class.

    Example (compare the last to the example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # label the parse action after the mapped callable
    try:
        class_name = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', class_name)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    Parameters:
     - tagStr - tag name as a string, or an expression matching the tag name
       (its C{name} attribute is then used to label the results)
     - xml - if true, the tag keyword is case-sensitive and attribute values
       must be double-quoted; otherwise the tag keyword is caseless, attribute
       names are lower-cased, and values may be quoted, unquoted or omitted

    Returns a C{(openTag, closeTag)} tuple; both expressions carry a C{tag}
    attribute holding the tag's name.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = tagAttrName + Suppress("=") + tagAttrValue
    else:
        printablesLessRAbrack = "".join(c for c in printables if c != ">")
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        attribute = tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue)

    # "empty" is True when the tag is self-closing (ends in "/>"), else False
    emptyFlag = Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s,l,t: t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(Group(attribute))) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names look like "startDiv"/"endDiv"; a namespace colon is
    # dropped, so "ns:tag" yields "startNsTag"/"endNsTag"
    camelName = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
def withAttribute(*args, **attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Use C{withAttribute} to qualify a
    starting tag with a required attribute value, to avoid false matches on common
    tags such as C{<TD>} or C{<DIV>}.

    Specify the required attribute names and values as one of:
     - keyword arguments, as in C{(align="right")}
     - an explicit dict with the C{**} operator, when an attribute name is also a
       Python reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a series of name-value tuples, as in C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
    If positional name-value tuples are given, keyword arguments are ignored.

    Each name is looked up in the parsed tokens exactly as given (start tags built
    by C{_makeTags} in HTML mode store their attribute names in lower case).

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    The returned parse action raises C{ParseException} if a required attribute is
    missing or has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    required = args[:] if args else attrDict.items()
    attrs = [(name, value) for name, value in required]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
withAttribute.ANY_VALUE = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression grammar
     - opList - list of tuples, one for each operator precedence level in the
       expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
           may also be a string, which will be converted to a Literal;
           if numTerms is 3, opExpr is a tuple or list of two expressions, for the
           two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
           be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
           right or left associative, using the pyparsing-defined
           constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
        - parseAction is the parse action to be associated with
           expressions matching this operator expression (the
           parse action tuple member may be omitted); if the parse action
           is passed a tuple or list of functions, this is equivalent to
           calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is 3 and opExpr is not a pair of
    expressions, if numTerms is not 1, 2, or 3, or if rightLeftAssoc is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # the innermost operand is either a base term or a parenthesized sub-expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the optional parse action may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate and unpack BEFORE building the name: formatting a None
            # or a list with "%s%s" raises TypeError, which used to preempt
            # the intended ValueError (and rejected a list of two expressions)
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: adjacent terms form the operation
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its own operator form, or falls through to the
        # previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5171 5172 Use the C{ignoreExpr} argument to define expressions that may contain 5173 opening or closing characters that should not be treated as opening 5174 or closing characters for nesting, such as quotedString or a comment 5175 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. 5176 The default is L{quotedString}, but if no expressions are to be ignored, 5177 then pass C{None} for this argument. 5178 5179 Example:: 5180 data_type = oneOf("void int short long char float double") 5181 decl_data_type = Combine(data_type + Optional(Word('*'))) 5182 ident = Word(alphas+'_', alphanums+'_') 5183 number = pyparsing_common.number 5184 arg = Group(decl_data_type + ident) 5185 LPAR,RPAR = map(Suppress, "()") 5186 5187 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) 5188 5189 c_function = (decl_data_type("type") 5190 + ident("name") 5191 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR 5192 + code_body("body")) 5193 c_function.ignore(cStyleComment) 5194 5195 source_code = ''' 5196 int is_odd(int x) { 5197 return (x%2); 5198 } 5199 5200 int dec_to_hex(char hchar) { 5201 if (hchar >= '0' && hchar <= '9') { 5202 return (ord(hchar)-ord('0')); 5203 } else { 5204 return (10+ord(hchar)-ord('A')); 5205 } 5206 } 5207 ''' 5208 for func in c_function.searchString(source_code): 5209 print("%(name)s (%(type)s) args: %(args)s" % func) 5210 5211 prints:: 5212 is_odd (int) args: [['int', 'x']] 5213 dec_to_hex (int) args: [['char', 'hchar']] 5214 """ 5215 if opener == closer: 5216 raise ValueError("opening and closing strings cannot be the same") 5217 if content is None: 5218 if isinstance(opener,basestring) and isinstance(closer,basestring): 5219 if len(opener) == 1 and len(closer)==1: 5220 if ignoreExpr is not None: 5221 content = (Combine(OneOrMore(~ignoreExpr + 5222 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 5223 ).setParseAction(lambda t:t[0].strip())) 5224 else: 5225 content = 
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
         is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
         (multiple statementWithIndentedBlock expressions within a single grammar
         should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
         the current level; set to False for block of left-most statements
         (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified in place: a backslash followed
    by a line end is added to its ignored expressions.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to compare
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if column < expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if not column > indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: open a new indentation level
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        # back at (or left of) the enclosing level: close the current level
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements, each required to sit at the current indent level
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
(to end of line)}" 5381 5382cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") 5383"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" 5384 5385javaStyleComment = cppStyleComment 5386"Same as C{L{cppStyleComment}}" 5387 5388pythonStyleComment = Regex(r"#.*").setName("Python style comment") 5389"Comment of the form C{# ... (to end of line)}" 5390 5391_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + 5392 Optional( Word(" \t") + 5393 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") 5394commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") 5395"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. 5396 This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" 5397 5398# some other useful expressions - using lower-case class name since we are really using this as a namespace 5399class pyparsing_common: 5400 """ 5401 Here are some common low-level expressions that may be useful in jump-starting parser development: 5402 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>}) 5403 - common L{programming identifiers<identifier>} 5404 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>}) 5405 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>} 5406 - L{UUID<uuid>} 5407 - L{comma-separated list<comma_separated_list>} 5408 Parse actions: 5409 - C{L{convertToInteger}} 5410 - C{L{convertToFloat}} 5411 - C{L{convertToDate}} 5412 - C{L{convertToDatetime}} 5413 - C{L{stripHTMLTags}} 5414 - C{L{upcaseTokens}} 5415 - C{L{downcaseTokens}} 5416 5417 Example:: 5418 pyparsing_common.number.runTests(''' 5419 # any int or real number, returned as the appropriate type 5420 100 5421 -100 5422 +100 5423 3.14159 5424 6.02e23 5425 1e-12 5426 ''') 
5427 5428 pyparsing_common.fnumber.runTests(''' 5429 # any int or real number, returned as float 5430 100 5431 -100 5432 +100 5433 3.14159 5434 6.02e23 5435 1e-12 5436 ''') 5437 5438 pyparsing_common.hex_integer.runTests(''' 5439 # hex numbers 5440 100 5441 FF 5442 ''') 5443 5444 pyparsing_common.fraction.runTests(''' 5445 # fractions 5446 1/2 5447 -3/4 5448 ''') 5449 5450 pyparsing_common.mixed_integer.runTests(''' 5451 # mixed fractions 5452 1 5453 1/2 5454 -3/4 5455 1-3/4 5456 ''') 5457 5458 import uuid 5459 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5460 pyparsing_common.uuid.runTests(''' 5461 # uuid 5462 12345678-1234-5678-1234-567812345678 5463 ''') 5464 prints:: 5465 # any int or real number, returned as the appropriate type 5466 100 5467 [100] 5468 5469 -100 5470 [-100] 5471 5472 +100 5473 [100] 5474 5475 3.14159 5476 [3.14159] 5477 5478 6.02e23 5479 [6.02e+23] 5480 5481 1e-12 5482 [1e-12] 5483 5484 # any int or real number, returned as float 5485 100 5486 [100.0] 5487 5488 -100 5489 [-100.0] 5490 5491 +100 5492 [100.0] 5493 5494 3.14159 5495 [3.14159] 5496 5497 6.02e23 5498 [6.02e+23] 5499 5500 1e-12 5501 [1e-12] 5502 5503 # hex numbers 5504 100 5505 [256] 5506 5507 FF 5508 [255] 5509 5510 # fractions 5511 1/2 5512 [0.5] 5513 5514 -3/4 5515 [-0.75] 5516 5517 # mixed fractions 5518 1 5519 [1] 5520 5521 1/2 5522 [0.5] 5523 5524 -3/4 5525 [-0.75] 5526 5527 1-3/4 5528 [1.75] 5529 5530 # uuid 5531 12345678-1234-5678-1234-567812345678 5532 [UUID('12345678-1234-5678-1234-567812345678')] 5533 """ 5534 5535 convertToInteger = tokenMap(int) 5536 """ 5537 Parse action for converting parsed integers to Python int 5538 """ 5539 5540 convertToFloat = tokenMap(float) 5541 """ 5542 Parse action for converting parsed numbers to Python float 5543 """ 5544 5545 integer = Word(nums).setName("integer").setParseAction(convertToInteger) 5546 """expression that parses an unsigned integer, returns an int""" 5547 5548 hex_integer = Word(hexnums).setName("hex 
integer").setParseAction(tokenMap(int,16)) 5549 """expression that parses a hexadecimal integer, returns an int""" 5550 5551 signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 5552 """expression that parses an integer with optional leading sign, returns an int""" 5553 5554 fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") 5555 """fractional expression of an integer divided by an integer, returns a float""" 5556 fraction.addParseAction(lambda t: t[0]/t[-1]) 5557 5558 mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 5559 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 5560 mixed_integer.addParseAction(sum) 5561 5562 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) 5563 """expression that parses a floating point number and returns a float""" 5564 5565 sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) 5566 """expression that parses a floating point number with optional scientific notation and returns a float""" 5567 5568 # streamlining this expression makes the docs nicer-looking 5569 number = (sci_real | real | signed_integer).streamline() 5570 """any numeric expression, returns the corresponding Python type""" 5571 5572 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 5573 """any int or real number, returned as float""" 5574 5575 identifier = Word(alphas+'_', alphanums+'_').setName("identifier") 5576 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 5577 5578 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 
address") 5579 "IPv4 address (C{0.0.0.0 - 255.255.255.255})" 5580 5581 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 5582 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") 5583 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") 5584 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 5585 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 5586 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 5587 "IPv6 address (long, short, or mixed form)" 5588 5589 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 5590 "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 5591 5592 @staticmethod 5593 def convertToDate(fmt="%Y-%m-%d"): 5594 """ 5595 Helper to create a parse action for converting parsed date string to Python datetime.date 5596 5597 Params - 5598 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) 5599 5600 Example:: 5601 date_expr = pyparsing_common.iso8601_date.copy() 5602 date_expr.setParseAction(pyparsing_common.convertToDate()) 5603 print(date_expr.parseString("1999-12-31")) 5604 prints:: 5605 [datetime.date(1999, 12, 31)] 5606 """ 5607 def cvt_fn(s,l,t): 5608 try: 5609 return datetime.strptime(t[0], fmt).date() 5610 except ValueError as ve: 5611 raise ParseException(s, l, str(ve)) 5612 return cvt_fn 5613 5614 @staticmethod 5615 def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): 5616 """ 5617 Helper to create a parse action for converting parsed datetime string to Python datetime.datetime 5618 5619 Params - 5620 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) 5621 5622 Example:: 5623 dt_expr = 
pyparsing_common.iso8601_datetime.copy() 5624 dt_expr.setParseAction(pyparsing_common.convertToDatetime()) 5625 print(dt_expr.parseString("1999-12-31T23:59:59.999")) 5626 prints:: 5627 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] 5628 """ 5629 def cvt_fn(s,l,t): 5630 try: 5631 return datetime.strptime(t[0], fmt) 5632 except ValueError as ve: 5633 raise ParseException(s, l, str(ve)) 5634 return cvt_fn 5635 5636 iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date") 5637 "ISO8601 date (C{yyyy-mm-dd})" 5638 5639 iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") 5640 "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" 5641 5642 uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") 5643 "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" 5644 5645 _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() 5646 @staticmethod 5647 def stripHTMLTags(s, l, tokens): 5648 """ 5649 Parse action to remove HTML tags from web page HTML source 5650 5651 Example:: 5652 # strip HTML links from normal text 5653 text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>' 5654 td,td_end = makeHTMLTags("TD") 5655 table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end 5656 5657 print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' 5658 """ 5659 return pyparsing_common._html_stripper.transformString(tokens[0]) 5660 5661 _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') 5662 + Optional( White(" \t") ) ) ).streamline().setName("commaItem") 5663 comma_separated_list = delimitedList( Optional( 
quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") 5664 """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" 5665 5666 upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) 5667 """Parse action to convert tokens to upper case.""" 5668 5669 downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) 5670 """Parse action to convert tokens to lower case.""" 5671 5672 5673if __name__ == "__main__": 5674 5675 selectToken = CaselessLiteral("select") 5676 fromToken = CaselessLiteral("from") 5677 5678 ident = Word(alphas, alphanums + "_$") 5679 5680 columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 5681 columnNameList = Group(delimitedList(columnName)).setName("columns") 5682 columnSpec = ('*' | columnNameList) 5683 5684 tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 5685 tableNameList = Group(delimitedList(tableName)).setName("tables") 5686 5687 simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") 5688 5689 # demo runTests method, including embedded comments in test string 5690 simpleSQL.runTests(""" 5691 # '*' as column list and dotted table name 5692 select * from SYS.XYZZY 5693 5694 # caseless match on "SELECT", and casts back to "select" 5695 SELECT * from XYZZY, ABC 5696 5697 # list of column names, and mixed case SELECT keyword 5698 Select AA,BB,CC from Sys.dual 5699 5700 # multiple tables 5701 Select A, B, C from Sys.dual, Table2 5702 5703 # invalid SELECT keyword - should fail 5704 Xelect A, B, C from Sys.dual 5705 5706 # incomplete command - should fail 5707 Select 5708 5709 # invalid column name - should fail 5710 Select ^^^ frox Sys.dual 5711 5712 """) 5713 5714 pyparsing_common.number.runTests(""" 5715 100 5716 -100 5717 +100 5718 3.14159 5719 6.02e23 5720 1e-12 5721 """) 5722 5723 # any int or real number, returned as float 5724 
pyparsing_common.fnumber.runTests(""" 5725 100 5726 -100 5727 +100 5728 3.14159 5729 6.02e23 5730 1e-12 5731 """) 5732 5733 pyparsing_common.hex_integer.runTests(""" 5734 100 5735 FF 5736 """) 5737 5738 import uuid 5739 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5740 pyparsing_common.uuid.runTests(""" 5741 12345678-1234-5678-1234-567812345678 5742 """) 5743