from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
from fontTools.feaLib.location import FeatureLibLocation
import re
import os

try:
    import cython
except ImportError:
    # if cython not installed, use mock module with no-op decorators and types
    from fontTools.misc import cython


class Lexer(object):
    """Tokenizer for feature-file text.

    Iterating over a ``Lexer`` yields ``(token_type, token, location)``
    tuples, where ``token_type`` is one of the string constants below and
    ``location`` is a ``FeatureLibLocation``.  NEWLINE tokens are produced
    internally by ``next_`` but filtered out by ``__next__``.
    """

    NUMBER = "NUMBER"
    HEXADECIMAL = "HEXADECIMAL"
    OCTAL = "OCTAL"
    NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        ``filename`` is only used for reporting locations; when it is falsy
        the placeholder ``"<features>"`` is reported instead.
        """
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a NEWLINE."""
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the location of the current position (1-based column)."""
        column = self.pos_ - self.line_start_ + 1
        return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, including NEWLINE tokens.

        Returns:
            A ``(token_type, token, location)`` tuple.

        Raises:
            StopIteration: when the end of the text has been reached.
            FeatureLibError: on malformed input (unexpected character,
                unterminated string, bad glyph class name, malformed
                ``include`` file name).
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None
        # ``None in "..."`` raises TypeError, so the lookahead tests must be
        # guarded for the case where cur_char is the last character of the
        # text.  (Using "" instead of None would be wrong: "" is contained
        # in every string.)
        next_is_digit = next_char is not None and next_char in Lexer.CHAR_DIGIT_
        next_is_hex_marker = next_char is not None and next_char in "xX"

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break.
            self.pos_ += 2 if next_char == "\n" else 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start : self.pos_], location)

        if self.mode_ == Lexer.MODE_FILENAME_:
            # Entered after an "include" NAME token: the next token must be
            # a parenthesized file name.
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1 : self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen",
                    location,
                )
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start : self.pos_]
            if token == "include":
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_is_hex_marker:
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
        if cur_char == "0" and next_is_digit:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.OCTAL, int(text[start : self.pos_], 8), location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char == "-" and next_is_digit:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string", location)
        raise FeatureLibError("Unexpected character: %r" % cur_char, location)

    def scan_over_(self, valid):
        """Advance the position past every consecutive character in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance the position up to the next character in ``stop_at``.

        Stops at the end of the text if no such character is found.
        """
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Return the raw text of an anonymous block as a single token.

        Skips the remainder of the current line, then collects everything
        up to (but not including) the terminating ``} tag;``.

        Raises:
            FeatureLibError: if no terminator for ``tag`` is found.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # The tag is literal text; escape it so characters such as "." or
        # "+" (which are legal in names) are not interpreted as regex syntax.
        regexp = r"}\s*" + re.escape(tag) + r"\s*;"
        split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag, location
            )
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)


class IncludingLexer(object):
    """A Lexer that follows include statements.

    The OpenType feature file specification states that due to
    historical reasons, relative imports should be resolved in this
    order:

    1. If the source font is UFO format, then relative to the UFO's
       font directory
    2. relative to the top-level include file
    3. relative to the parent include file

    We only support 1 (via includeDir) and 2.
    """

    def __init__(self, featurefile, *, includeDir=None):
        """Initializes an IncludingLexer.

        Behavior:
            If includeDir is passed, it will be used to determine the top-level
            include directory to use for all encountered include statements. If it is
            not passed, ``os.path.dirname(featurefile)`` will be considered the
            include directory.
        """

        self.lexers_ = [self.make_lexer_(featurefile)]
        self.featurefilepath = self.lexers_[0].filename_
        self.includeDir = includeDir

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, transparently descending into includes.

        Raises:
            StopIteration: when every lexer on the stack is exhausted.
            FeatureLibError: if ``include`` is not followed by a file name
                or includes are nested too deeply.
            IncludedFeaNotFound: if an included file does not exist.
        """
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                # Current file is exhausted; resume the including file.
                self.lexers_.pop()
                continue
            if token_type == Lexer.NAME and token == "include":
                fname_type, fname_token, fname_location = next(lexer)
                if fname_type != Lexer.FILENAME:
                    raise FeatureLibError("Expected file name", fname_location)
                # semi_type, semi_token, semi_location = lexer.next()
                # if semi_type is not Lexer.SYMBOL or semi_token != ";":
                #    raise FeatureLibError("Expected ';'", semi_location)
                if os.path.isabs(fname_token):
                    path = fname_token
                else:
                    if self.includeDir is not None:
                        curpath = self.includeDir
                    elif self.featurefilepath is not None:
                        curpath = os.path.dirname(self.featurefilepath)
                    else:
                        # if the IncludingLexer was initialized from an in-memory
                        # file-like stream, it doesn't have a 'name' pointing to
                        # its filesystem path, therefore we fall back to using the
                        # current working directory to resolve relative includes
                        curpath = os.getcwd()
                    path = os.path.join(curpath, fname_token)
                if len(self.lexers_) >= 5:
                    raise FeatureLibError("Too many recursive includes", fname_location)
                try:
                    self.lexers_.append(self.make_lexer_(path))
                except FileNotFoundError as err:
                    raise IncludedFeaNotFound(fname_token, fname_location) from err
            else:
                return (token_type, token, location)
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a ``Lexer`` from a readable file object or a file path.

        A file object (anything with a ``read`` attribute) is read but not
        closed.  A path is opened as UTF-8 text and always closed, even if
        reading fails.  The lexer's file name is the object's ``name``
        attribute, or ``None`` if it has none.
        """
        if hasattr(file_or_path, "read"):
            data = file_or_path.read()
            filename = getattr(file_or_path, "name", None)
        else:
            with open(file_or_path, "r", encoding="utf-8") as fileobj:
                data = fileobj.read()
                filename = getattr(fileobj, "name", None)
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        """Delegate anonymous-block scanning to the innermost active lexer."""
        return self.lexers_[-1].scan_anonymous_block(tag)


class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements, emits them as-is."""

    def __next__(self):  # Python 3
        return next(self.lexers_[0])