xref: /aosp_15_r20/external/fonttools/Lib/fontTools/feaLib/lexer.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1*e1fe3e4aSElliott Hughesfrom fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
2*e1fe3e4aSElliott Hughesfrom fontTools.feaLib.location import FeatureLibLocation
3*e1fe3e4aSElliott Hughesimport re
4*e1fe3e4aSElliott Hughesimport os
5*e1fe3e4aSElliott Hughes
6*e1fe3e4aSElliott Hughestry:
7*e1fe3e4aSElliott Hughes    import cython
8*e1fe3e4aSElliott Hughesexcept ImportError:
9*e1fe3e4aSElliott Hughes    # if cython not installed, use mock module with no-op decorators and types
10*e1fe3e4aSElliott Hughes    from fontTools.misc import cython
11*e1fe3e4aSElliott Hughes
12*e1fe3e4aSElliott Hughes
class Lexer(object):
    """Tokenizer for feature-file source text.

    Iterating over a ``Lexer`` yields ``(token_type, token, location)``
    tuples, where ``token_type`` is one of the string constants defined on
    this class, ``token`` is the decoded value (``int``, ``float``, ``str``
    or ``None``) and ``location`` is whatever :meth:`location_` returned at
    the start of the token.  ``NEWLINE`` tokens are produced by
    :meth:`next_` but filtered out by :meth:`__next__`.
    """

    NUMBER = "NUMBER"
    HEXADECIMAL = "HEXADECIMAL"
    OCTAL = "OCTAL"
    NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    # After the NAME token "include" the lexer switches to MODE_FILENAME_ so
    # that the following "(...)" is read as a single FILENAME token.
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        Args:
            text: The complete source text to tokenize.
            filename: Name reported in token locations; when falsy,
                ``"<features>"`` is reported instead.
        """
        self.filename_ = filename
        self.line_ = 1  # 1-based line number of the current position
        self.pos_ = 0  # index into text_ of the next unread character
        self.line_start_ = 0  # index into text_ where the current line begins
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a NEWLINE.

        Raises:
            StopIteration: When the end of the text has been reached.
        """
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the location (file, line, 1-based column) of ``pos_``."""
        column = self.pos_ - self.line_start_ + 1
        return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, NEWLINE tokens included.

        Returns:
            A ``(token_type, token, location)`` tuple.

        Raises:
            StopIteration: When only whitespace remains before end of text.
            FeatureLibError: On an unexpected character, a malformed glyph
                class name, an unterminated string, or a malformed file name
                after ``include``.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        # There is no look-ahead character when cur_char is the last one in
        # the text.  Membership tests are guarded explicitly because
        # ``None in "..."`` raises TypeError (and ``"" in "..."`` is True).
        next_char = text[start + 1] if start + 1 < limit else None
        next_is_digit = next_char is not None and next_char in Lexer.CHAR_DIGIT_

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break.
            self.pos_ += 2 if next_char == "\n" else 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start : self.pos_], location)

        if self.mode_ == Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            # Token value is the text between the parentheses.
            return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            # Backslash followed by digits is a CID.
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1 : self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen",
                    location,
                )
            return (Lexer.GLYPHCLASS, glyphclass, location)
        # NAME is tested before SYMBOL: characters such as "+" and ":" occur
        # in both sets and are lexed as the start of a name.
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start : self.pos_]
            if token == "include":
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char is not None and next_char in "xX":
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
        if cur_char == "0" and next_is_digit:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.OCTAL, int(text[start : self.pos_], 8), location)
        if cur_char in Lexer.CHAR_DIGIT_ or (cur_char == "-" and next_is_digit):
            if cur_char == "-":
                self.pos_ += 1  # step over the sign
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < limit and text[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string", location)
        raise FeatureLibError("Unexpected character: %r" % cur_char, location)

    def scan_over_(self, valid):
        """Advance ``pos_`` past every consecutive character found in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``pos_`` to the first character found in ``stop_at``.

        ``pos_`` ends up at the end of the text if no such character exists.
        """
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Read the raw body of an anonymous block terminated by ``} tag;``.

        The rest of the current line is skipped, then everything up to (but
        not including) the closing ``}`` is returned verbatim.  ``pos_`` is
        left on that closing ``}``.

        Args:
            tag: The block's tag; surrounding whitespace is ignored.

        Returns:
            ``(Lexer.ANONYMOUS_BLOCK, body_text, location)``.

        Raises:
            FeatureLibError: If no ``} tag;`` terminator is found.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # The tag is matched literally: names may contain characters such as
        # "+", "*" or "." that would otherwise act as regex operators.
        regexp = r"}\s*" + re.escape(tag) + r"\s*;"
        split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag, location
            )
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)
190*e1fe3e4aSElliott Hughes
191*e1fe3e4aSElliott Hughes
class IncludingLexer(object):
    """A Lexer that follows include statements.

    The OpenType feature file specification states that due to
    historical reasons, relative imports should be resolved in this
    order:

    1. If the source font is UFO format, then relative to the UFO's
       font directory
    2. relative to the top-level include file
    3. relative to the parent include file

    We only support 1 (via includeDir) and 2.
    """

    def __init__(self, featurefile, *, includeDir=None):
        """Initializes an IncludingLexer.

        Args:
            featurefile: Either a path to open, or an object with a ``read``
                method that returns the source text.
            includeDir: Optional directory against which relative include
                paths are resolved.

        Behavior:
            If includeDir is passed, it will be used to determine the top-level
            include directory to use for all encountered include statements. If it is
            not passed, ``os.path.dirname(featurefile)`` will be considered the
            include directory.
        """

        # Stack of active lexers: the top-level file first, the innermost
        # included file last.
        self.lexers_ = [self.make_lexer_(featurefile)]
        self.featurefilepath = self.lexers_[0].filename_
        self.includeDir = includeDir

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, descending into included files.

        Raises:
            StopIteration: When every lexer on the stack is exhausted.
            FeatureLibError: If ``include`` is not followed by a file name,
                or if includes are nested too deeply.
            IncludedFeaNotFound: If an included file does not exist.
        """
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                # Innermost file is finished; resume the one that included it.
                self.lexers_.pop()
                continue
            if not (token_type == Lexer.NAME and token == "include"):
                return (token_type, token, location)

            try:
                fname_type, fname_token, fname_location = next(lexer)
            except StopIteration:
                # The file ends right after "include".  Letting StopIteration
                # escape would silently end the whole token stream, dropping
                # whatever remains in the enclosing files.
                raise FeatureLibError("Expected file name", location) from None
            if fname_type != Lexer.FILENAME:
                raise FeatureLibError("Expected file name", fname_location)
            # The ";" that follows the file name is deliberately not consumed
            # here; it is returned as an ordinary token once the included
            # file has been exhausted.
            if os.path.isabs(fname_token):
                path = fname_token
            else:
                if self.includeDir is not None:
                    curpath = self.includeDir
                elif self.featurefilepath is not None:
                    curpath = os.path.dirname(self.featurefilepath)
                else:
                    # if the IncludingLexer was initialized from an in-memory
                    # file-like stream, it doesn't have a 'name' pointing to
                    # its filesystem path, therefore we fall back to using the
                    # current working directory to resolve relative includes
                    curpath = os.getcwd()
                path = os.path.join(curpath, fname_token)
            if len(self.lexers_) >= 5:
                raise FeatureLibError("Too many recursive includes", fname_location)
            try:
                self.lexers_.append(self.make_lexer_(path))
            except FileNotFoundError as err:
                raise IncludedFeaNotFound(fname_token, fname_location) from err
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a ``Lexer`` from a path or from a readable file-like object.

        A file-like object (anything with a ``read`` attribute) is read but
        not closed.  A path is opened as UTF-8 text and closed again, even if
        reading fails.  The lexer's file name is the file object's ``name``
        attribute, or ``None`` when it has none.
        """
        if hasattr(file_or_path, "read"):
            data = file_or_path.read()
            filename = getattr(file_or_path, "name", None)
        else:
            with open(file_or_path, "r", encoding="utf-8") as fileobj:
                data = fileobj.read()
                filename = getattr(fileobj, "name", None)
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        """Delegate anonymous-block scanning to the innermost active lexer."""
        return self.lexers_[-1].scan_anonymous_block(tag)
281*e1fe3e4aSElliott Hughes
282*e1fe3e4aSElliott Hughes
class NonIncludingLexer(IncludingLexer):
    """Variant of IncludingLexer that never opens included files.

    Tokens are passed through unchanged from the top-level lexer, so
    ``include`` statements reach the caller as ordinary tokens.
    """

    def __next__(self):  # Python 3
        top_level_lexer = self.lexers_[0]
        return next(top_level_lexer)
288