xref: /aosp_15_r20/external/fonttools/Lib/fontTools/misc/xmlWriter.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1*e1fe3e4aSElliott Hughes"""xmlWriter.py -- Simple XML authoring class"""
2*e1fe3e4aSElliott Hughes
3*e1fe3e4aSElliott Hughesfrom fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
4*e1fe3e4aSElliott Hughesimport sys
5*e1fe3e4aSElliott Hughesimport os
6*e1fe3e4aSElliott Hughesimport string
7*e1fe3e4aSElliott Hughes
# Default whitespace written once per nesting level by XMLWriter (two spaces);
# used as the default value of XMLWriter's ``indentwhite`` argument.
INDENT = "  "
9*e1fe3e4aSElliott Hughes
10*e1fe3e4aSElliott Hughes
class XMLWriter(object):
    """Incrementally write an indented, UTF-8 encoded XML document.

    The target is either a path, which is opened in binary mode and closed
    again by :meth:`close`, or an object with a ``write`` method, which is
    left open.  Whether the target takes ``bytes`` or ``str`` is probed once
    in the constructor and all output is converted to match.  The XML
    declaration and a newline are written as soon as the writer is created.

    Instances are context managers: leaving the ``with`` block calls
    :meth:`close`.
    """

    def __init__(
        self,
        fileOrPath,
        indentwhite=INDENT,
        idlefunc=None,
        encoding="utf_8",
        newlinestr="\n",
    ):
        """Open the output and write the XML declaration.

        Args:
            fileOrPath: A path to create, an object with a ``write`` method,
                or ``"-"`` for ``sys.stdout``.
            indentwhite: Whitespace written once per indentation level.
            idlefunc: Optional zero-argument callable invoked from
                :meth:`newline` on every 100th call (starting with the first).
            encoding: Must spell UTF-8; case, ``-`` and ``_`` are ignored.
            newlinestr: Line terminator; ``None`` selects ``os.linesep``.

        Raises:
            Exception: If *encoding* is anything other than UTF-8.
        """
        if encoding.lower().replace("-", "").replace("_", "") != "utf8":
            raise Exception("Only UTF-8 encoding is supported.")
        if fileOrPath == "-":
            fileOrPath = sys.stdout
        if hasattr(fileOrPath, "write"):
            # Assume a writable file object owned by the caller.
            self.filename = None
            self.file = fileOrPath
            self._closeStream = False
        else:
            self.filename = fileOrPath
            self.file = open(fileOrPath, "wb")
            self._closeStream = True

        # Figure out if writer expects bytes or unicodes
        try:
            # The bytes check should be first.  See:
            # https://github.com/fonttools/fonttools/pull/233
            self.file.write(b"")
            self.totype = tobytes
        except TypeError:
            # This better not fail.
            self.file.write("")
            self.totype = tostr
        self.indentwhite = self.totype(indentwhite)
        if newlinestr is None:
            newlinestr = os.linesep
        self.newlinestr = self.totype(newlinestr)
        self.indentlevel = 0
        self.stack = []  # names of the tags opened by begintag(), innermost last
        self.needindent = 1  # set after newline(); cleared once indentation is written
        self.idlefunc = idlefunc
        self.idlecounter = 0
        self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
        self.newline()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Close the writer; exceptions from the ``with`` body propagate."""
        self.close()

    def close(self):
        """Close the underlying file, but only if this writer opened it."""
        if self._closeStream:
            self.file.close()

    def write(self, string, indent=True):
        """Writes text, escaping ``&``, ``<``, ``>`` and carriage returns."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section.

        The text is not entity-escaped.  A literal ``]]>`` inside the text
        would end the section prematurely, so each occurrence is split across
        two adjacent CDATA sections; a parser reading them back sees the
        original text.
        """
        text = tostr(string, "utf_8")
        text = text.replace("]]>", "]]]]><![CDATA[>")
        self._writeraw("<![CDATA[" + text + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes.  When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self._writeraw(escape(string), indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Writes *data* without escaping, possibly indented.

        Indentation is written only when *indent* is true and this is the
        first write since the last :meth:`newline`.  With *strip* true,
        surrounding whitespace is removed from the converted data.
        """
        if indent and self.needindent:
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        s = self.totype(data, encoding="utf_8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """Write the line terminator and arm indentation for the next write.

        Also invokes ``idlefunc`` (when set) on every 100th call, counting
        from the first one.
        """
        self.file.write(self.newlinestr)
        self.needindent = 1
        idlecounter = self.idlecounter
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write *data* as an XML comment, passing it through ``escape`` first.

        Multi-line text is split on ``"\\n"``; continuation lines are padded
        with five spaces so they line up under the text after ``<!-- ``.
        """
        first, *rest = escape(data).split("\n")
        self._writeraw("<!-- " + first)
        for line in rest:
            self.newline()
            self._writeraw("     " + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write an empty element ``<TAG attrs/>``.

        Attributes are given as for :meth:`stringifyattrs`.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        self._writeraw("<%s%s/>" % (_TAG_, attrdata))

    def begintag(self, _TAG_, *args, **kwargs):
        """Write a start tag ``<TAG attrs>`` and increase the indentation.

        The tag name is remembered so :meth:`endtag` can check nesting.
        Attributes are given as for :meth:`stringifyattrs`.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        self._writeraw("<%s%s>" % (_TAG_, attrdata))
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Decrease the indentation and write the end tag ``</TAG>``.

        *_TAG_* must be the most recently opened, still unclosed tag.
        """
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        del self.stack[-1]
        self.dedent()
        self._writeraw("</%s>" % _TAG_)

    def dumphex(self, data):
        """Write *data* as hex text, 16 items per line, each line terminated.

        Every line consists of four space-separated groups of up to eight hex
        digits.  A short final line still carries all three separators, so it
        may end in trailing spaces.
        """
        linelength = 16
        hexlinelength = linelength * 2
        chunksize = 8
        for start in range(0, len(data), linelength):
            hexline = hexStr(data[start : start + linelength])
            # Always four groups; groups past the end of hexline are empty.
            line = " ".join(
                hexline[j : j + chunksize]
                for j in range(0, hexlinelength, chunksize)
            )
            self._writeraw(line)
            self.newline()

    def indent(self):
        """Increase the indentation level by one."""
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        """Decrease the indentation level by one; it must be above zero."""
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Return the attribute text for a tag, each attribute preceded by a space.

        Attributes come either as keyword arguments, which are written sorted
        by name, or as one positional iterable of ``(name, value)`` pairs,
        which keeps its order.  Values that are neither ``str`` nor ``bytes``
        are converted with ``str()``; all values go through ``escapeattr``.
        Returns ``""`` when no attributes are given.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        parts = []
        for attr, value in attributes:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            parts.append(' %s="%s"' % (attr, escapeattr(value)))
        return "".join(parts)
168*e1fe3e4aSElliott Hughes
169*e1fe3e4aSElliott Hughes
def escape(data):
    """Return *data* as text with XML markup characters escaped.

    The input is first passed through ``tostr`` with UTF-8, then ``&``,
    ``<``, ``>`` and carriage return are replaced by ``&amp;``, ``&lt;``,
    ``&gt;`` and ``&#13;`` respectively.
    """
    text = tostr(data, "utf_8")
    # "&" has to be handled first, otherwise the ampersands introduced by the
    # later replacements would themselves be escaped.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\r", "&#13;"),
    )
    for raw, entity in substitutions:
        text = text.replace(raw, entity)
    return text
177*e1fe3e4aSElliott Hughes
178*e1fe3e4aSElliott Hughes
def escapeattr(data):
    """Return *data* escaped for use inside a double-quoted attribute value.

    Applies ``escape`` and additionally turns ``"`` into ``&quot;``.
    """
    return escape(data).replace('"', "&quot;")
183*e1fe3e4aSElliott Hughes
184*e1fe3e4aSElliott Hughes
def escape8bit(data):
    """Return the bytes object *data* as ASCII-only XML text.

    The input is decoded as Latin-1, so every byte becomes the character
    with the same code point.  Characters in the range 32..127 other than
    ``<``, ``&`` and ``>`` are kept as they are; everything else is written
    as a decimal character reference such as ``&#255;``.
    """

    def escapechar(c):
        n = ord(c)
        if 32 <= n <= 127 and c not in "<&>":
            return c
        return "&#%d;" % n

    return "".join(map(escapechar, data.decode("latin-1")))
196*e1fe3e4aSElliott Hughes
197*e1fe3e4aSElliott Hughes
def hexStr(s):
    """Return the items of *s* as hex text, two lowercase digits per item.

    Each item is converted to an integer with ``byteord``; only its low
    eight bits contribute to the output.  An empty input gives ``""``.
    """
    # The first 16 characters of string.hexdigits are "0123456789abcdef".
    digits = string.hexdigits
    return "".join(
        digits[(i >> 4) & 0xF] + digits[i & 0xF] for i in map(byteord, s)
    )
205