"""xmlWriter.py -- Simple XML authoring class"""

from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
import sys
import os
import string

# Default whitespace written once per indentation level.
INDENT = "  "


class XMLWriter(object):
    """Incrementally write an indented, UTF-8 encoded XML document.

    The writer keeps a stack of open tags and the current indentation level.
    Output goes either to a file it opens itself (and closes in ``close()``)
    or to a caller-supplied stream (which it never closes).  It can be used
    as a context manager; leaving the ``with`` block calls ``close()``.
    """

    def __init__(
        self,
        fileOrPath,
        indentwhite=INDENT,
        idlefunc=None,
        encoding="utf_8",
        newlinestr="\n",
    ):
        """Open the output and emit the XML declaration.

        Args:
            fileOrPath: ``"-"`` for ``sys.stdout``, any object with a
                ``write`` attribute (used as-is), or otherwise a path that
                is opened in binary write mode.
            indentwhite: text written once per indentation level.
            idlefunc: optional callable without arguments, invoked from
                ``newline()`` on every 100th call (starting with the first).
            encoding: must spell UTF-8 (case, ``-`` and ``_`` are ignored).
            newlinestr: line terminator; ``None`` selects ``os.linesep``.

        Raises:
            Exception: if ``encoding`` is anything other than UTF-8.
        """
        if encoding.lower().replace("-", "").replace("_", "") != "utf8":
            raise Exception("Only UTF-8 encoding is supported.")
        if fileOrPath == "-":
            fileOrPath = sys.stdout
        if not hasattr(fileOrPath, "write"):
            self.filename = fileOrPath
            self.file = open(fileOrPath, "wb")
            # We opened it, so close() is responsible for closing it.
            self._closeStream = True
        else:
            self.filename = None
            # assume writable file object
            self.file = fileOrPath
            self._closeStream = False

        # Figure out if writer expects bytes or unicodes
        try:
            # The bytes check should be first.  See:
            # https://github.com/fonttools/fonttools/pull/233
            self.file.write(b"")
            self.totype = tobytes
        except TypeError:
            # This better not fail.
            self.file.write("")
            self.totype = tostr
        self.indentwhite = self.totype(indentwhite)
        if newlinestr is None:
            self.newlinestr = self.totype(os.linesep)
        else:
            self.newlinestr = self.totype(newlinestr)
        self.indentlevel = 0
        self.stack = []  # names of the currently open tags, innermost last
        self.needindent = 1  # truthy while nothing was written on this line
        self.idlefunc = idlefunc
        self.idlecounter = 0  # number of newline() calls so far
        self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
        self.newline()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Close the writer; exceptions are not suppressed."""
        self.close()

    def close(self):
        """Close the underlying file, but only if this writer opened it."""
        if self._closeStream:
            self.file.close()

    def write(self, string, indent=True):
        """Writes text."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section."""
        # The text is written verbatim (no escaping is applied here).
        self._writeraw("<![CDATA[" + string + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes.  When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self._writeraw(escape(string), indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Writes bytes, possibly indented.

        Args:
            data: text to write verbatim (no XML escaping is applied).
            indent: when true and nothing has been written on the current
                line yet, the indentation for the current level is emitted
                first.
            strip: when true, leading/trailing whitespace is stripped from
                the converted data before writing.
        """
        if indent and self.needindent:
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        # Convert to whatever type (bytes or str) the stream accepts.
        s = self.totype(data, encoding="utf_8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """End the current line and give ``idlefunc`` a chance to run."""
        self.file.write(self.newlinestr)
        self.needindent = 1
        idlecounter = self.idlecounter
        # Fires when the counter is a multiple of 100 (including 0).
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write ``data`` as an XML comment, one output line per input line.

        The text is escaped first; continuation lines are padded so that
        they line up under the text that follows the comment opener.
        """
        opener = "<!-- "
        continuation = " " * len(opener)
        data = escape(data)
        lines = data.split("\n")
        self._writeraw(opener + lines[0])
        for line in lines[1:]:
            self.newline()
            self._writeraw(continuation + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write an empty element ``<tag attr="value"/>``.

        Attributes are given as for ``stringifyattrs()``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s/>" % (_TAG_, attrdata)
        self._writeraw(data)

    def begintag(self, _TAG_, *args, **kwargs):
        """Write an opening tag, remember it and increase the indentation.

        Attributes are given as for ``stringifyattrs()``.
        """
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s>" % (_TAG_, attrdata)
        self._writeraw(data)
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Write the closing tag for the innermost open element.

        ``_TAG_`` must match the most recent unclosed ``begintag()``; this
        is checked with an assertion.
        """
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        del self.stack[-1]
        self.dedent()
        data = "</%s>" % _TAG_
        self._writeraw(data)

    def dumphex(self, data):
        """Write ``data`` as hex, 16 bytes per line in groups of 8 digits.

        Every line is followed by ``newline()``.
        """
        linelength = 16  # input items per output line
        hexlinelength = linelength * 2
        chunksize = 8  # hex digits per space-separated group
        for i in range(0, len(data), linelength):
            hexline = hexStr(data[i : i + linelength])
            # Slicing past the end of a short final line yields empty
            # groups, so such a line keeps its trailing separator spaces.
            line = " ".join(
                hexline[j : j + chunksize]
                for j in range(0, hexlinelength, chunksize)
            )
            self._writeraw(line)
            self.newline()

    def indent(self):
        """Increase the indentation level by one."""
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        """Decrease the indentation level by one (asserts it is positive)."""
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Format attributes as `` name="value"`` pairs for use inside a tag.

        Attributes are passed either as keyword arguments (written sorted by
        name) or as a single positional iterable of ``(name, value)`` pairs
        (written in the given order), not both.  Values that are neither
        ``bytes`` nor ``str`` are converted with ``str()``; all values are
        escaped with ``escapeattr()``.

        Returns:
            The formatted attributes, each preceded by a space, or ``""``
            when no attributes were given.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        parts = []
        for attr, value in attributes:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            parts.append(' %s="%s"' % (attr, escapeattr(value)))
        return "".join(parts)


def escape(data):
    """Return ``data`` as text with XML markup characters escaped.

    ``&``, ``<`` and ``>`` become entity references and carriage returns
    become the character reference ``&#13;``.
    """
    data = tostr(data, "utf_8")
    # "&" must be handled first so the entities added below stay intact.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    data = data.replace("\r", "&#13;")
    return data


def escapeattr(data):
    """Like ``escape()``, additionally escaping double quotes so the result
    can be placed inside a double-quoted attribute value."""
    data = escape(data)
    data = data.replace('"', "&quot;")
    return data


def escape8bit(data):
    """Escape a bytes object for inclusion in XML text.

    The input is decoded as latin-1.  Characters with code 32..127 other
    than ``<``, ``&`` and ``>`` are kept; everything else is replaced by a
    decimal character reference (``&#N;``).
    """

    def escapechar(c):
        n = ord(c)
        if 32 <= n <= 127 and c not in "<&>":
            return c
        else:
            return "&#%d;" % n

    return strjoin(map(escapechar, data.decode("latin-1")))


def hexStr(s):
    """Return two lowercase hex digits for every item of ``s``.

    Each item is converted to an integer with ``byteord`` (imported from
    fontTools.misc.textTools) and only its low 8 bits are used.
    """
    h = string.hexdigits
    return "".join(
        h[(i >> 4) & 0xF] + h[i & 0xF] for i in map(byteord, s)
    )