"""xmlWriter.py -- Simple XML authoring class"""

from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
import sys
import os
import string

INDENT = " "


class XMLWriter(object):
    """Minimal streaming XML writer.

    Output goes either to a path (opened in binary mode and closed by
    ``close()``) or to an existing file-like object (left open on
    ``close()``). The XML declaration is written as soon as the writer is
    constructed. Can be used as a context manager.
    """

    def __init__(
        self,
        fileOrPath,
        indentwhite=INDENT,
        idlefunc=None,
        encoding="utf_8",
        newlinestr="\n",
    ):
        """Create a writer.

        Args:
            fileOrPath: a path, a writable file object, or ``"-"`` for
                ``sys.stdout``.
            indentwhite: text written once per indentation level.
            idlefunc: optional zero-argument callable invoked from
                ``newline()`` on every 100th call (starting with the first).
            encoding: must be a spelling of UTF-8 (case, ``-`` and ``_``
                are ignored).
            newlinestr: line terminator; ``None`` selects ``os.linesep``.

        Raises:
            Exception: if *encoding* is not UTF-8.
        """
        if encoding.lower().replace("-", "").replace("_", "") != "utf8":
            raise Exception("Only UTF-8 encoding is supported.")
        if fileOrPath == "-":
            fileOrPath = sys.stdout
        if not hasattr(fileOrPath, "write"):
            self.filename = fileOrPath
            self.file = open(fileOrPath, "wb")
            self._closeStream = True
        else:
            self.filename = None
            # assume writable file object
            self.file = fileOrPath
            self._closeStream = False

        # Figure out if writer expects bytes or unicodes
        try:
            # The bytes check should be first. See:
            # https://github.com/fonttools/fonttools/pull/233
            self.file.write(b"")
            self.totype = tobytes
        except TypeError:
            # This better not fail.
            self.file.write("")
            self.totype = tostr
        self.indentwhite = self.totype(indentwhite)
        if newlinestr is None:
            self.newlinestr = self.totype(os.linesep)
        else:
            self.newlinestr = self.totype(newlinestr)
        self.indentlevel = 0
        self.stack = []
        self.needindent = 1
        self.idlefunc = idlefunc
        self.idlecounter = 0
        self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
        self.newline()

    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        self.close()

    def close(self):
        """Close the underlying file, but only if this writer opened it."""
        if self._closeStream:
            self.file.close()

    def write(self, string, indent=True):
        """Writes text."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section."""
        # A literal "]]>" would terminate the section early and yield
        # malformed XML, so split it across two adjacent CDATA sections.
        safe = string.replace("]]>", "]]]]><![CDATA[>")
        self._writeraw("<![CDATA[" + safe + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes. When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self._writeraw(escape(string), indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Writes data unescaped, possibly indented.

        Indentation is emitted only for the first write after a newline.
        With *strip* true, surrounding whitespace is removed from the
        converted data before it is written.
        """
        if indent and self.needindent:
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        s = self.totype(data, encoding="utf_8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """Write the line terminator and arm indentation for the next write."""
        self.file.write(self.newlinestr)
        self.needindent = 1
        idlecounter = self.idlecounter
        # Give the idle callback a chance to run once every 100 newlines.
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write *data* (escaped) as an XML comment, one output line per input line."""
        data = escape(data)
        lines = data.split("\n")
        self._writeraw("<!-- " + lines[0])
        for line in lines[1:]:
            self.newline()
            self._writeraw(" " + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write a self-closing tag; attributes as for ``stringifyattrs``."""
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s/>" % (_TAG_, attrdata)
        self._writeraw(data)

    def begintag(self, _TAG_, *args, **kwargs):
        """Write an opening tag, push it on the stack and indent."""
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s>" % (_TAG_, attrdata)
        self._writeraw(data)
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Dedent and write the closing tag matching the innermost open tag."""
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        del self.stack[-1]
        self.dedent()
        data = "</%s>" % _TAG_
        self._writeraw(data)

    def dumphex(self, data):
        """Write *data* as hex: 16 bytes per line, in groups of 8 hex digits."""
        linelength = 16
        hexlinelength = linelength * 2
        chunksize = 8
        for i in range(0, len(data), linelength):
            hexline = hexStr(data[i : i + linelength])
            # Always iterate over the full line width: a short final line
            # yields empty groups, which keeps the separators in place.
            line = " ".join(
                hexline[j : j + chunksize]
                for j in range(0, hexlinelength, chunksize)
            )
            self._writeraw(line)
            self.newline()

    def indent(self):
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Render attributes as a string of `` name="value"`` pairs.

        Attributes are given either as keyword arguments (written in sorted
        order) or as one positional iterable of ``(name, value)`` pairs
        (written in the order given), never both. Values that are not
        ``str``/``bytes`` are converted with ``str()``; all values are
        escaped. Returns ``""`` when no attributes are given.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        parts = []
        for attr, value in attributes:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            parts.append(' %s="%s"' % (attr, escapeattr(value)))
        return "".join(parts)


def escape(data):
    """Return *data* as text with XML-special characters escaped.

    ``&``, ``<`` and ``>`` become entity references; a carriage return
    becomes the character reference ``&#13;`` so it is not lost to
    line-ending normalization when the document is parsed.
    """
    data = tostr(data, "utf_8")
    # "&" must be handled first so the entities added below are not re-escaped.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    data = data.replace("\r", "&#13;")
    return data


def escapeattr(data):
    """Escape *data* for use inside a double-quoted attribute value."""
    data = escape(data)
    data = data.replace('"', "&quot;")
    return data


def escape8bit(data):
    """Input is a bytes sequence; returns text.

    Each byte is decoded as latin-1. Characters in the range 32..127 other
    than ``<``, ``&`` and ``>`` are kept; everything else is written as a
    decimal character reference.
    """

    def escapechar(c):
        n = ord(c)
        if 32 <= n <= 127 and c not in "<&>":
            return c
        else:
            return "&#" + repr(n) + ";"

    return strjoin(map(escapechar, data.decode("latin-1")))


def hexStr(s):
    """Return the lowercase hexadecimal representation of the bytes in *s*."""
    h = string.hexdigits
    return "".join(h[(i >> 4) & 0xF] + h[i & 0xF] for i in map(byteord, s))