// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package xml2 provides drop-in replacement functionality for encoding/xml.
//
// There are existing issues with the encoding/xml package that affect AK tools.
//
// xml2.Encoder:
//
// The current encoding/xml Encoder has several issues around xml namespacing
// that make the output produced by it incompatible with AAPT.
//
//   - Tracked here: https://golang.org/issue/7535
//
// The xml2.Encoder.EncodeToken verifies the validity of namespaces and encodes
// them. For everything else, xml2.Encoder will fall back to the xml.Encoder.
28*9e965d6fSRomain Jobredeauxpackage xml2 29*9e965d6fSRomain Jobredeaux 30*9e965d6fSRomain Jobredeauximport ( 31*9e965d6fSRomain Jobredeaux "bytes" 32*9e965d6fSRomain Jobredeaux "encoding/xml" 33*9e965d6fSRomain Jobredeaux "fmt" 34*9e965d6fSRomain Jobredeaux "io" 35*9e965d6fSRomain Jobredeaux "log" 36*9e965d6fSRomain Jobredeaux) 37*9e965d6fSRomain Jobredeaux 38*9e965d6fSRomain Jobredeauxconst xmlNS = "xmlns" 39*9e965d6fSRomain Jobredeaux 40*9e965d6fSRomain Jobredeaux// Encoder is an xml encoder which behaves much like the encoding/xml Encoder. 41*9e965d6fSRomain Jobredeauxtype Encoder struct { 42*9e965d6fSRomain Jobredeaux *xml.Encoder 43*9e965d6fSRomain Jobredeaux p printer 44*9e965d6fSRomain Jobredeaux prefixURI map[string]string 45*9e965d6fSRomain Jobredeaux state []state 46*9e965d6fSRomain Jobredeaux uriPrefix *uriPrefixMap 47*9e965d6fSRomain Jobredeaux} 48*9e965d6fSRomain Jobredeaux 49*9e965d6fSRomain Jobredeaux// ChildEncoder returns an encoder whose state is copied the given parent Encoder and writes to w. 50*9e965d6fSRomain Jobredeauxfunc ChildEncoder(w io.Writer, parent *Encoder) *Encoder { 51*9e965d6fSRomain Jobredeaux e := NewEncoder(w) 52*9e965d6fSRomain Jobredeaux for k, v := range parent.prefixURI { 53*9e965d6fSRomain Jobredeaux e.prefixURI[k] = v 54*9e965d6fSRomain Jobredeaux } 55*9e965d6fSRomain Jobredeaux for k, v := range parent.uriPrefix.up { 56*9e965d6fSRomain Jobredeaux e.uriPrefix.up[k] = make([]string, len(v)) 57*9e965d6fSRomain Jobredeaux copy(e.uriPrefix.up[k], v) 58*9e965d6fSRomain Jobredeaux } 59*9e965d6fSRomain Jobredeaux return e 60*9e965d6fSRomain Jobredeaux} 61*9e965d6fSRomain Jobredeaux 62*9e965d6fSRomain Jobredeaux// NewEncoder returns a new encoder that writes to w. 
63*9e965d6fSRomain Jobredeauxfunc NewEncoder(w io.Writer) *Encoder { 64*9e965d6fSRomain Jobredeaux e := &Encoder{ 65*9e965d6fSRomain Jobredeaux Encoder: xml.NewEncoder(w), 66*9e965d6fSRomain Jobredeaux p: printer{Writer: w}, 67*9e965d6fSRomain Jobredeaux prefixURI: make(map[string]string), 68*9e965d6fSRomain Jobredeaux uriPrefix: &uriPrefixMap{up: make(map[string][]string)}, 69*9e965d6fSRomain Jobredeaux } 70*9e965d6fSRomain Jobredeaux return e 71*9e965d6fSRomain Jobredeaux} 72*9e965d6fSRomain Jobredeaux 73*9e965d6fSRomain Jobredeaux// EncodeToken behaves almost the same as encoding/xml.Encoder.EncodeToken 74*9e965d6fSRomain Jobredeaux// but deals with StartElement and EndElement differently. 75*9e965d6fSRomain Jobredeauxfunc (enc *Encoder) EncodeToken(t xml.Token) error { 76*9e965d6fSRomain Jobredeaux switch t := t.(type) { 77*9e965d6fSRomain Jobredeaux case xml.StartElement: 78*9e965d6fSRomain Jobredeaux enc.Encoder.Flush() // Need to flush the wrapped encoder before we write. 79*9e965d6fSRomain Jobredeaux if err := enc.writeStart(&t); err != nil { 80*9e965d6fSRomain Jobredeaux return err 81*9e965d6fSRomain Jobredeaux } 82*9e965d6fSRomain Jobredeaux case xml.EndElement: 83*9e965d6fSRomain Jobredeaux enc.Encoder.Flush() // Need to flush the wrapped encoder before we write. 84*9e965d6fSRomain Jobredeaux if err := enc.writeEnd(t.Name); err != nil { 85*9e965d6fSRomain Jobredeaux return err 86*9e965d6fSRomain Jobredeaux } 87*9e965d6fSRomain Jobredeaux default: 88*9e965d6fSRomain Jobredeaux // Delegate to the embedded encoder for everything else. 
89*9e965d6fSRomain Jobredeaux return enc.Encoder.EncodeToken(t) 90*9e965d6fSRomain Jobredeaux } 91*9e965d6fSRomain Jobredeaux return nil 92*9e965d6fSRomain Jobredeaux} 93*9e965d6fSRomain Jobredeaux 94*9e965d6fSRomain Jobredeauxfunc (enc *Encoder) writeStart(start *xml.StartElement) error { 95*9e965d6fSRomain Jobredeaux if start.Name.Local == "" { 96*9e965d6fSRomain Jobredeaux return fmt.Errorf("start tag with no name") 97*9e965d6fSRomain Jobredeaux } 98*9e965d6fSRomain Jobredeaux enc.setUpState(start) 99*9e965d6fSRomain Jobredeaux 100*9e965d6fSRomain Jobredeaux // Begin creating the start tag. 101*9e965d6fSRomain Jobredeaux var st bytes.Buffer 102*9e965d6fSRomain Jobredeaux st.WriteByte('<') 103*9e965d6fSRomain Jobredeaux n, err := enc.translateName(start.Name) 104*9e965d6fSRomain Jobredeaux if err != nil { 105*9e965d6fSRomain Jobredeaux return fmt.Errorf("translating start tag name %q failed, got: %v", start.Name.Local, err) 106*9e965d6fSRomain Jobredeaux } 107*9e965d6fSRomain Jobredeaux st.Write(n) 108*9e965d6fSRomain Jobredeaux for _, attr := range start.Attr { 109*9e965d6fSRomain Jobredeaux name := attr.Name 110*9e965d6fSRomain Jobredeaux if name.Local == "" { 111*9e965d6fSRomain Jobredeaux continue 112*9e965d6fSRomain Jobredeaux } 113*9e965d6fSRomain Jobredeaux st.WriteByte(' ') 114*9e965d6fSRomain Jobredeaux n, err := enc.translateName(attr.Name) 115*9e965d6fSRomain Jobredeaux if err != nil { 116*9e965d6fSRomain Jobredeaux return fmt.Errorf("translating attribute name %q failed, got: %v", start.Name.Local, err) 117*9e965d6fSRomain Jobredeaux } 118*9e965d6fSRomain Jobredeaux st.Write(n) 119*9e965d6fSRomain Jobredeaux st.WriteString(`="`) 120*9e965d6fSRomain Jobredeaux xml.EscapeText(&st, []byte(attr.Value)) 121*9e965d6fSRomain Jobredeaux st.WriteByte('"') 122*9e965d6fSRomain Jobredeaux } 123*9e965d6fSRomain Jobredeaux st.WriteByte('>') 124*9e965d6fSRomain Jobredeaux 125*9e965d6fSRomain Jobredeaux enc.p.writeIndent(1) 126*9e965d6fSRomain Jobredeaux 
enc.p.Write(st.Bytes()) 127*9e965d6fSRomain Jobredeaux return nil 128*9e965d6fSRomain Jobredeaux} 129*9e965d6fSRomain Jobredeaux 130*9e965d6fSRomain Jobredeauxfunc (enc *Encoder) writeEnd(name xml.Name) error { 131*9e965d6fSRomain Jobredeaux if name.Local == "" { 132*9e965d6fSRomain Jobredeaux return fmt.Errorf("end tag with no name") 133*9e965d6fSRomain Jobredeaux } 134*9e965d6fSRomain Jobredeaux n, err := enc.translateName(name) 135*9e965d6fSRomain Jobredeaux if err != nil { 136*9e965d6fSRomain Jobredeaux return fmt.Errorf("translating end tag name %q failed, got: %v", name.Local, err) 137*9e965d6fSRomain Jobredeaux } 138*9e965d6fSRomain Jobredeaux sn := enc.tearDownState() 139*9e965d6fSRomain Jobredeaux if sn == nil || name.Local != sn.Local && name.Space != sn.Space { 140*9e965d6fSRomain Jobredeaux return fmt.Errorf("tags are unbalanced, got: %v, wanted: %v", name, sn) 141*9e965d6fSRomain Jobredeaux } 142*9e965d6fSRomain Jobredeaux 143*9e965d6fSRomain Jobredeaux // Begin creating the end tag 144*9e965d6fSRomain Jobredeaux var et bytes.Buffer 145*9e965d6fSRomain Jobredeaux et.WriteString("</") 146*9e965d6fSRomain Jobredeaux et.Write(n) 147*9e965d6fSRomain Jobredeaux et.WriteByte('>') 148*9e965d6fSRomain Jobredeaux 149*9e965d6fSRomain Jobredeaux enc.p.writeIndent(-1) 150*9e965d6fSRomain Jobredeaux enc.p.Write(et.Bytes()) 151*9e965d6fSRomain Jobredeaux return nil 152*9e965d6fSRomain Jobredeaux} 153*9e965d6fSRomain Jobredeaux 154*9e965d6fSRomain Jobredeauxfunc (enc *Encoder) setUpState(start *xml.StartElement) { 155*9e965d6fSRomain Jobredeaux enc.state = append(enc.state, element{n: &start.Name}) // Store start element to verify balanced close tags. 156*9e965d6fSRomain Jobredeaux // Track attrs that affect the state of the xml (e.g. xmlns, xmlns:foo). 157*9e965d6fSRomain Jobredeaux for _, attr := range start.Attr { 158*9e965d6fSRomain Jobredeaux // push any xmlns type attrs as xml namespaces are valid within the tag they are declared in, and onward. 
159*9e965d6fSRomain Jobredeaux if attr.Name.Space == "xmlns" || attr.Name.Local == "xmlns" { 160*9e965d6fSRomain Jobredeaux prefix := attr.Name.Local 161*9e965d6fSRomain Jobredeaux if attr.Name.Local == "xmlns" { 162*9e965d6fSRomain Jobredeaux prefix = "" // Default xml namespace is being set. 163*9e965d6fSRomain Jobredeaux } 164*9e965d6fSRomain Jobredeaux // Store the previous state, to be restored when exiting the tag. 165*9e965d6fSRomain Jobredeaux enc.state = append(enc.state, xmlns{prefix: prefix, uri: enc.prefixURI[prefix]}) 166*9e965d6fSRomain Jobredeaux enc.prefixURI[prefix] = attr.Value 167*9e965d6fSRomain Jobredeaux enc.uriPrefix.put(attr.Value, prefix) 168*9e965d6fSRomain Jobredeaux } 169*9e965d6fSRomain Jobredeaux } 170*9e965d6fSRomain Jobredeaux} 171*9e965d6fSRomain Jobredeaux 172*9e965d6fSRomain Jobredeauxfunc (enc *Encoder) tearDownState() *xml.Name { 173*9e965d6fSRomain Jobredeaux // Unwind the state setup on start element. 174*9e965d6fSRomain Jobredeaux for len(enc.state) > 0 { 175*9e965d6fSRomain Jobredeaux s := enc.state[len(enc.state)-1] 176*9e965d6fSRomain Jobredeaux enc.state = enc.state[:len(enc.state)-1] 177*9e965d6fSRomain Jobredeaux switch s := s.(type) { 178*9e965d6fSRomain Jobredeaux case element: 179*9e965d6fSRomain Jobredeaux // Stop unwinding As soon as an element type is seen and verify that the 180*9e965d6fSRomain Jobredeaux // tags are balanced 181*9e965d6fSRomain Jobredeaux return s.n 182*9e965d6fSRomain Jobredeaux case xmlns: 183*9e965d6fSRomain Jobredeaux if p, ok := enc.uriPrefix.removeLast(enc.prefixURI[s.prefix]); !ok || p != s.prefix { 184*9e965d6fSRomain Jobredeaux // Unexpected error, internal state is corrupt. 
185*9e965d6fSRomain Jobredeaux if !ok { 186*9e965d6fSRomain Jobredeaux log.Fatalf("xmlns attribute state corrupt, uri %q does not exist", enc.prefixURI[s.prefix]) 187*9e965d6fSRomain Jobredeaux } 188*9e965d6fSRomain Jobredeaux log.Fatalf("xmlns attributes state corrupt, got: %q, wanted: %q", s.prefix, p) 189*9e965d6fSRomain Jobredeaux } 190*9e965d6fSRomain Jobredeaux if s.uri == "" { 191*9e965d6fSRomain Jobredeaux delete(enc.prefixURI, s.prefix) 192*9e965d6fSRomain Jobredeaux } else { 193*9e965d6fSRomain Jobredeaux enc.prefixURI[s.prefix] = s.uri 194*9e965d6fSRomain Jobredeaux } 195*9e965d6fSRomain Jobredeaux } 196*9e965d6fSRomain Jobredeaux } 197*9e965d6fSRomain Jobredeaux return nil 198*9e965d6fSRomain Jobredeaux} 199*9e965d6fSRomain Jobredeaux 200*9e965d6fSRomain Jobredeauxfunc (enc *Encoder) translateName(name xml.Name) ([]byte, error) { 201*9e965d6fSRomain Jobredeaux var n bytes.Buffer 202*9e965d6fSRomain Jobredeaux if name.Space != "" { 203*9e965d6fSRomain Jobredeaux prefix := "" 204*9e965d6fSRomain Jobredeaux if name.Space == xmlNS { 205*9e965d6fSRomain Jobredeaux prefix = xmlNS 206*9e965d6fSRomain Jobredeaux } else if ns, ok := enc.uriPrefix.getLast(name.Space); ok { 207*9e965d6fSRomain Jobredeaux // URI Space is defined in current context, use the namespace. 208*9e965d6fSRomain Jobredeaux prefix = ns 209*9e965d6fSRomain Jobredeaux } else if _, ok := enc.prefixURI[name.Space]; ok { 210*9e965d6fSRomain Jobredeaux // If URI Space is not defined in current context, there is a possibility 211*9e965d6fSRomain Jobredeaux // that the Space is in fact a namespace prefix. If present use it. 
212*9e965d6fSRomain Jobredeaux prefix = name.Space 213*9e965d6fSRomain Jobredeaux } else { 214*9e965d6fSRomain Jobredeaux return nil, fmt.Errorf("unknown namespace: %s", name.Space) 215*9e965d6fSRomain Jobredeaux } 216*9e965d6fSRomain Jobredeaux if prefix != "" { 217*9e965d6fSRomain Jobredeaux n.WriteString(prefix) 218*9e965d6fSRomain Jobredeaux n.WriteByte(':') 219*9e965d6fSRomain Jobredeaux } 220*9e965d6fSRomain Jobredeaux } 221*9e965d6fSRomain Jobredeaux n.WriteString(name.Local) 222*9e965d6fSRomain Jobredeaux return n.Bytes(), nil 223*9e965d6fSRomain Jobredeaux} 224*9e965d6fSRomain Jobredeaux 225*9e965d6fSRomain Jobredeauxtype printer struct { 226*9e965d6fSRomain Jobredeaux io.Writer 227*9e965d6fSRomain Jobredeaux indent string 228*9e965d6fSRomain Jobredeaux prefix string 229*9e965d6fSRomain Jobredeaux depth int 230*9e965d6fSRomain Jobredeaux indentedIn bool 231*9e965d6fSRomain Jobredeaux putNewline bool 232*9e965d6fSRomain Jobredeaux} 233*9e965d6fSRomain Jobredeaux 234*9e965d6fSRomain Jobredeaux// writeIndent is directly cribbed from encoding/xml/marshal.go to keep indentation behavior the same. 
235*9e965d6fSRomain Jobredeauxfunc (p *printer) writeIndent(depthDelta int) { 236*9e965d6fSRomain Jobredeaux if len(p.prefix) == 0 && len(p.indent) == 0 { 237*9e965d6fSRomain Jobredeaux return 238*9e965d6fSRomain Jobredeaux } 239*9e965d6fSRomain Jobredeaux if depthDelta < 0 { 240*9e965d6fSRomain Jobredeaux p.depth-- 241*9e965d6fSRomain Jobredeaux if p.indentedIn { 242*9e965d6fSRomain Jobredeaux p.indentedIn = false 243*9e965d6fSRomain Jobredeaux return 244*9e965d6fSRomain Jobredeaux } 245*9e965d6fSRomain Jobredeaux p.indentedIn = false 246*9e965d6fSRomain Jobredeaux } 247*9e965d6fSRomain Jobredeaux if p.putNewline { 248*9e965d6fSRomain Jobredeaux p.Write([]byte("\n")) 249*9e965d6fSRomain Jobredeaux } else { 250*9e965d6fSRomain Jobredeaux p.putNewline = true 251*9e965d6fSRomain Jobredeaux } 252*9e965d6fSRomain Jobredeaux if len(p.prefix) > 0 { 253*9e965d6fSRomain Jobredeaux p.Write([]byte(p.prefix)) 254*9e965d6fSRomain Jobredeaux } 255*9e965d6fSRomain Jobredeaux if len(p.indent) > 0 { 256*9e965d6fSRomain Jobredeaux for i := 0; i < p.depth; i++ { 257*9e965d6fSRomain Jobredeaux p.Write([]byte(p.indent)) 258*9e965d6fSRomain Jobredeaux } 259*9e965d6fSRomain Jobredeaux } 260*9e965d6fSRomain Jobredeaux if depthDelta > 0 { 261*9e965d6fSRomain Jobredeaux p.depth++ 262*9e965d6fSRomain Jobredeaux p.indentedIn = true 263*9e965d6fSRomain Jobredeaux } 264*9e965d6fSRomain Jobredeaux 265*9e965d6fSRomain Jobredeaux} 266*9e965d6fSRomain Jobredeaux 267*9e965d6fSRomain Jobredeaux// uriPrefixMap is a multimap, mapping a uri to many xml namespace prefixes. The 268*9e965d6fSRomain Jobredeaux// difference with this and a a traditional multimap is that, you can only get 269*9e965d6fSRomain Jobredeaux// or remove the last prefixed added. This is mainly due to the way xml decoding 270*9e965d6fSRomain Jobredeaux// is implemented by the encoding/xml Decoder. 
type uriPrefixMap struct {
	// up maps a uri to the prefixes declared for it, oldest first.
	up map[string][]string
}

// getLast returns the last prefix stored for the given uri and a boolean which
// signifies if the entry exists. A uri with no stored prefixes is reported as
// absent.
func (u *uriPrefixMap) getLast(uri string) (string, bool) {
	ps := u.up[uri]
	if len(ps) == 0 {
		return "", false
	}
	return ps[len(ps)-1], true
}

// put records prefix as the most recent prefix declared for uri.
func (u *uriPrefixMap) put(uri, prefix string) {
	// Though the mapping of uri-to-prefix is implemented as a multimap, in practice
	// there should never be more than a single prefix defined for any given uri
	// at any point in time in an xml file.
	//
	// Appending to the missing (nil) entry creates the slice, so the first
	// prefix lands at index 0 and removing it empties the entry again.
	u.up[uri] = append(u.up[uri], prefix)
}

// removeLast removes the last prefix stored for the given uri and returns it
// together with a boolean which signifies if the entry existed. If the last
// entry is removed the key is also deleted.
298*9e965d6fSRomain Jobredeauxfunc (u *uriPrefixMap) removeLast(uri string) (string, bool) { 299*9e965d6fSRomain Jobredeaux p, ok := u.getLast(uri) 300*9e965d6fSRomain Jobredeaux if ok { 301*9e965d6fSRomain Jobredeaux if len(u.up[uri]) > 1 { 302*9e965d6fSRomain Jobredeaux u.up[uri] = u.up[uri][:len(u.up[uri])-1] 303*9e965d6fSRomain Jobredeaux } else { 304*9e965d6fSRomain Jobredeaux delete(u.up, uri) 305*9e965d6fSRomain Jobredeaux } 306*9e965d6fSRomain Jobredeaux } 307*9e965d6fSRomain Jobredeaux return p, ok 308*9e965d6fSRomain Jobredeaux} 309*9e965d6fSRomain Jobredeaux 310*9e965d6fSRomain Jobredeaux// state stores the state of the xml when a new start element is seen. 311*9e965d6fSRomain Jobredeauxtype state interface{} 312*9e965d6fSRomain Jobredeaux 313*9e965d6fSRomain Jobredeaux// xml element state entry. 314*9e965d6fSRomain Jobredeauxtype element struct { 315*9e965d6fSRomain Jobredeaux n *xml.Name 316*9e965d6fSRomain Jobredeaux} 317*9e965d6fSRomain Jobredeaux 318*9e965d6fSRomain Jobredeaux// xmlns attribute state entry. 319*9e965d6fSRomain Jobredeauxtype xmlns struct { 320*9e965d6fSRomain Jobredeaux prefix string 321*9e965d6fSRomain Jobredeaux uri string 322*9e965d6fSRomain Jobredeaux} 323