// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package xml2 provides drop-in replacement functionality for encoding/xml.
//
// There are existing issues with the encoding/xml package that affect AK tools.
//
// xml2.Encoder:
//
// The current encoding/xml Encoder has several issues around xml namespacing
// that makes the output produced by it incompatible with AAPT.
//
// * Tracked here: https://golang.org/issue/7535
//
// The xml2.Encoder.EncodeToken verifies the validity of namespaces and encodes
// them. For everything else, xml2.Encoder will fallback to the xml.Encoder.
package xml2

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"io"
	"log"
)

// xmlNS is the reserved name used for xml namespace declarations, both as the
// attribute prefix ("xmlns:foo") and as the default declaration ("xmlns").
const xmlNS = "xmlns"

// Encoder is an xml encoder which behaves much like the encoding/xml Encoder.
//
// Start and end elements are written by this type directly to the underlying
// writer; every other token is delegated to the embedded xml.Encoder.
type Encoder struct {
	*xml.Encoder
	p         printer           // writes start/end tags straight to the underlying writer.
	prefixURI map[string]string // namespace prefix -> uri currently in scope.
	state     []state           // stack of element and xmlns entries, unwound on end elements.
	uriPrefix *uriPrefixMap     // uri -> stack of prefixes currently in scope.
}

// ChildEncoder returns an encoder that writes to w and whose namespace state
// is copied from the given parent Encoder. The copies are independent: later
// changes to either encoder do not affect the other.
func ChildEncoder(w io.Writer, parent *Encoder) *Encoder {
	e := NewEncoder(w)
	for k, v := range parent.prefixURI {
		e.prefixURI[k] = v
	}
	for k, v := range parent.uriPrefix.up {
		// Copy the slice so the child never aliases the parent's backing array.
		e.uriPrefix.up[k] = make([]string, len(v))
		copy(e.uriPrefix.up[k], v)
	}
	return e
}

// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
	e := &Encoder{
		Encoder:   xml.NewEncoder(w),
		p:         printer{Writer: w},
		prefixURI: make(map[string]string),
		uriPrefix: &uriPrefixMap{up: make(map[string][]string)},
	}
	return e
}

// EncodeToken behaves almost the same as encoding/xml.Encoder.EncodeToken
// but deals with StartElement and EndElement differently.
//
// Start and end elements are written directly to the underlying writer, so the
// wrapped encoder is flushed first to keep the output ordered. Any flush or
// write error is returned to the caller.
func (enc *Encoder) EncodeToken(t xml.Token) error {
	switch t := t.(type) {
	case xml.StartElement:
		// Need to flush the wrapped encoder before we write.
		if err := enc.Encoder.Flush(); err != nil {
			return err
		}
		return enc.writeStart(&t)
	case xml.EndElement:
		// Need to flush the wrapped encoder before we write.
		if err := enc.Encoder.Flush(); err != nil {
			return err
		}
		return enc.writeEnd(t.Name)
	default:
		// Delegate to the embedded encoder for everything else.
		return enc.Encoder.EncodeToken(t)
	}
}

// writeStart records the element (and any xmlns declarations it carries) on
// the state stack and writes the start tag, translating namespace URIs in the
// element and attribute names into the prefixes currently in scope.
//
// It returns an error if the element has no local name, if a name refers to an
// unknown namespace, or if writing to the underlying writer fails. Attributes
// without a local name are skipped.
func (enc *Encoder) writeStart(start *xml.StartElement) error {
	if start.Name.Local == "" {
		return fmt.Errorf("start tag with no name")
	}
	// Register xmlns attributes first so that names in this very tag can
	// already refer to the namespaces it declares.
	enc.setUpState(start)

	// Begin creating the start tag.
	var st bytes.Buffer
	st.WriteByte('<')
	n, err := enc.translateName(start.Name)
	if err != nil {
		return fmt.Errorf("translating start tag name %q failed, got: %v", start.Name.Local, err)
	}
	st.Write(n)
	for _, attr := range start.Attr {
		if attr.Name.Local == "" {
			continue
		}
		st.WriteByte(' ')
		an, err := enc.translateName(attr.Name)
		if err != nil {
			return fmt.Errorf("translating attribute name %q failed, got: %v", attr.Name.Local, err)
		}
		st.Write(an)
		st.WriteString(`="`)
		if err := xml.EscapeText(&st, []byte(attr.Value)); err != nil {
			return fmt.Errorf("escaping value of attribute %q failed, got: %v", attr.Name.Local, err)
		}
		st.WriteByte('"')
	}
	st.WriteByte('>')

	enc.p.writeIndent(1)
	_, err = enc.p.Write(st.Bytes())
	return err
}

// writeEnd writes the end tag for name and unwinds the state that the matching
// start element set up.
//
// It returns an error if name has no local part, refers to an unknown
// namespace, does not match the most recently opened element, or if writing to
// the underlying writer fails.
func (enc *Encoder) writeEnd(name xml.Name) error {
	if name.Local == "" {
		return fmt.Errorf("end tag with no name")
	}
	// Translate before tearing down: the namespace of the tag may have been
	// declared on the very element that is being closed.
	n, err := enc.translateName(name)
	if err != nil {
		return fmt.Errorf("translating end tag name %q failed, got: %v", name.Local, err)
	}
	sn := enc.tearDownState()
	// The tags are balanced only if both the local name and the namespace match.
	if sn == nil || name.Local != sn.Local || name.Space != sn.Space {
		return fmt.Errorf("tags are unbalanced, got: %v, wanted: %v", name, sn)
	}

	// Begin creating the end tag.
	var et bytes.Buffer
	et.WriteString("</")
	et.Write(n)
	et.WriteByte('>')

	enc.p.writeIndent(-1)
	_, err = enc.p.Write(et.Bytes())
	return err
}

// setUpState pushes the start element, followed by one entry per xmlns
// attribute it declares, onto the state stack and brings the declared
// namespaces into scope.
func (enc *Encoder) setUpState(start *xml.StartElement) {
	enc.state = append(enc.state, element{n: &start.Name}) // Store start element to verify balanced close tags.
	// Track attrs that affect the state of the xml (e.g. xmlns, xmlns:foo).
	for _, attr := range start.Attr {
		// Push any xmlns type attrs as xml namespaces are valid within the tag
		// they are declared in, and onward.
		if attr.Name.Space != xmlNS && attr.Name.Local != xmlNS {
			continue
		}
		prefix := attr.Name.Local
		if attr.Name.Local == xmlNS {
			prefix = "" // Default xml namespace is being set.
		}
		// Store the previous state, to be restored when exiting the tag.
		enc.state = append(enc.state, xmlns{prefix: prefix, uri: enc.prefixURI[prefix]})
		enc.prefixURI[prefix] = attr.Value
		enc.uriPrefix.put(attr.Value, prefix)
	}
}

// tearDownState pops entries off the state stack until it reaches the most
// recent element entry, restoring the namespace bindings that were in effect
// before that element was opened. It returns the name of that element, or nil
// if the stack holds no element.
//
// If the namespace bookkeeping is found to be inconsistent the process is
// terminated via log.Fatalf.
func (enc *Encoder) tearDownState() *xml.Name {
	// Unwind the state setup on start element.
	for len(enc.state) > 0 {
		s := enc.state[len(enc.state)-1]
		enc.state = enc.state[:len(enc.state)-1]
		switch s := s.(type) {
		case element:
			// Stop unwinding as soon as an element type is seen; the caller
			// verifies that the tags are balanced.
			return s.n
		case xmlns:
			if p, ok := enc.uriPrefix.removeLast(enc.prefixURI[s.prefix]); !ok || p != s.prefix {
				// Unexpected error, internal state is corrupt.
				if !ok {
					log.Fatalf("xmlns attribute state corrupt, uri %q does not exist", enc.prefixURI[s.prefix])
				}
				log.Fatalf("xmlns attributes state corrupt, got: %q, wanted: %q", s.prefix, p)
			}
			// Restore the binding the prefix had before this declaration; an
			// empty uri is taken to mean the prefix was previously unbound.
			if s.uri == "" {
				delete(enc.prefixURI, s.prefix)
			} else {
				enc.prefixURI[s.prefix] = s.uri
			}
		}
	}
	return nil
}

// translateName returns the serialized form of name ("prefix:local", or just
// "local" when no prefix applies).
//
// name.Space is resolved in this order: the literal "xmlns" is kept as the
// prefix; a uri that is currently in scope maps to its most recently declared
// prefix; a string that is itself a declared prefix is used verbatim. Anything
// else is reported as an unknown namespace.
func (enc *Encoder) translateName(name xml.Name) ([]byte, error) {
	var n bytes.Buffer
	if name.Space != "" {
		prefix := ""
		switch {
		case name.Space == xmlNS:
			prefix = xmlNS
		default:
			if ns, ok := enc.uriPrefix.getLast(name.Space); ok {
				// URI Space is defined in current context, use the namespace.
				prefix = ns
			} else if _, ok := enc.prefixURI[name.Space]; ok {
				// If URI Space is not defined in current context, there is a
				// possibility that the Space is in fact a namespace prefix.
				// If present use it.
				prefix = name.Space
			} else {
				return nil, fmt.Errorf("unknown namespace: %s", name.Space)
			}
		}
		if prefix != "" {
			n.WriteString(prefix)
			n.WriteByte(':')
		}
	}
	n.WriteString(name.Local)
	return n.Bytes(), nil
}

// printer writes tags to the embedded writer and tracks indentation state.
type printer struct {
	io.Writer
	indent     string // written once per depth level before a tag.
	prefix     string // written at the start of every indented line.
	depth      int    // current nesting depth.
	indentedIn bool   // true when the last indent was for a start tag.
	putNewline bool   // true once the first line has been written.
}

// writeIndent is directly cribbed from encoding/xml/marshal.go to keep indentation behavior the same.
//
// A positive depthDelta is used before a start tag and a negative one before
// an end tag. It is a no-op when neither prefix nor indent is set. Errors from
// the underlying writer are not reported by this method.
func (p *printer) writeIndent(depthDelta int) {
	if len(p.prefix) == 0 && len(p.indent) == 0 {
		return
	}
	if depthDelta < 0 {
		p.depth--
		if p.indentedIn {
			// The end tag directly follows its start tag: keep it on the same line.
			p.indentedIn = false
			return
		}
		p.indentedIn = false
	}
	if p.putNewline {
		p.Write([]byte("\n"))
	} else {
		p.putNewline = true
	}
	if len(p.prefix) > 0 {
		p.Write([]byte(p.prefix))
	}
	if len(p.indent) > 0 {
		for i := 0; i < p.depth; i++ {
			p.Write([]byte(p.indent))
		}
	}
	if depthDelta > 0 {
		p.depth++
		p.indentedIn = true
	}
}

// uriPrefixMap is a multimap, mapping a uri to many xml namespace prefixes. The
// difference between this and a traditional multimap is that you can only get
// or remove the last prefix added. This is mainly due to the way xml decoding
// is implemented by the encoding/xml Decoder.
type uriPrefixMap struct {
	up map[string][]string
}

// getLast returns the last prefix stored for the given uri and a boolean which
// signifies if such an entry exists.
func (u *uriPrefixMap) getLast(uri string) (string, bool) {
	ps, ok := u.up[uri]
	if !ok || len(ps) == 0 {
		return "", false
	}
	return ps[len(ps)-1], true
}

// put records prefix as the most recent prefix for uri.
func (u *uriPrefixMap) put(uri, prefix string) {
	// Though the mapping of uri-to-prefix is implemented as a multimap, in
	// practice there should never be more than a single prefix defined for any
	// given uri at any point in time in an xml file.
	//
	// Appending to the (possibly nil) existing slice keeps the stack free of
	// placeholder entries, so removing the last prefix also removes the key.
	u.up[uri] = append(u.up[uri], prefix)
}

// removeLast removes and returns the last prefix stored for the given uri,
// along with a boolean which signifies if the entry existed. If the last entry
// is removed the key is also deleted.
func (u *uriPrefixMap) removeLast(uri string) (string, bool) {
	p, ok := u.getLast(uri)
	if ok {
		if len(u.up[uri]) > 1 {
			u.up[uri] = u.up[uri][:len(u.up[uri])-1]
		} else {
			delete(u.up, uri)
		}
	}
	return p, ok
}

// state stores the state of the xml when a new start element is seen.
type state interface{}

// element is the state entry pushed for every start element; n is compared
// against the end element's name to verify that tags are balanced.
type element struct {
	n *xml.Name
}

// xmlns is the state entry pushed for every xmlns attribute. uri holds the uri
// the prefix was bound to before the declaration, so it can be restored.
type xmlns struct {
	prefix string
	uri    string
}