xref: /aosp_15_r20/external/bazelbuild-rules_android/src/common/golang/marshal.go (revision 9e965d6fece27a77de5377433c2f7e6999b8cc0b)
1// Copyright 2018 The Bazel Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package xml2 provides drop-in replacement functionality for encoding/xml.
16//
17// There are existing issues with the encoding/xml package that affect AK tools.
18//
19// xml2.Encoder:
20//
// The current encoding/xml Encoder has several issues around xml namespacing
// that make the output it produces incompatible with AAPT.
23//
24// * Tracked here: https://golang.org/issue/7535
25//
// The xml2.Encoder.EncodeToken verifies the validity of namespaces and encodes
// them. For everything else, xml2.Encoder will fall back to the xml.Encoder.
28package xml2
29
30import (
31	"bytes"
32	"encoding/xml"
33	"fmt"
34	"io"
35	"log"
36)
37
// xmlNS is the reserved "xmlns" name used by xml namespace declarations.
// translateName emits it unchanged as the prefix of any name whose Space is
// "xmlns" (i.e. attributes of the form xmlns:foo).
const xmlNS = "xmlns"
39
// Encoder is an xml encoder which behaves much like the encoding/xml Encoder.
//
// Start and end elements are written by the Encoder itself (see EncodeToken);
// every other token is delegated to the embedded xml.Encoder.
//
// Unexported fields:
//   - p writes the start and end tags; NewEncoder points it at the same writer
//     as the embedded encoder.
//   - prefixURI maps a namespace prefix ("" for the default namespace) to the
//     uri currently bound to it.
//   - state is a stack of element and xmlns entries, pushed for every start
//     element and unwound when the matching end element is written.
//   - uriPrefix maps a namespace uri to the prefixes declared for it, with the
//     most recent declaration last.
type Encoder struct {
	*xml.Encoder
	p         printer
	prefixURI map[string]string
	state     []state
	uriPrefix *uriPrefixMap
}
48
49// ChildEncoder returns an encoder whose state is copied the given parent Encoder and writes to w.
50func ChildEncoder(w io.Writer, parent *Encoder) *Encoder {
51	e := NewEncoder(w)
52	for k, v := range parent.prefixURI {
53		e.prefixURI[k] = v
54	}
55	for k, v := range parent.uriPrefix.up {
56		e.uriPrefix.up[k] = make([]string, len(v))
57		copy(e.uriPrefix.up[k], v)
58	}
59	return e
60}
61
62// NewEncoder returns a new encoder that writes to w.
63func NewEncoder(w io.Writer) *Encoder {
64	e := &Encoder{
65		Encoder:   xml.NewEncoder(w),
66		p:         printer{Writer: w},
67		prefixURI: make(map[string]string),
68		uriPrefix: &uriPrefixMap{up: make(map[string][]string)},
69	}
70	return e
71}
72
73// EncodeToken behaves almost the same as encoding/xml.Encoder.EncodeToken
74// but deals with StartElement and EndElement differently.
75func (enc *Encoder) EncodeToken(t xml.Token) error {
76	switch t := t.(type) {
77	case xml.StartElement:
78		enc.Encoder.Flush() // Need to flush the wrapped encoder before we write.
79		if err := enc.writeStart(&t); err != nil {
80			return err
81		}
82	case xml.EndElement:
83		enc.Encoder.Flush() // Need to flush the wrapped encoder before we write.
84		if err := enc.writeEnd(t.Name); err != nil {
85			return err
86		}
87	default:
88		// Delegate to the embedded encoder for everything else.
89		return enc.Encoder.EncodeToken(t)
90	}
91	return nil
92}
93
94func (enc *Encoder) writeStart(start *xml.StartElement) error {
95	if start.Name.Local == "" {
96		return fmt.Errorf("start tag with no name")
97	}
98	enc.setUpState(start)
99
100	// Begin creating the start tag.
101	var st bytes.Buffer
102	st.WriteByte('<')
103	n, err := enc.translateName(start.Name)
104	if err != nil {
105		return fmt.Errorf("translating start tag name %q failed, got: %v", start.Name.Local, err)
106	}
107	st.Write(n)
108	for _, attr := range start.Attr {
109		name := attr.Name
110		if name.Local == "" {
111			continue
112		}
113		st.WriteByte(' ')
114		n, err := enc.translateName(attr.Name)
115		if err != nil {
116			return fmt.Errorf("translating attribute name %q failed, got: %v", start.Name.Local, err)
117		}
118		st.Write(n)
119		st.WriteString(`="`)
120		xml.EscapeText(&st, []byte(attr.Value))
121		st.WriteByte('"')
122	}
123	st.WriteByte('>')
124
125	enc.p.writeIndent(1)
126	enc.p.Write(st.Bytes())
127	return nil
128}
129
130func (enc *Encoder) writeEnd(name xml.Name) error {
131	if name.Local == "" {
132		return fmt.Errorf("end tag with no name")
133	}
134	n, err := enc.translateName(name)
135	if err != nil {
136		return fmt.Errorf("translating end tag name %q failed, got: %v", name.Local, err)
137	}
138	sn := enc.tearDownState()
139	if sn == nil || name.Local != sn.Local && name.Space != sn.Space {
140		return fmt.Errorf("tags are unbalanced, got: %v, wanted: %v", name, sn)
141	}
142
143	// Begin creating the end tag
144	var et bytes.Buffer
145	et.WriteString("</")
146	et.Write(n)
147	et.WriteByte('>')
148
149	enc.p.writeIndent(-1)
150	enc.p.Write(et.Bytes())
151	return nil
152}
153
154func (enc *Encoder) setUpState(start *xml.StartElement) {
155	enc.state = append(enc.state, element{n: &start.Name}) // Store start element to verify balanced close tags.
156	// Track attrs that affect the state of the xml (e.g. xmlns, xmlns:foo).
157	for _, attr := range start.Attr {
158		// push any xmlns type attrs as xml namespaces are valid within the tag they are declared in, and onward.
159		if attr.Name.Space == "xmlns" || attr.Name.Local == "xmlns" {
160			prefix := attr.Name.Local
161			if attr.Name.Local == "xmlns" {
162				prefix = "" // Default xml namespace is being set.
163			}
164			// Store the previous state, to be restored when exiting the tag.
165			enc.state = append(enc.state, xmlns{prefix: prefix, uri: enc.prefixURI[prefix]})
166			enc.prefixURI[prefix] = attr.Value
167			enc.uriPrefix.put(attr.Value, prefix)
168		}
169	}
170}
171
172func (enc *Encoder) tearDownState() *xml.Name {
173	// Unwind the state setup on start element.
174	for len(enc.state) > 0 {
175		s := enc.state[len(enc.state)-1]
176		enc.state = enc.state[:len(enc.state)-1]
177		switch s := s.(type) {
178		case element:
179			// Stop unwinding As soon as an element type is seen and verify that the
180			// tags are balanced
181			return s.n
182		case xmlns:
183			if p, ok := enc.uriPrefix.removeLast(enc.prefixURI[s.prefix]); !ok || p != s.prefix {
184				// Unexpected error, internal state is corrupt.
185				if !ok {
186					log.Fatalf("xmlns attribute state corrupt, uri %q does not exist", enc.prefixURI[s.prefix])
187				}
188				log.Fatalf("xmlns attributes state corrupt, got: %q, wanted: %q", s.prefix, p)
189			}
190			if s.uri == "" {
191				delete(enc.prefixURI, s.prefix)
192			} else {
193				enc.prefixURI[s.prefix] = s.uri
194			}
195		}
196	}
197	return nil
198}
199
200func (enc *Encoder) translateName(name xml.Name) ([]byte, error) {
201	var n bytes.Buffer
202	if name.Space != "" {
203		prefix := ""
204		if name.Space == xmlNS {
205			prefix = xmlNS
206		} else if ns, ok := enc.uriPrefix.getLast(name.Space); ok {
207			// URI Space is defined in current context, use the namespace.
208			prefix = ns
209		} else if _, ok := enc.prefixURI[name.Space]; ok {
210			// If URI Space is not defined in current context, there is a possibility
211			// that the Space is in fact a namespace prefix. If present use it.
212			prefix = name.Space
213		} else {
214			return nil, fmt.Errorf("unknown namespace: %s", name.Space)
215		}
216		if prefix != "" {
217			n.WriteString(prefix)
218			n.WriteByte(':')
219		}
220	}
221	n.WriteString(name.Local)
222	return n.Bytes(), nil
223}
224
// printer writes tags to the embedded io.Writer and tracks the indentation
// state used by writeIndent.
//
// prefix is written at the start of every indented line and indent is written
// once per nesting level; when both are empty writeIndent does nothing. depth
// is the current nesting level. indentedIn is true right after a start tag was
// indented, which lets the immediately following end tag stay on the same
// line. putNewline becomes true once the first line has been indented, so the
// output never begins with a newline.
type printer struct {
	io.Writer
	indent, prefix         string
	depth                  int
	indentedIn, putNewline bool
}
233
234// writeIndent is directly cribbed from encoding/xml/marshal.go to keep indentation behavior the same.
235func (p *printer) writeIndent(depthDelta int) {
236	if len(p.prefix) == 0 && len(p.indent) == 0 {
237		return
238	}
239	if depthDelta < 0 {
240		p.depth--
241		if p.indentedIn {
242			p.indentedIn = false
243			return
244		}
245		p.indentedIn = false
246	}
247	if p.putNewline {
248		p.Write([]byte("\n"))
249	} else {
250		p.putNewline = true
251	}
252	if len(p.prefix) > 0 {
253		p.Write([]byte(p.prefix))
254	}
255	if len(p.indent) > 0 {
256		for i := 0; i < p.depth; i++ {
257			p.Write([]byte(p.indent))
258		}
259	}
260	if depthDelta > 0 {
261		p.depth++
262		p.indentedIn = true
263	}
264
265}
266
// uriPrefixMap is a multimap, mapping a uri to many xml namespace prefixes. The
// difference between this and a traditional multimap is that you can only get
// or remove the last prefix added. This is mainly due to the way xml decoding
// is implemented by the encoding/xml Decoder.
//
// up holds, for each uri, the prefixes declared for it in declaration order.
type uriPrefixMap struct {
	up map[string][]string
}
274
275// getLast returns a boolean which signifies if the entry exists and the last
276// prefix stored for the given uri.
277func (u *uriPrefixMap) getLast(uri string) (string, bool) {
278	ps, ok := u.up[uri]
279	if !ok {
280		return "", ok
281	}
282	return ps[len(ps)-1], ok
283}
284
285func (u *uriPrefixMap) put(uri, prefix string) {
286	if _, ok := u.up[uri]; !ok {
287		// Though the mapping of url-to-prefix is implemented for a multimap, in practice,
288		// there should never be more than a single prefix defined for any given uri within
289		// at any point in time in an xml file.
290		u.up[uri] = make([]string, 1)
291	}
292	u.up[uri] = append(u.up[uri], prefix)
293}
294
295// removeLast a boolean which signifies if the entry exists and returns the last
296// prefix removed for the given uri. If the last entry is removed the key is
297// also deleted.
298func (u *uriPrefixMap) removeLast(uri string) (string, bool) {
299	p, ok := u.getLast(uri)
300	if ok {
301		if len(u.up[uri]) > 1 {
302			u.up[uri] = u.up[uri][:len(u.up[uri])-1]
303		} else {
304			delete(u.up, uri)
305		}
306	}
307	return p, ok
308}
309
// state stores the state of the xml when a new start element is seen. The
// encoder's state stack holds values of type element and xmlns.
type state interface{}
312
// element is the state entry pushed for every start element. n points at the
// element's name and is compared against the end tag to check that tags are
// balanced.
type element struct {
	n *xml.Name
}
317
// xmlns is the state entry pushed for every xmlns declaration on a start
// element. prefix is the declared prefix ("" for the default namespace) and
// uri is the uri that prefix was bound to before the declaration ("" if it was
// unbound), so the previous binding can be restored when the element closes.
type xmlns struct {
	prefix, uri string
}
323