xref: /aosp_15_r20/external/spdx-tools/tvloader/reader/reader.go (revision ba677afa8f67bb56cbc794f4d0e378e0da058e16)
1*ba677afaSXin Li// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2*ba677afaSXin Li
3*ba677afaSXin Lipackage reader
4*ba677afaSXin Li
5*ba677afaSXin Liimport (
6*ba677afaSXin Li	"bufio"
7*ba677afaSXin Li	"fmt"
8*ba677afaSXin Li	"io"
9*ba677afaSXin Li	"strings"
10*ba677afaSXin Li	"unicode"
11*ba677afaSXin Li)
12*ba677afaSXin Li
// TagValuePair holds one tag together with its value, both as plain
// strings. ReadTagValues returns its results as a slice of these.
type TagValuePair struct {
	Tag, Value string
}
18*ba677afaSXin Li
19*ba677afaSXin Li// ReadTagValues takes an io.Reader, scans it line by line and returns
20*ba677afaSXin Li// a slice of {string, string} structs in the form {tag, value}.
21*ba677afaSXin Lifunc ReadTagValues(content io.Reader) ([]TagValuePair, error) {
22*ba677afaSXin Li	r := &tvReader{}
23*ba677afaSXin Li
24*ba677afaSXin Li	scanner := bufio.NewScanner(content)
25*ba677afaSXin Li	for scanner.Scan() {
26*ba677afaSXin Li		// read each line, one by one
27*ba677afaSXin Li		err := r.readNextLine(scanner.Text())
28*ba677afaSXin Li		if err != nil {
29*ba677afaSXin Li			return nil, err
30*ba677afaSXin Li		}
31*ba677afaSXin Li	}
32*ba677afaSXin Li	if err := scanner.Err(); err != nil {
33*ba677afaSXin Li		return nil, err
34*ba677afaSXin Li	}
35*ba677afaSXin Li
36*ba677afaSXin Li	// finalize and make sure all is well
37*ba677afaSXin Li	tvList, err := r.finalize()
38*ba677afaSXin Li	if err != nil {
39*ba677afaSXin Li		return nil, err
40*ba677afaSXin Li	}
41*ba677afaSXin Li
42*ba677afaSXin Li	// convert internal format to exported TagValueList
43*ba677afaSXin Li	var exportedTVList []TagValuePair
44*ba677afaSXin Li	for _, tv := range tvList {
45*ba677afaSXin Li		tvPair := TagValuePair{Tag: tv.tag, Value: tv.value}
46*ba677afaSXin Li		exportedTVList = append(exportedTVList, tvPair)
47*ba677afaSXin Li	}
48*ba677afaSXin Li
49*ba677afaSXin Li	return exportedTVList, nil
50*ba677afaSXin Li}
51*ba677afaSXin Li
// tagvalue is the unexported form of one parsed (tag, value) pair.
type tagvalue struct {
	tag   string
	value string
}

// tvReader is a line-at-a-time state machine that turns tag-value text
// into a list of tagvalue pairs. It is either "ready" (expecting a new
// "Tag: value" line) or "midtext" (inside a <text> block that has not
// yet seen its closing </text>). The zero value is ready for use.
type tvReader struct {
	midtext       bool       // true while inside an unterminated <text> block
	tvList        []tagvalue // completed pairs, in the order they were read
	currentLine   int        // count of lines passed to readNextLine so far
	currentTag    string     // tag of the pair being assembled; empty between pairs
	currentValue  string     // value accumulated so far for currentTag
	textStartLine int        // line on which the open <text> block began; 0 when not midtext
}

// finalize returns the pairs collected so far. It fails if the input
// ended inside a <text> block, reporting the tag and the line on which
// that block was opened.
func (reader *tvReader) finalize() ([]tagvalue, error) {
	if reader.midtext {
		return nil, fmt.Errorf(
			"finalize called while still midtext parsing a text tag: <text> block for tag %q opened on line %d is missing </text>",
			reader.currentTag, reader.textStartLine)
	}
	return reader.tvList, nil
}

// readNextLine consumes one line of input (without its line terminator),
// advancing the line counter and dispatching on the current state.
func (reader *tvReader) readNextLine(line string) error {
	reader.currentLine++

	if reader.midtext {
		return reader.readNextLineFromMidtext(line)
	}

	return reader.readNextLineFromReady(line)
}

// recordCurrent appends the pair being assembled to tvList and puts the
// reader back into the ready state with no pending tag or value.
func (reader *tvReader) recordCurrent() {
	reader.tvList = append(reader.tvList, tagvalue{reader.currentTag, reader.currentValue})

	reader.midtext = false
	reader.currentTag = ""
	reader.currentValue = ""
	reader.textStartLine = 0
}

// readNextLineFromReady handles a line seen while not inside a <text>
// block. Blank lines and lines whose first non-space character is '#'
// are skipped. Any other line must contain a colon: the part before the
// first colon, trimmed, is the tag. The value is either the trimmed
// remainder of the line, or, if the remainder contains <text>, the
// verbatim text after that marker up to </text>. When </text> is not on
// the same line the reader switches to midtext and keeps accumulating.
//
// It returns an error, including the current line number, when the line
// has no colon.
func (reader *tvReader) readNextLineFromReady(line string) error {
	// leading whitespace is not significant outside a <text> block
	trimmed := strings.TrimLeftFunc(line, unicode.IsSpace)

	// blank lines and comment lines carry no tag-value pair
	if trimmed == "" || strings.HasPrefix(trimmed, "#") {
		return nil
	}

	// split at the first colon only, so the value may itself contain colons
	parts := strings.SplitN(trimmed, ":", 2)
	if len(parts) == 1 {
		return fmt.Errorf("line %d: no colon found in '%s'", reader.currentLine, line)
	}
	reader.currentTag = strings.TrimSpace(parts[0])

	// determine whether the value contains (or starts) a <text> block
	parts = strings.SplitN(parts[1], "<text>", 2)
	if len(parts) == 1 {
		// no <text> marker: an ordinary single-line value
		reader.currentValue = strings.TrimSpace(parts[0])
		reader.recordCurrent()
		return nil
	}

	// there was a <text> marker (anything between the colon and the
	// marker is dropped); now decide whether the block is multi-line
	parts = strings.SplitN(parts[1], "</text>", 2)
	if len(parts) == 1 {
		// no </text> on this line: keep the newline that the line
		// splitter removed and continue in midtext
		reader.currentValue = parts[0] + "\n"
		reader.midtext = true
		reader.textStartLine = reader.currentLine
		return nil
	}

	// <text> and </text> on the same line: the value is exactly what lies
	// between them; anything after </text> is dropped
	reader.currentValue = parts[0]
	reader.recordCurrent()
	return nil
}

// readNextLineFromMidtext handles a line seen while inside a <text>
// block. Lines are appended verbatim (no trimming, no comment handling)
// with a newline until one contains </text>; the text before that marker
// completes the value and the pair is recorded. Anything after </text>
// on the closing line is dropped. It always returns nil.
func (reader *tvReader) readNextLineFromMidtext(line string) error {
	// look for whether the block closes on this line
	parts := strings.SplitN(line, "</text>", 2)
	if len(parts) == 1 {
		// still inside the block: keep building the current value
		reader.currentValue += line + "\n"
		return nil
	}

	// block closes here: finish the value and record the pair
	reader.currentValue += parts[0]
	reader.recordCurrent()
	return nil
}
162