1*ba677afaSXin Li// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 2*ba677afaSXin Li 3*ba677afaSXin Lipackage reader 4*ba677afaSXin Li 5*ba677afaSXin Liimport ( 6*ba677afaSXin Li "bufio" 7*ba677afaSXin Li "fmt" 8*ba677afaSXin Li "io" 9*ba677afaSXin Li "strings" 10*ba677afaSXin Li "unicode" 11*ba677afaSXin Li) 12*ba677afaSXin Li 13*ba677afaSXin Li// TagValuePair is a convenience struct for a (tag, value) string pair. 14*ba677afaSXin Litype TagValuePair struct { 15*ba677afaSXin Li Tag string 16*ba677afaSXin Li Value string 17*ba677afaSXin Li} 18*ba677afaSXin Li 19*ba677afaSXin Li// ReadTagValues takes an io.Reader, scans it line by line and returns 20*ba677afaSXin Li// a slice of {string, string} structs in the form {tag, value}. 21*ba677afaSXin Lifunc ReadTagValues(content io.Reader) ([]TagValuePair, error) { 22*ba677afaSXin Li r := &tvReader{} 23*ba677afaSXin Li 24*ba677afaSXin Li scanner := bufio.NewScanner(content) 25*ba677afaSXin Li for scanner.Scan() { 26*ba677afaSXin Li // read each line, one by one 27*ba677afaSXin Li err := r.readNextLine(scanner.Text()) 28*ba677afaSXin Li if err != nil { 29*ba677afaSXin Li return nil, err 30*ba677afaSXin Li } 31*ba677afaSXin Li } 32*ba677afaSXin Li if err := scanner.Err(); err != nil { 33*ba677afaSXin Li return nil, err 34*ba677afaSXin Li } 35*ba677afaSXin Li 36*ba677afaSXin Li // finalize and make sure all is well 37*ba677afaSXin Li tvList, err := r.finalize() 38*ba677afaSXin Li if err != nil { 39*ba677afaSXin Li return nil, err 40*ba677afaSXin Li } 41*ba677afaSXin Li 42*ba677afaSXin Li // convert internal format to exported TagValueList 43*ba677afaSXin Li var exportedTVList []TagValuePair 44*ba677afaSXin Li for _, tv := range tvList { 45*ba677afaSXin Li tvPair := TagValuePair{Tag: tv.tag, Value: tv.value} 46*ba677afaSXin Li exportedTVList = append(exportedTVList, tvPair) 47*ba677afaSXin Li } 48*ba677afaSXin Li 49*ba677afaSXin Li return exportedTVList, nil 50*ba677afaSXin Li} 51*ba677afaSXin Li 52*ba677afaSXin 
// tagvalue is the package-internal form of a single (tag, value) pair.
type tagvalue struct {
	tag   string
	value string
}

// tvReader is a line-at-a-time parser for tag-value content. Lines are fed
// in through readNextLine; completed pairs accumulate in tvList and are
// handed back by finalize.
type tvReader struct {
	// midtext is true while a <text> value has been opened on an earlier
	// line and its closing </text> has not been seen yet
	midtext bool
	// tvList holds the completed pairs, in the order they were read
	tvList []tagvalue
	// currentLine is incremented once per readNextLine call
	currentLine int
	// currentTag and currentValue hold the pair that is being built; both
	// are cleared once the pair has been appended to tvList
	currentTag   string
	currentValue string
}

// finalize returns the pairs collected so far. It returns an error if the
// reader is still inside an unterminated <text> value.
func (r *tvReader) finalize() ([]tagvalue, error) {
	if r.midtext {
		return nil, fmt.Errorf("finalize called while still midtext parsing a text tag")
	}
	return r.tvList, nil
}

// readNextLine advances the line counter and parses line according to
// whether the reader is currently inside a multi-line <text> value.
func (r *tvReader) readNextLine(line string) error {
	r.currentLine++

	if r.midtext {
		return r.readNextLineFromMidtext(line)
	}

	return r.readNextLineFromReady(line)
}

// readNextLineFromReady parses a line that is expected to start a new
// "tag: value" pair. Blank lines and lines whose first non-space character
// is '#' are ignored. A line without a colon is an error. If the value
// opens a <text> section that is not closed on the same line, the reader
// switches to midtext mode and the pair is completed by later lines.
func (r *tvReader) readNextLineFromReady(line string) error {
	// strip whitespace from beginning of line
	trimmed := strings.TrimLeftFunc(line, unicode.IsSpace)

	// ignore empty lines and comment lines
	if trimmed == "" || strings.HasPrefix(trimmed, "#") {
		return nil
	}

	// split at the first colon; everything before it is the tag
	parts := strings.SplitN(trimmed, ":", 2)
	if len(parts) == 1 {
		return fmt.Errorf("line %d: no colon found in '%s'", r.currentLine, line)
	}
	r.currentTag = strings.TrimSpace(parts[0])

	// determine whether the value contains (or starts) a <text> section
	parts = strings.SplitN(parts[1], "<text>", 2)
	if len(parts) == 1 {
		// no <text> tag: a plain single-line value, whitespace-trimmed
		r.currentValue = strings.TrimSpace(parts[0])
		r.recordCurrent()
		return nil
	}

	// there was a <text> tag; now decide whether it's multi-line
	parts = strings.SplitN(parts[1], "</text>", 2)
	if len(parts) == 1 {
		// no </text> on this line; keep what we have and switch to midtext
		r.currentValue = parts[0] + "\n"
		r.midtext = true
		return nil
	}

	// <text> and </text> on the same line; the part in between is the
	// value, kept verbatim (anything after </text> is discarded)
	r.currentValue = parts[0]
	r.recordCurrent()
	return nil
}

// readNextLineFromMidtext parses a line that continues an open <text>
// value. Without a closing </text> the whole line plus a newline is
// appended to the value; otherwise the part before </text> is appended,
// the pair is recorded and the reader leaves midtext mode.
func (r *tvReader) readNextLineFromMidtext(line string) error {
	// look for whether the text closes on this line
	parts := strings.SplitN(line, "</text>", 2)
	if len(parts) == 1 {
		// doesn't contain </text>, so keep building the current value
		r.currentValue += line + "\n"
		return nil
	}

	// contains </text>, so end and record this pair
	r.currentValue += parts[0]
	r.midtext = false
	r.recordCurrent()
	return nil
}

// recordCurrent appends the pair under construction to tvList and clears
// currentTag and currentValue for the next pair.
func (r *tvReader) recordCurrent() {
	r.tvList = append(r.tvList, tagvalue{tag: r.currentTag, value: r.currentValue})
	r.currentTag = ""
	r.currentValue = ""
}