1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
package reader
import (
"bufio"
"fmt"
"io"
"strings"
"unicode"
)
// TagValuePair is one (tag, value) entry as returned by ReadTagValues.
// Both halves are plain strings.
type TagValuePair struct {
	// Tag is the text that appeared before the first colon on the line.
	Tag string
	// Value is the text associated with Tag: either the remainder of the
	// line or the contents of a <text>...</text> block.
	Value string
}
// ReadTagValues reads content line by line and returns the (tag, value)
// pairs it contains, in the order they appear.
//
// Each line is handed to an internal tvReader. An error is returned if a
// line cannot be parsed, if reading from content fails, if a line exceeds
// the maximum supported length, or if the input ends while a <text> block
// is still open. On error the returned slice is nil. When the input
// contains no pairs at all, the returned slice is nil and the error is nil.
func ReadTagValues(content io.Reader) ([]TagValuePair, error) {
	const (
		// initialLineBuf is the starting size of the scanner's line buffer.
		initialLineBuf = 4 * 1024
		// maxLineSize bounds how far the line buffer may grow. Without an
		// explicit Buffer call, bufio.Scanner stops with bufio.ErrTooLong on
		// any line longer than bufio.MaxScanTokenSize (64 KiB), which a long
		// single-line value can exceed.
		maxLineSize = 16 * 1024 * 1024
	)

	r := &tvReader{}
	scanner := bufio.NewScanner(content)
	// Buffer must be configured before the first call to Scan.
	scanner.Buffer(make([]byte, 0, initialLineBuf), maxLineSize)
	for scanner.Scan() {
		if err := r.readNextLine(scanner.Text()); err != nil {
			return nil, err
		}
	}
	// Scan returning false may mean EOF or a read error; tell them apart.
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// finalize fails if the input ended in the middle of a <text> block.
	tvList, err := r.finalize()
	if err != nil {
		return nil, err
	}

	// Keep returning a nil slice (not an empty one) when nothing was parsed.
	if len(tvList) == 0 {
		return nil, nil
	}

	// Convert the internal representation to the exported pair type.
	exported := make([]TagValuePair, 0, len(tvList))
	for _, tv := range tvList {
		exported = append(exported, TagValuePair{Tag: tv.tag, Value: tv.value})
	}
	return exported, nil
}
// tagvalue is the reader's internal representation of one parsed
// (tag, value) pair.
type tagvalue struct {
	tag   string
	value string
}

// tvReader is a small line-driven state machine that turns tag-value lines
// into tagvalue pairs. It has two states: "ready" (expecting a new
// "tag: value" line) and "midtext" (collecting the lines of a <text> block
// that was opened on an earlier line). The zero value is ready to use.
// Because it embeds a strings.Builder it must be used through a pointer and
// not copied after first use.
type tvReader struct {
	// midtext is true while an opened <text> block has not been closed yet.
	midtext bool
	// tvList holds every completed pair, in input order.
	tvList []tagvalue
	// currentLine counts the lines passed to readNextLine so far.
	currentLine int
	// currentTag is the tag whose multi-line value is being collected.
	// It is empty whenever midtext is false.
	currentTag string
	// currentValue accumulates the multi-line value for currentTag. A
	// builder is used so that appending each line does not re-copy the
	// text gathered so far.
	currentValue strings.Builder
}

// finalize returns the pairs collected so far. It returns an error, and a
// nil slice, if the reader is still inside an unterminated <text> block.
func (reader *tvReader) finalize() ([]tagvalue, error) {
	if reader.midtext {
		return nil, fmt.Errorf("finalize called while still midtext parsing a text tag")
	}
	return reader.tvList, nil
}

// readNextLine feeds one input line (without its line terminator) to the
// reader, dispatching on whether a <text> block is currently open.
func (reader *tvReader) readNextLine(line string) error {
	reader.currentLine++
	if reader.midtext {
		return reader.readNextLineFromMidtext(line)
	}
	return reader.readNextLineFromReady(line)
}

// readNextLineFromReady handles a line while no <text> block is open.
//
// Blank lines and lines whose first non-space character is '#' are
// ignored. Any other line must contain a colon; the text before the first
// colon, with surrounding whitespace trimmed, is the tag. The value is one
// of:
//   - the whitespace-trimmed remainder of the line, if it has no "<text>";
//   - the exact text between "<text>" and the first following "</text>",
//     if both are on this line;
//   - otherwise the start of a multi-line value, in which case the reader
//     switches to midtext and nothing is recorded yet.
//
// It returns an error if the line has no colon.
func (reader *tvReader) readNextLineFromReady(line string) error {
	// Only leading whitespace is stripped here; trailing whitespace is
	// handled per case below because <text> content is kept verbatim.
	trimmed := strings.TrimLeftFunc(line, unicode.IsSpace)
	if trimmed == "" || strings.HasPrefix(trimmed, "#") {
		return nil
	}

	colon := strings.Index(trimmed, ":")
	if colon < 0 {
		// Report the line as given, including any leading whitespace.
		return fmt.Errorf("no colon found in '%s'", line)
	}
	tag := strings.TrimSpace(trimmed[:colon])
	rest := trimmed[colon+1:]

	open := strings.Index(rest, "<text>")
	if open < 0 {
		// Plain single-line value.
		reader.record(tag, strings.TrimSpace(rest))
		return nil
	}

	// Anything before "<text>" is discarded; the content after it is kept
	// exactly as written.
	body := rest[open+len("<text>"):]
	if end := strings.Index(body, "</text>"); end >= 0 {
		// Opened and closed on the same line; text after "</text>" is
		// ignored.
		reader.record(tag, body[:end])
		return nil
	}

	// The block stays open: remember the tag, start the value with this
	// line's content plus a newline, and wait for the closing tag.
	reader.currentTag = tag
	reader.currentValue.WriteString(body)
	reader.currentValue.WriteByte('\n')
	reader.midtext = true
	return nil
}

// readNextLineFromMidtext handles a line while a <text> block is open.
// Lines without "</text>" are appended verbatim, followed by a newline.
// The first line containing "</text>" contributes the text before it,
// completes the pair, and returns the reader to the ready state. It never
// returns a non-nil error.
func (reader *tvReader) readNextLineFromMidtext(line string) error {
	end := strings.Index(line, "</text>")
	if end < 0 {
		reader.currentValue.WriteString(line)
		reader.currentValue.WriteByte('\n')
		return nil
	}

	reader.currentValue.WriteString(line[:end])
	reader.record(reader.currentTag, reader.currentValue.String())

	// Back to the ready state with no pending tag or value.
	reader.midtext = false
	reader.currentTag = ""
	reader.currentValue.Reset()
	return nil
}

// record appends a completed (tag, value) pair to the result list.
func (reader *tvReader) record(tag, value string) {
	reader.tvList = append(reader.tvList, tagvalue{tag: tag, value: value})
}
|