aboutsummaryrefslogtreecommitdiff
path: root/tvloader/reader/reader.go
blob: 786f7eaa7ead9a2e4b700a72dd621f4f8e6fa6d9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

package reader

import (
	"bufio"
	"fmt"
	"io"
	"strings"
	"unicode"
)

// TagValuePair holds a single tag together with its value, both kept as
// plain strings. It is the element type of the slice returned by
// ReadTagValues.
type TagValuePair struct {
	Tag, Value string
}

// ReadTagValues scans content line by line and returns the tag-value
// pairs it contains, in the order in which they were completed.
//
// Each line is handed to an internal tvReader, which skips blank lines
// and '#' comment lines, splits "tag: value" lines at the first colon,
// and collects values wrapped in <text>...</text> across multiple lines.
//
// An error is returned, together with a nil slice, when reading from
// content fails, when a line cannot be parsed, or when the input ends
// while a <text> value is still open. Input without any pairs yields a
// nil slice and a nil error.
func ReadTagValues(content io.Reader) ([]TagValuePair, error) {
	// maxLineBytes bounds the length of a single input line. The
	// bufio.Scanner default (bufio.MaxScanTokenSize, 64 KiB) makes the
	// whole parse fail with bufio.ErrTooLong on longer lines, so allow
	// the scan buffer to grow well beyond it on demand.
	const maxLineBytes = 64 * 1024 * 1024

	r := &tvReader{}

	scanner := bufio.NewScanner(content)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)
	for scanner.Scan() {
		// feed the lines to the state machine one at a time
		if err := r.readNextLine(scanner.Text()); err != nil {
			return nil, err
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// finalize and make sure no <text> value was left open
	tvList, err := r.finalize()
	if err != nil {
		return nil, err
	}

	// Convert the internal pairs to the exported TagValuePair type.
	// The result stays nil when there are no pairs, so callers that
	// compare against nil see no difference; otherwise it is allocated
	// once at its final size.
	var exported []TagValuePair
	if len(tvList) > 0 {
		exported = make([]TagValuePair, 0, len(tvList))
	}
	for _, tv := range tvList {
		exported = append(exported, TagValuePair{Tag: tv.tag, Value: tv.value})
	}

	return exported, nil
}

// tagvalue is the reader's internal representation of one parsed
// (tag, value) pair. The field order (tag first, then value) is relied
// upon by positional composite literals in this file.
type tagvalue struct {
	tag, value string
}

// tvReader is the line-oriented state machine behind ReadTagValues.
// Its zero value is ready for use.
type tvReader struct {
	// midtext is true while the reader is inside a <text> value whose
	// closing </text> has not been seen yet.
	midtext bool

	// tvList accumulates the completed pairs in input order.
	tvList []tagvalue

	// currentLine counts the lines passed to readNextLine so far.
	currentLine int

	// currentTag and currentValue hold the pair being assembled; both
	// are reset to "" once the pair has been appended to tvList.
	currentTag, currentValue string
}

// finalize returns the pairs collected so far. It fails, returning a nil
// slice, if the reader is still inside an unterminated <text> value.
func (reader *tvReader) finalize() ([]tagvalue, error) {
	if !reader.midtext {
		return reader.tvList, nil
	}
	return nil, fmt.Errorf("finalize called while still midtext parsing a text tag")
}

// readNextLine bumps the line counter and passes line to the handler
// for the reader's current state: the midtext handler while a <text>
// value is open, the ready handler otherwise.
func (reader *tvReader) readNextLine(line string) error {
	reader.currentLine++

	parse := reader.readNextLineFromReady
	if reader.midtext {
		parse = reader.readNextLineFromMidtext
	}
	return parse(line)
}

// readNextLineFromReady parses one line while no <text> value is open.
//
// Blank lines and lines whose first non-space character is '#' are
// ignored. Any other line must contain a colon: the part before the
// first colon, trimmed, is the tag. The remainder is the value:
//   - without "<text>", it is trimmed and recorded as-is;
//   - with "<text>" and "</text>" on the same line, the text between
//     the two markers is recorded untrimmed;
//   - with only "<text>", everything after the marker plus a newline
//     starts a multi-line value and the reader switches to midtext.
//
// An error is returned if the line contains no colon.
func (reader *tvReader) readNextLineFromReady(line string) error {
	const (
		openMarker  = "<text>"
		closeMarker = "</text>"
	)

	// leading whitespace never matters in this state
	trimmed := strings.TrimLeftFunc(line, unicode.IsSpace)

	// nothing to do for blank lines and comments
	if trimmed == "" || strings.HasPrefix(trimmed, "#") {
		return nil
	}

	// the first colon separates tag from value
	colon := strings.Index(trimmed, ":")
	if colon < 0 {
		return fmt.Errorf("no colon found in '%s'", line)
	}
	reader.currentTag = strings.TrimSpace(trimmed[:colon])
	rest := trimmed[colon+1:]

	if open := strings.Index(rest, openMarker); open < 0 {
		// plain single-line value
		reader.currentValue = strings.TrimSpace(rest)
	} else {
		body := rest[open+len(openMarker):]
		end := strings.Index(body, closeMarker)
		if end < 0 {
			// value continues on following lines; the pair is
			// recorded once the closing marker shows up
			reader.currentValue = body + "\n"
			reader.midtext = true
			return nil
		}
		// opened and closed on the same line
		reader.currentValue = body[:end]
	}

	// the pair is complete: record it and clear the scratch fields
	reader.tvList = append(reader.tvList, tagvalue{
		tag:   reader.currentTag,
		value: reader.currentValue,
	})
	reader.currentTag, reader.currentValue = "", ""

	return nil
}

// readNextLineFromMidtext consumes one line while a <text> value is
// open. A line without "</text>" is appended to the current value
// followed by a newline. Otherwise the part before the first "</text>"
// is appended, the pair is recorded, and the reader leaves midtext.
// It always returns nil.
func (reader *tvReader) readNextLineFromMidtext(line string) error {
	const closeMarker = "</text>"

	end := strings.Index(line, closeMarker)
	if end < 0 {
		// still inside the text block; keep accumulating
		reader.currentValue += line + "\n"
		return nil
	}

	// closing marker found: finish the value and store the pair
	reader.currentValue += line[:end]
	reader.tvList = append(reader.tvList, tagvalue{
		tag:   reader.currentTag,
		value: reader.currentValue,
	})

	// back to the ready state with clean scratch fields
	reader.midtext = false
	reader.currentTag, reader.currentValue = "", ""

	return nil
}