aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/yaml/snakeyaml/reader/StreamReader.java')
-rw-r--r--src/main/java/org/yaml/snakeyaml/reader/StreamReader.java394
1 files changed, 202 insertions, 192 deletions
diff --git a/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java b/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java
index 56ec0078..6799e76e 100644
--- a/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java
+++ b/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java
@@ -1,220 +1,230 @@
/**
- * Copyright (c) 2008, http://www.snakeyaml.org
+ * Copyright (c) 2008, SnakeYAML
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
*/
package org.yaml.snakeyaml.reader;
import java.io.IOException;
import java.io.Reader;
-import java.nio.charset.Charset;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
+import java.io.StringReader;
+import java.util.Arrays;
import org.yaml.snakeyaml.error.Mark;
import org.yaml.snakeyaml.error.YAMLException;
import org.yaml.snakeyaml.scanner.Constant;
/**
- * Reader: checks if characters are in allowed range, adds '\0' to the end.
+ * Reader: checks if code points are in allowed range. Returns '\0' when end of data has been
+ * reached.
*/
public class StreamReader {
- public final static Pattern NON_PRINTABLE = Pattern
- .compile("[^\t\n\r\u0020-\u007E\u0085\u00A0-\uD7FF\uE000-\uFFFD]");
- private String name;
- private final Reader stream;
- private int pointer = 0;
- private boolean eof = true;
- private String buffer;
- private int index = 0;
- private int line = 0;
- private int column = 0;
- private char[] data;
-
- public StreamReader(String stream) {
- this.name = "'string'";
- this.buffer = ""; // to set length to 0
- checkPrintable(stream);
- this.buffer = stream + "\0";
- this.stream = null;
- this.eof = true;
- this.data = null;
- }
-
- public StreamReader(Reader reader) {
- this.name = "'reader'";
- this.buffer = "";
- this.stream = reader;
- this.eof = false;
- this.data = new char[1024];
- this.update();
- }
- void checkPrintable(CharSequence data) {
- Matcher em = NON_PRINTABLE.matcher(data);
- if (em.find()) {
- int position = this.index + this.buffer.length() - this.pointer + em.start();
- throw new ReaderException(name, position, em.group().charAt(0),
- "special characters are not allowed");
+ // Label handed to Mark and ReaderException to identify the source: "'string'" or "'reader'"
+ private String name;
+ // Source of characters; update() pulls the next chunk from it on demand
+ private final Reader stream;
+ /**
+ * Read data (as a moving window for input stream), one array element per code point
+ */
+ private int[] dataWindow;
+
+ /**
+ * Real length of the data in dataWindow
+ */
+ private int dataLength;
+
+ /**
+ * The variable points to the current position in the data array
+ */
+ private int pointer = 0;
+ // set by update() once the underlying reader has no more data to deliver
+ private boolean eof;
+ private int index = 0; // in code points
+ private int line = 0;
+ private int column = 0; // in code points
+ private final char[] buffer; // temp buffer reused by every read operation (to avoid
+ // allocating a new array on each read)
+
+ // One more than the chunk size update() requests from the reader: the spare slot holds the
+ // low surrogate that completes a high surrogate found at the very end of a chunk
+ private static final int BUFFER_SIZE = 1025;
+
+  /**
+   * Creates a reader over an in-memory text by wrapping it into a {@link StringReader}.
+   *
+   * @param stream the text to read
+   */
+  public StreamReader(String stream) {
+    this(new StringReader(stream));
+    // the delegated constructor labels the source as 'reader'; relabel it
+    name = "'string'";
+  }
+
+  /**
+   * Creates a reader on top of the given character stream. Nothing is read here; the window
+   * starts empty and is filled when data is first requested.
+   *
+   * @param reader the source of characters
+   */
+  public StreamReader(Reader reader) {
+    stream = reader;
+    name = "'reader'";
+    buffer = new char[BUFFER_SIZE];
+    dataWindow = new int[0];
+    dataLength = 0;
+    eof = false;
+  }
+
+  /**
+   * Tells whether every code point of the text is printable.
+   *
+   * @param data the text to check
+   * @return {@code true} when no code point is rejected by {@link #isPrintable(int)} (also for
+   *         the empty string)
+   */
+  public static boolean isPrintable(final String data) {
+    int position = 0;
+    while (position < data.length()) {
+      final int current = data.codePointAt(position);
+      if (!isPrintable(current)) {
+        return false;
+      }
+      // a supplementary code point occupies two chars, so step by its real width
+      position += Character.charCount(current);
+    }
+    return true;
+  }
+
+  /**
+   * Tells whether a single code point is accepted by this reader.
+   *
+   * @param c the code point to check
+   * @return {@code true} for TAB, LF, CR, NEL (0x85) and the ranges 0x20-0x7E, 0xA0-0xD7FF,
+   *         0xE000-0xFFFD and 0x10000-0x10FFFF; {@code false} otherwise
+   */
+  public static boolean isPrintable(final int c) {
+    // the individually allowed control characters
+    if (c == 0x9 || c == 0xA || c == 0xD || c == 0x85) {
+      return true;
+    }
+    // printable ASCII
+    if (c >= 0x20 && c <= 0x7E) {
+      return true;
+    }
+    // the rest of the BMP, leaving out the surrogate block and 0xFFFE/0xFFFF
+    if ((c >= 0xA0 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD)) {
+      return true;
+    }
+    // supplementary planes
+    return c >= 0x10000 && c <= 0x10FFFF;
+  }
+
+  /**
+   * Captures the current reading position.
+   *
+   * @return a new {@link Mark} built from the source name, the index, line and column counters,
+   *         the current data window and the pointer into it
+   */
+  public Mark getMark() {
+    final Mark mark = new Mark(name, index, line, column, dataWindow, pointer);
+    return mark;
+  }
+
+  /**
+   * Moves one code point forward; shorthand for {@code forward(1)}.
+   */
+  public void forward() {
+    this.forward(1);
+  }
+
+  /**
+   * Consumes the next {@code length} code points and moves the pointer, keeping index, line and
+   * column up to date. Stops early when the end of the data is reached.
+   * <p>
+   * A line is counted for every code point in {@code Constant.LINEBR} and for a CR that is not
+   * immediately followed by LF (including a CR that is the last code point of the stream). A
+   * byte order mark (0xFEFF) does not advance the column.
+   *
+   * @param length amount of code points to move forward
+   */
+  public void forward(int length) {
+    for (int i = 0; i < length && ensureEnoughData(); i++) {
+      int c = dataWindow[pointer++];
+      this.index++;
+      // CR is a line break of its own unless it starts a CR LF pair; when nothing follows the
+      // CR at all there is no LF either, so it must still end the line
+      if (Constant.LINEBR.has(c)
+          || (c == '\r' && !(ensureEnoughData() && dataWindow[pointer] == '\n'))) {
+        this.line++;
+        this.column = 0;
+      } else if (c != 0xFEFF) {
+        this.column++;
+      }
+    }
+  }
+
+  /**
+   * Looks at the current code point without consuming it.
+   *
+   * @return the code point under the pointer, or '\0' when no more data is available
+   */
+  public int peek() {
+    if (!ensureEnoughData()) {
+      return '\0';
+    }
+    return dataWindow[pointer];
+  }
+
+  /**
+   * Looks at the code point {@code index} positions after the pointer without consuming
+   * anything.
+   *
+   * @param index offset from the current position, in code points
+   * @return the code point at that offset, or '\0' when the data does not reach that far
+   */
+  public int peek(int index) {
+    if (!ensureEnoughData(index)) {
+      return '\0';
+    }
+    return dataWindow[pointer + index];
+  }
+
+  /**
+   * Returns the next code points as a string without consuming them.
+   *
+   * @param length amount of code points to peek
+   * @return the next {@code length} code points, or fewer when the data ends earlier; the empty
+   *         string when {@code length} is 0
+   */
+  public String prefix(int length) {
+    if (length == 0) {
+      return "";
+    }
+    int count = length;
+    if (!ensureEnoughData(length)) {
+      // not enough data behind the pointer: take whatever is left in the window
+      count = Math.min(length, dataLength - pointer);
+    }
+    return new String(dataWindow, pointer, count);
+  }
+
+  /**
+   * Returns the next code points and skips over them in one step. Pointer, index and column
+   * are advanced by {@code length} regardless of how many code points {@code prefix} returned;
+   * the line counter is not touched.
+   *
+   * @param length amount of code points to get
+   * @return the result of {@code prefix(length)}
+   */
+  public String prefixForward(int length) {
+    final String taken = prefix(length);
+    pointer += length;
+    index += length;
+    // prefix never contains new line characters
+    column += length;
+    return taken;
+  }
+
+  /**
+   * Checks that the code point under the pointer is available.
+   *
+   * @return the result of {@code ensureEnoughData(0)}
+   */
+  private boolean ensureEnoughData() {
+    return this.ensureEnoughData(0);
+  }
+
+  /**
+   * Makes sure the window holds a code point {@code size} positions after the pointer, reading
+   * more input when necessary.
+   *
+   * @param size offset from the current position, in code points
+   * @return {@code true} when the code point at that offset is available, {@code false} when
+   *         the data ends before it
+   */
+  private boolean ensureEnoughData(int size) {
+    // A Reader is allowed to deliver fewer chars than requested, so one update() may not be
+    // enough; keep reading until the offset is covered or the end of the data is reached.
+    // Every update() either adds at least one code point, sets eof or throws.
+    while (!eof && pointer + size >= dataLength) {
+      update();
+    }
+    return (this.pointer + size) < dataLength;
+  }
+
+  /**
+   * Reads the next chunk from the stream, drops the already consumed part of the window and
+   * appends the new data to it as code points. Sets {@code eof} when the stream has nothing
+   * more to deliver.
+   *
+   * @throws ReaderException when the chunk contains a code point that is not printable; the
+   *         reported position is counted in code points from the beginning of the stream
+   * @throws YAMLException when the underlying reader fails
+   */
+  private void update() {
+    try {
+      // leave the last slot of the buffer free for a possible low surrogate (see below)
+      int read = stream.read(buffer, 0, BUFFER_SIZE - 1);
+      if (read > 0) {
+        // the unconsumed tail of the old window moves to the front of the new one
+        int cpIndex = (dataLength - pointer);
+        dataWindow = Arrays.copyOfRange(dataWindow, pointer, dataLength + read);
+
+        // do not split a surrogate pair between two chunks: fetch the second half now
+        if (Character.isHighSurrogate(buffer[read - 1])) {
+          if (stream.read(buffer, read, 1) == -1) {
+            eof = true;
+          } else {
+            read++;
+          }
}
- }
- /**
- * Checks <code>chars</chars> for the non-printable characters.
- *
- * @param chars
- * the array where to search.
- * @param begin
- * the beginning index, inclusive.
- * @param end
- * the ending index, exclusive.
- * @throws ReaderException
- * if <code>chars</code> contains non-printable character(s).
- */
- void checkPrintable(final char[] chars, final int begin, final int end) {
- for (int i = begin; i < end; i++) {
- final char c = chars[i];
-
- if (isPrintable(c)) {
- continue;
- }
-
- int position = this.index + this.buffer.length() - this.pointer + i;
- throw new ReaderException(name, position, c, "special characters are not allowed");
+        int nonPrintable = ' ';
+        for (int i = 0; i < read; cpIndex++) {
+          // limit the decoding to the chars of this chunk; without the limit a trailing high
+          // surrogate could be paired with a stale char left behind by an earlier read
+          int codePoint = Character.codePointAt(buffer, i, read);
+          dataWindow[cpIndex] = codePoint;
+          if (isPrintable(codePoint)) {
+            i += Character.charCount(codePoint);
+          } else {
+            // remember the offender and stop decoding; it is reported after the bookkeeping
+            nonPrintable = codePoint;
+            i = read;
+          }
}
- }
-
- public static boolean isPrintable(final char c) {
- return (c >= '\u0020' && c <= '\u007E') || c == '\n' || c == '\r' || c == '\t'
- || c == '\u0085' || (c >= '\u00A0' && c <= '\uD7FF')
- || (c >= '\uE000' && c <= '\uFFFD');
- }
- public Mark getMark() {
- return new Mark(name, this.index, this.line, this.column, this.buffer, this.pointer);
- }
-
- public void forward() {
- forward(1);
- }
-
- /**
- * read the next length characters and move the pointer.
- *
- * @param length
- */
- public void forward(int length) {
- if (this.pointer + length + 1 >= this.buffer.length()) {
- update();
- }
- char ch = 0;
- for (int i = 0; i < length; i++) {
- ch = this.buffer.charAt(this.pointer);
- this.pointer++;
- this.index++;
- if (Constant.LINEBR.has(ch) || (ch == '\r' && buffer.charAt(pointer) != '\n')) {
- this.line++;
- this.column = 0;
- } else if (ch != '\uFEFF') {
- this.column++;
- }
+        dataLength = cpIndex;
+        pointer = 0;
+        if (nonPrintable != ' ') {
+          // cpIndex - 1 is relative to the window, whose first element sits at stream
+          // position this.index; add it to report a position within the whole stream
+          throw new ReaderException(name, this.index + cpIndex - 1, nonPrintable,
+              "special characters are not allowed");
}
+      } else {
+        eof = true;
+      }
+    } catch (IOException ioe) {
+      throw new YAMLException(ioe);
}
+  }
- public char peek() {
- return this.buffer.charAt(this.pointer);
- }
- /**
- * Peek the next index-th character
- *
- * @param index
- * @return the next index-th character
- */
- public char peek(int index) {
- if (this.pointer + index + 1 > this.buffer.length()) {
- update();
- }
- return this.buffer.charAt(this.pointer + index);
- }
+  /**
+   * @return current column, counted in code points from the last line break
+   */
+  public int getColumn() {
+    return this.column;
+  }
- /**
- * peek the next length characters
- *
- * @param length
- * @return the next length characters
- */
- public String prefix(int length) {
- if (this.pointer + length >= this.buffer.length()) {
- update();
- }
- if (this.pointer + length > this.buffer.length()) {
- return this.buffer.substring(this.pointer);
- }
- return this.buffer.substring(this.pointer, this.pointer + length);
- }
-
- /**
- * prefix(length) immediately followed by forward(length)
- */
- public String prefixForward(int length) {
- final String prefix = prefix(length);
- this.pointer += length;
- this.index += length;
- // prefix never contains new line characters
- this.column += length;
- return prefix;
- }
-
- private void update() {
- if (!this.eof) {
- this.buffer = buffer.substring(this.pointer);
- this.pointer = 0;
- try {
- int converted = this.stream.read(data);
- if (converted > 0) {
- /*
- * Let's create StringBuilder manually. Anyway str1 + str2
- * generates new StringBuilder(str1).append(str2).toSting()
- * Giving correct capacity to the constructor prevents
- * unnecessary operations in appends.
- */
- checkPrintable(data, 0, converted);
- this.buffer = new StringBuilder(buffer.length() + converted).append(buffer)
- .append(data, 0, converted).toString();
- } else {
- this.eof = true;
- this.buffer += "\0";
- }
- } catch (IOException ioe) {
- throw new YAMLException(ioe);
- }
- }
- }
+  /**
+   * @return current position, counted in code points from the beginning of the stream
+   */
+  public int getIndex() {
+    return this.index;
+  }
- public int getColumn() {
- return column;
- }
-
- public Charset getEncoding() {
- return Charset.forName(((UnicodeReader) this.stream).getEncoding());
- }
-
- public int getIndex() {
- return index;
- }
-
- public int getLine() {
- return line;
- }
+  /**
+   * @return number of line breaks consumed so far
+   */
+  public int getLine() {
+    return this.line;
+  }
}