diff options
Diffstat (limited to 'src/main/java/org/yaml/snakeyaml/reader/StreamReader.java')
-rw-r--r-- | src/main/java/org/yaml/snakeyaml/reader/StreamReader.java | 394 |
1 files changed, 202 insertions, 192 deletions
diff --git a/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java b/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java index 56ec0078..6799e76e 100644 --- a/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java +++ b/src/main/java/org/yaml/snakeyaml/reader/StreamReader.java @@ -1,220 +1,230 @@ /** - * Copyright (c) 2008, http://www.snakeyaml.org + * Copyright (c) 2008, SnakeYAML * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. */ package org.yaml.snakeyaml.reader; import java.io.IOException; import java.io.Reader; -import java.nio.charset.Charset; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - +import java.io.StringReader; +import java.util.Arrays; import org.yaml.snakeyaml.error.Mark; import org.yaml.snakeyaml.error.YAMLException; import org.yaml.snakeyaml.scanner.Constant; /** - * Reader: checks if characters are in allowed range, adds '\0' to the end. + * Reader: checks if code points are in allowed range. Returns '\0' when end of data has been + * reached. */ public class StreamReader { - public final static Pattern NON_PRINTABLE = Pattern - .compile("[^\t\n\r\u0020-\u007E\u0085\u00A0-\uD7FF\uE000-\uFFFD]"); - private String name; - private final Reader stream; - private int pointer = 0; - private boolean eof = true; - private String buffer; - private int index = 0; - private int line = 0; - private int column = 0; - private char[] data; - - public StreamReader(String stream) { - this.name = "'string'"; - this.buffer = ""; // to set length to 0 - checkPrintable(stream); - this.buffer = stream + "\0"; - this.stream = null; - this.eof = true; - this.data = null; - } - - public StreamReader(Reader reader) { - this.name = "'reader'"; - this.buffer = ""; - this.stream = reader; - this.eof = false; - this.data = new char[1024]; - this.update(); - } - void checkPrintable(CharSequence data) { - Matcher em = NON_PRINTABLE.matcher(data); - if (em.find()) { - int position = this.index + this.buffer.length() - this.pointer + em.start(); - throw new ReaderException(name, position, em.group().charAt(0), - "special characters are not allowed"); + private String name; + private final Reader stream; + /** + * Read data (as a moving window for input stream) + */ + private int[] dataWindow; + + /** + * Real length of the data in dataWindow + */ + private int dataLength; + + /** + * The variable points to the current position in the data array + */ + private int pointer = 0; + private boolean eof; + private int index = 0; // in code points + private int line = 0; + private int column = 0; // in code points + private final char[] buffer; // temp buffer for one read operation (to avoid + // creating the array in stack) + + private static final int BUFFER_SIZE = 1025; + + public StreamReader(String stream) { + this(new StringReader(stream)); + this.name = "'string'"; + } + + public StreamReader(Reader reader) { + this.name = "'reader'"; + this.dataWindow = new int[0]; + this.dataLength = 0; + this.stream = reader; + this.eof = false; + this.buffer = new char[BUFFER_SIZE]; + } + + public static boolean isPrintable(final String data) { + final int length = data.length(); + for (int offset = 0; offset < length;) { + final int codePoint = data.codePointAt(offset); + + if (!isPrintable(codePoint)) { + return false; + } + + offset += Character.charCount(codePoint); + } + + return true; + } + + public static boolean isPrintable(final int c) { + return (c >= 0x20 && c <= 0x7E) || c == 0x9 || c == 0xA || c == 0xD || c == 0x85 + || (c >= 0xA0 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD) + || (c >= 0x10000 && c <= 0x10FFFF); + } + + public Mark getMark() { + return new Mark(name, this.index, this.line, this.column, this.dataWindow, this.pointer); + } + + public void forward() { + forward(1); + } + + /** + * read the next length characters and move the pointer. if the last character is high surrogate + * one more character will be read + * + * @param length amount of characters to move forward + */ + public void forward(int length) { + for (int i = 0; i < length && ensureEnoughData(); i++) { + int c = dataWindow[pointer++]; + this.index++; + if (Constant.LINEBR.has(c) + || (c == '\r' && (ensureEnoughData() && dataWindow[pointer] != '\n'))) { + this.line++; + this.column = 0; + } else if (c != 0xFEFF) { + this.column++; + } + } + } + + public int peek() { + return (ensureEnoughData()) ? dataWindow[pointer] : '\0'; + } + + /** + * Peek the next index-th code point + * + * @param index to peek + * @return the next index-th code point + */ + public int peek(int index) { + return (ensureEnoughData(index)) ? dataWindow[pointer + index] : '\0'; + } + + /** + * peek the next length code points + * + * @param length amount of the characters to peek + * @return the next length code points + */ + public String prefix(int length) { + if (length == 0) { + return ""; + } else if (ensureEnoughData(length)) { + return new String(this.dataWindow, pointer, length); + } else { + return new String(this.dataWindow, pointer, Math.min(length, dataLength - pointer)); + } + } + + /** + * prefix(length) immediately followed by forward(length) + * + * @param length amount of characters to get + * @return the next length code points + */ + public String prefixForward(int length) { + final String prefix = prefix(length); + this.pointer += length; + this.index += length; + // prefix never contains new line characters + this.column += length; + return prefix; + } + + private boolean ensureEnoughData() { + return ensureEnoughData(0); + } + + private boolean ensureEnoughData(int size) { + if (!eof && pointer + size >= dataLength) { + update(); + } + return (this.pointer + size) < dataLength; + } + + private void update() { + try { + int read = stream.read(buffer, 0, BUFFER_SIZE - 1); + if (read > 0) { + int cpIndex = (dataLength - pointer); + dataWindow = Arrays.copyOfRange(dataWindow, pointer, dataLength + read); + + if (Character.isHighSurrogate(buffer[read - 1])) { + if (stream.read(buffer, read, 1) == -1) { + eof = true; + } else { + read++; + } } - } - /** - * Checks <code>chars</chars> for the non-printable characters. - * - * @param chars - * the array where to search. - * @param begin - * the beginning index, inclusive. - * @param end - * the ending index, exclusive. - * @throws ReaderException - * if <code>chars</code> contains non-printable character(s). - */ - void checkPrintable(final char[] chars, final int begin, final int end) { - for (int i = begin; i < end; i++) { - final char c = chars[i]; - - if (isPrintable(c)) { - continue; - } - - int position = this.index + this.buffer.length() - this.pointer + i; - throw new ReaderException(name, position, c, "special characters are not allowed"); + int nonPrintable = ' '; + for (int i = 0; i < read; cpIndex++) { + int codePoint = Character.codePointAt(buffer, i); + dataWindow[cpIndex] = codePoint; + if (isPrintable(codePoint)) { + i += Character.charCount(codePoint); + } else { + nonPrintable = codePoint; + i = read; + } } - } - - public static boolean isPrintable(final char c) { - return (c >= '\u0020' && c <= '\u007E') || c == '\n' || c == '\r' || c == '\t' - || c == '\u0085' || (c >= '\u00A0' && c <= '\uD7FF') - || (c >= '\uE000' && c <= '\uFFFD'); - } - public Mark getMark() { - return new Mark(name, this.index, this.line, this.column, this.buffer, this.pointer); - } - - public void forward() { - forward(1); - } - - /** - * read the next length characters and move the pointer. - * - * @param length - */ - public void forward(int length) { - if (this.pointer + length + 1 >= this.buffer.length()) { - update(); - } - char ch = 0; - for (int i = 0; i < length; i++) { - ch = this.buffer.charAt(this.pointer); - this.pointer++; - this.index++; - if (Constant.LINEBR.has(ch) || (ch == '\r' && buffer.charAt(pointer) != '\n')) { - this.line++; - this.column = 0; - } else if (ch != '\uFEFF') { - this.column++; - } + dataLength = cpIndex; + pointer = 0; + if (nonPrintable != ' ') { + throw new ReaderException(name, cpIndex - 1, nonPrintable, + "special characters are not allowed"); } + } else { + eof = true; + } + } catch (IOException ioe) { + throw new YAMLException(ioe); } + } - public char peek() { - return this.buffer.charAt(this.pointer); - } - /** - * Peek the next index-th character - * - * @param index - * @return the next index-th character - */ - public char peek(int index) { - if (this.pointer + index + 1 > this.buffer.length()) { - update(); - } - return this.buffer.charAt(this.pointer + index); - } + public int getColumn() { + return column; + } - /** - * peek the next length characters - * - * @param length - * @return the next length characters - */ - public String prefix(int length) { - if (this.pointer + length >= this.buffer.length()) { - update(); - } - if (this.pointer + length > this.buffer.length()) { - return this.buffer.substring(this.pointer); - } - return this.buffer.substring(this.pointer, this.pointer + length); - } - - /** - * prefix(length) immediately followed by forward(length) - */ - public String prefixForward(int length) { - final String prefix = prefix(length); - this.pointer += length; - this.index += length; - // prefix never contains new line characters - this.column += length; - return prefix; - } - - private void update() { - if (!this.eof) { - this.buffer = buffer.substring(this.pointer); - this.pointer = 0; - try { - int converted = this.stream.read(data); - if (converted > 0) { - /* - * Let's create StringBuilder manually. Anyway str1 + str2 - * generates new StringBuilder(str1).append(str2).toSting() - * Giving correct capacity to the constructor prevents - * unnecessary operations in appends. - */ - checkPrintable(data, 0, converted); - this.buffer = new StringBuilder(buffer.length() + converted).append(buffer) - .append(data, 0, converted).toString(); - } else { - this.eof = true; - this.buffer += "\0"; - } - } catch (IOException ioe) { - throw new YAMLException(ioe); - } - } - } + /** + * @return current position as number (in characters) from the beginning of the stream + */ + public int getIndex() { + return index; + } - public int getColumn() { - return column; - } - - public Charset getEncoding() { - return Charset.forName(((UnicodeReader) this.stream).getEncoding()); - } - - public int getIndex() { - return index; - } - - public int getLine() { - return line; - } + public int getLine() { + return line; + } } |