Repository: johnzon Updated Branches: refs/heads/master 4f1c2778f -> 8019f4899
JOHNZON-147 support BOM even if encoding is defined If a BOM is set, then this defines the encoding. Any configured encoding is only used if no BOM is found. Before this patch we blew up if UTF-8 encoding was configured and the JSON stream had a BOM. Project: http://git-wip-us.apache.org/repos/asf/johnzon/repo Commit: http://git-wip-us.apache.org/repos/asf/johnzon/commit/8019f489 Tree: http://git-wip-us.apache.org/repos/asf/johnzon/tree/8019f489 Diff: http://git-wip-us.apache.org/repos/asf/johnzon/diff/8019f489 Branch: refs/heads/master Commit: 8019f489967ed8dbf3a1224c371ab82f9bbc79c2 Parents: 4f1c277 Author: Mark Struberg <[email protected]> Authored: Wed Nov 15 20:41:36 2017 +0100 Committer: Mark Struberg <[email protected]> Committed: Wed Nov 15 20:41:36 2017 +0100 ---------------------------------------------------------------------- .../johnzon/core/JsonStreamParserImpl.java | 8 ++--- .../core/RFC4627AwareInputStreamReader.java | 36 ++++++++++++++------ 2 files changed, 28 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/johnzon/blob/8019f489/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java ---------------------------------------------------------------------- diff --git a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java index dc875fe..aa5ca1b 100644 --- a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java +++ b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java @@ -23,7 +23,6 @@ import javax.json.stream.JsonLocation; import javax.json.stream.JsonParsingException; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.Reader; import java.math.BigDecimal; import java.nio.charset.Charset; @@ -63,7 +62,7 @@ public class 
JsonStreamParserImpl extends JohnzonJsonParserImpl implements JsonC //we use a byte here, because comparing bytes //is more efficient than comparing enums //Additionally we handle internally two more event: COMMA_EVENT and KEY_SEPARATOR_EVENT - private byte previousEvent; + private byte previousEvent = 0; //this buffer is used to store current String or Number value in case that //within the value a buffer boundary is crossed or the string contains escaped characters @@ -147,11 +146,8 @@ public class JsonStreamParserImpl extends JohnzonJsonParserImpl implements JsonC if (reader != null) { this.in = reader; - } else if (encoding == null) { - this.in = new RFC4627AwareInputStreamReader(inputStream); - } else { - this.in = new InputStreamReader(inputStream, encoding.newDecoder()); + this.in = new RFC4627AwareInputStreamReader(inputStream, encoding); } } http://git-wip-us.apache.org/repos/asf/johnzon/blob/8019f489/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java ---------------------------------------------------------------------- diff --git a/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java b/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java index 645cf0e..aa09804 100644 --- a/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java +++ b/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java @@ -23,18 +23,21 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.PushbackInputStream; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import javax.json.JsonException; final class RFC4627AwareInputStreamReader extends InputStreamReader { - RFC4627AwareInputStreamReader(final InputStream in) { - this(new PushbackInputStream(in,4)); + /** + * @param preferredCharset the Charset to use if no BOM is used. 
If {@code null} use UTF-8 + */ + RFC4627AwareInputStreamReader(final InputStream in, Charset preferredCharset) { + this(new PushbackInputStream(in,4), preferredCharset); } - - private RFC4627AwareInputStreamReader(final PushbackInputStream in) { - super(in, getCharset(in).newDecoder()); - + + private RFC4627AwareInputStreamReader(final PushbackInputStream in, Charset preferredCharset) { + super(in, getCharset(in, preferredCharset).newDecoder()); } /** @@ -44,10 +47,15 @@ final class RFC4627AwareInputStreamReader extends InputStreamReader { */ private static byte[] readAllBytes(final PushbackInputStream inputStream) throws IOException { final int first = inputStream.read(); + if(first == -1) { + return new byte[0]; + } + final int second = inputStream.read(); - if(first == -1|| second == -1) { - throw new JsonException("Invalid Json. Valid Json has at least 2 bytes"); + if(second == -1) { + return new byte[] {(byte) first}; } + final int third = inputStream.read(); final int fourth = inputStream.read(); if(third == -1) { @@ -78,11 +86,19 @@ final class RFC4627AwareInputStreamReader extends InputStreamReader { */ - private static Charset getCharset(final PushbackInputStream inputStream) { - Charset charset = Charset.forName("UTF-8"); + private static Charset getCharset(final PushbackInputStream inputStream, Charset preferredCharset) { + Charset charset = preferredCharset != null ? preferredCharset : Charset.forName("UTF-8"); int bomLength=0; try { final byte[] utfBytes = readAllBytes(inputStream); + if (utfBytes.length == 0) { + return StandardCharsets.UTF_8; // empty file -> doesn't matter anyway + } + if (utfBytes.length == 1) { + inputStream.unread(utfBytes); + return StandardCharsets.UTF_8; // almost empty file -> doesn't matter neither + } + int first = (utfBytes[0] & 0xFF); int second = (utfBytes[1] & 0xFF); if (first == 0x00) {
