Repository: johnzon
Updated Branches:
  refs/heads/master 4f1c2778f -> 8019f4899


JOHNZON-147 support BOM even if encoding is defined

If a BOM is set, then this defines the encoding.
Any configured encoding is only used if no BOM is found.
Before this patch, parsing blew up if UTF-8 encoding was configured and
the JSON stream had a BOM.


Project: http://git-wip-us.apache.org/repos/asf/johnzon/repo
Commit: http://git-wip-us.apache.org/repos/asf/johnzon/commit/8019f489
Tree: http://git-wip-us.apache.org/repos/asf/johnzon/tree/8019f489
Diff: http://git-wip-us.apache.org/repos/asf/johnzon/diff/8019f489

Branch: refs/heads/master
Commit: 8019f489967ed8dbf3a1224c371ab82f9bbc79c2
Parents: 4f1c277
Author: Mark Struberg <[email protected]>
Authored: Wed Nov 15 20:41:36 2017 +0100
Committer: Mark Struberg <[email protected]>
Committed: Wed Nov 15 20:41:36 2017 +0100

----------------------------------------------------------------------
 .../johnzon/core/JsonStreamParserImpl.java      |  8 ++---
 .../core/RFC4627AwareInputStreamReader.java     | 36 ++++++++++++++------
 2 files changed, 28 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/johnzon/blob/8019f489/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java
----------------------------------------------------------------------
diff --git 
a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java 
b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java
index dc875fe..aa5ca1b 100644
--- 
a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java
+++ 
b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java
@@ -23,7 +23,6 @@ import javax.json.stream.JsonLocation;
 import javax.json.stream.JsonParsingException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.Reader;
 import java.math.BigDecimal;
 import java.nio.charset.Charset;
@@ -63,7 +62,7 @@ public class JsonStreamParserImpl extends 
JohnzonJsonParserImpl implements JsonC
     //we use a byte here, because comparing bytes
     //is more efficient than comparing enums
     //Additionally we handle internally two more event: COMMA_EVENT and 
KEY_SEPARATOR_EVENT
-    private byte previousEvent;
+    private byte previousEvent = 0;
 
     //this buffer is used to store current String or Number value in case that
     //within the value a buffer boundary is crossed or the string contains 
escaped characters
@@ -147,11 +146,8 @@ public class JsonStreamParserImpl extends 
JohnzonJsonParserImpl implements JsonC
 
         if (reader != null) {
             this.in = reader;
-        } else if (encoding == null) {
-            this.in = new RFC4627AwareInputStreamReader(inputStream);
-
         } else {
-            this.in = new InputStreamReader(inputStream, 
encoding.newDecoder());
+            this.in = new RFC4627AwareInputStreamReader(inputStream, encoding);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/johnzon/blob/8019f489/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java
----------------------------------------------------------------------
diff --git 
a/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java
 
b/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java
index 645cf0e..aa09804 100644
--- 
a/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java
+++ 
b/johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java
@@ -23,18 +23,21 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.PushbackInputStream;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 
 import javax.json.JsonException;
 
 final class RFC4627AwareInputStreamReader extends InputStreamReader {
 
-    RFC4627AwareInputStreamReader(final InputStream in) {
-        this(new PushbackInputStream(in,4));
+    /**
+     * @param preferredCharset the Charset to use if no BOM is used. If {@code 
null} use UTF-8
+     */
+    RFC4627AwareInputStreamReader(final InputStream in, Charset 
preferredCharset) {
+        this(new PushbackInputStream(in,4), preferredCharset);
     }
-    
-    private RFC4627AwareInputStreamReader(final PushbackInputStream in) {
-        super(in, getCharset(in).newDecoder());
-       
+
+    private RFC4627AwareInputStreamReader(final PushbackInputStream in, 
Charset preferredCharset) {
+        super(in, getCharset(in, preferredCharset).newDecoder());
     }
 
     /**
@@ -44,10 +47,15 @@ final class RFC4627AwareInputStreamReader extends 
InputStreamReader {
      */
     private static byte[] readAllBytes(final PushbackInputStream inputStream) 
throws IOException {
         final int first = inputStream.read();
+        if(first == -1) {
+            return new byte[0];
+        }
+
         final int second = inputStream.read();
-        if(first == -1|| second == -1) {
-            throw new JsonException("Invalid Json. Valid Json has at least 2 
bytes");
+        if(second == -1) {
+            return new byte[] {(byte) first};
         }
+
         final int third = inputStream.read();
         final int fourth = inputStream.read();
         if(third == -1) {
@@ -78,11 +86,19 @@ final class RFC4627AwareInputStreamReader extends 
InputStreamReader {
 
         */
 
-    private static Charset getCharset(final PushbackInputStream inputStream) {
-        Charset charset = Charset.forName("UTF-8");
+    private static Charset getCharset(final PushbackInputStream inputStream, 
Charset preferredCharset) {
+        Charset charset = preferredCharset != null ? preferredCharset : 
Charset.forName("UTF-8");
         int bomLength=0;
         try {
             final byte[] utfBytes = readAllBytes(inputStream);
+            if (utfBytes.length == 0) {
+                return StandardCharsets.UTF_8; // empty file -> doesn't matter 
anyway
+            }
+            if (utfBytes.length == 1) {
+                inputStream.unread(utfBytes);
+                return StandardCharsets.UTF_8; // almost empty file -> doesn't 
matter neither
+            }
+
             int first = (utfBytes[0] & 0xFF);
             int second = (utfBytes[1] & 0xFF);
             if (first == 0x00) {

Reply via email to