Repository: any23
Updated Branches:
  refs/heads/master e046f7329 -> 99398b46a


improve JsonCleaningInputStream


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/99398b46
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/99398b46
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/99398b46

Branch: refs/heads/master
Commit: 99398b46a6055e5811fd9539664d6eadc215c8e6
Parents: e046f73
Author: Hans <[email protected]>
Authored: Mon Aug 6 16:54:26 2018 -0500
Committer: Hans <[email protected]>
Committed: Mon Aug 6 16:54:26 2018 -0500

----------------------------------------------------------------------
 .../extractor/rdf/JsonCleaningInputStream.java  | 27 ++++++++++++--------
 1 file changed, 17 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/99398b46/core/src/main/java/org/apache/any23/extractor/rdf/JsonCleaningInputStream.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JsonCleaningInputStream.java b/core/src/main/java/org/apache/any23/extractor/rdf/JsonCleaningInputStream.java
index bda229e..958163f 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/JsonCleaningInputStream.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JsonCleaningInputStream.java
@@ -20,6 +20,7 @@ package org.apache.any23.extractor.rdf;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PushbackInputStream;
+import java.util.Arrays;
 
 /**
  * This class uses several strategies to fix common JSON syntax errors, including:
@@ -41,18 +42,26 @@ class JsonCleaningInputStream extends InputStream {
     private static final int EOL_COMMENT = 1;
     private static final int MULTILINE_COMMENT = 2;
 
-    private static final int NEEDS_COMMA = 1;
-    private static final int NEEDS_COMMA_AND_NEWLINE = 2;
+    private static final int NEEDS_COMMA = -1;
+    private static final int NEEDS_COMMA_AND_NEWLINE = 1;
 
     private boolean inEscape;
     private boolean inCDATA;
     private int needsComma;
     private int currentState;
 
+    private static final int MAX_BLANK_PUSHBACK = 128;
+    private static final byte[] BLANK_PUSHBACK = new byte[MAX_BLANK_PUSHBACK];
+
+    static {
+        Arrays.fill(BLANK_PUSHBACK, (byte)' ');
+        BLANK_PUSHBACK[0] = '\n';
+    }
+
     private final PushbackInputStream in;
 
     JsonCleaningInputStream(InputStream in) {
-        this.in = new PushbackInputStream(in, 16);
+        this.in = new PushbackInputStream(in, 256);
     }
 
     private static void unread(PushbackInputStream in, int c) throws IOException {
@@ -143,8 +152,9 @@ class JsonCleaningInputStream extends InputStream {
                     return c;
             }
 
+            //we're not in a quote or comment
+
             $whitespace: {
-                //we're not in a quote
                 switch (c) {
                     case '#':
                         currentState = EOL_COMMENT;
@@ -239,10 +249,7 @@ class JsonCleaningInputStream extends InputStream {
                     if (nc == NEEDS_COMMA) {
                         in.unread(' ');
                     } else {
-                        for (int i = NEEDS_COMMA_AND_NEWLINE; i < nc; i++) {
-                            in.unread(' ');
-                        }
-                        in.unread('\n');
+                        in.unread(BLANK_PUSHBACK, 0, nc);
                     }
                     needsComma = 0;
                     return ',';
@@ -257,8 +264,8 @@ class JsonCleaningInputStream extends InputStream {
 
             int nc = needsComma;
             if (nc != 0) {
-                if (nc != NEEDS_COMMA) {
-                    needsComma = (nc + 1) & 0xFF;
+                if (nc != NEEDS_COMMA && nc != MAX_BLANK_PUSHBACK) {
+                    needsComma = nc + 1;
                 }
                 continue;
             }

Reply via email to