Repository: any23
Updated Branches:
  refs/heads/master 1867cc66d -> 316b4ec0d


ANY23-338 fixed problem with json comment stripping


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/316b4ec0
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/316b4ec0
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/316b4ec0

Branch: refs/heads/master
Commit: 316b4ec0d6285a204789792084caf012c000b196
Parents: 1867cc6
Author: Hans <[email protected]>
Authored: Wed Mar 28 14:37:49 2018 -0500
Committer: Hans <[email protected]>
Committed: Wed Mar 28 14:37:49 2018 -0500

----------------------------------------------------------------------
 .../apache/any23/extractor/rdf/BaseRDFExtractor.java    | 12 ++++++------
 .../extractor/html/EmbeddedJSONLDExtractorTest.java     |  3 +++
 .../test/resources/html/html-jsonld-strip-comments.html |  2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/316b4ec0/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index e4d16e2..61b58c1 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -161,7 +161,7 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
 
     private static class JsonCommentStripperInputStream extends InputStream {
 
-        private int prevChar;
+        private boolean inEscape;
         private boolean inQuote;
         private boolean inCDATA;
 
@@ -191,16 +191,16 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
 
         @Override
         public int read() throws IOException {
-            return prevChar = privateRead();
-        }
-
-        private int privateRead() throws IOException {
             PushbackInputStream stream = wrapped;
             int c = stream.read();
 
             if (inQuote) {
-                if (c == '"' && prevChar != '\\') {
+                if (inEscape) {
+                    inEscape = false;
+                } else if (c == '"') {
                     inQuote = false;
+                } else if (c == '\\') {
+                    inEscape = true;
                 }
                 return c;
             }

http://git-wip-us.apache.org/repos/asf/any23/blob/316b4ec0/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
index caf580d..23af441 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
@@ -17,6 +17,8 @@
 package org.apache.any23.extractor.html;
 
 import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.vocab.FOAF;
 import org.junit.Test;
 
 /**
@@ -58,6 +60,7 @@ public class EmbeddedJSONLDExtractorTest extends AbstractExtractorTestCase {
                assertExtract("/html/html-jsonld-strip-comments.html");
                assertModelNotEmpty();
                assertStatementsSize(null, null, null, 3);
+               assertContains(RDFUtils.iri(FOAF.NS, "name"), "Robert\\\" Millar\\\\\"\"\\\\");
        }
 
        @Override

http://git-wip-us.apache.org/repos/asf/any23/blob/316b4ec0/test-resources/src/test/resources/html/html-jsonld-strip-comments.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-jsonld-strip-comments.html b/test-resources/src/test/resources/html/html-jsonld-strip-comments.html
index a75569e..f12f1cb 100644
--- a/test-resources/src/test/resources/html/html-jsonld-strip-comments.html
+++ b/test-resources/src/test/resources/html/html-jsonld-strip-comments.html
@@ -34,7 +34,7 @@
      //the above urls should test that comments inside quotes are *not* stripped
       "@type": "Person",]]> /*
        multiline comment
-      inside json */ "name": <![CDATA["Robert\" Millar", //comment
+      inside json */ "name": <![CDATA["Robert\\\" Millar\\\\\"\"\\\\", //comment
       #comment
       "born": "1958-09-13T00:00:00"
     }]]]> ///some more commenting

Reply via email to