Repository: any23 Updated Branches: refs/heads/master 99398b46a -> 92945ab9c
sanity check: added another JSON-cleaning test case Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/92945ab9 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/92945ab9 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/92945ab9 Branch: refs/heads/master Commit: 92945ab9cf8b846966f7da327885b4d3b6a4035a Parents: 99398b4 Author: Hans <[email protected]> Authored: Mon Aug 6 18:00:33 2018 -0500 Committer: Hans <[email protected]> Committed: Mon Aug 6 18:00:33 2018 -0500 ---------------------------------------------------------------------- .../extractor/rdf/JSONLDExtractorTest.java | 17 +++++++++++++ .../test/resources/html/json-cleaning-test.json | 26 ++++++++++++++++++++ 2 files changed, 43 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/92945ab9/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java index f1338b4..5db98ae 100644 --- a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; import org.apache.any23.extractor.ExtractionContext; import org.apache.any23.extractor.ExtractionException; import org.apache.any23.extractor.ExtractionParameters; @@ -82,6 +84,21 @@ public class JSONLDExtractorTest { } } + @Test + public void testJsonCleaning() throws Exception { + JsonCleaningInputStream stream = new JsonCleaningInputStream(getClass().getResourceAsStream("/html/json-cleaning-test.json")); + + JsonParser parser = new JsonFactory().createParser(stream); + + int numTokens = 0; + while (parser.nextToken() != null) { + numTokens++; + } + + Assert.assertEquals(numTokens, 41); + + } + public void extract(IRI uri, String filePath) throws IOException, ExtractionException, TripleHandlerException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); http://git-wip-us.apache.org/repos/asf/any23/blob/92945ab9/test-resources/src/test/resources/html/json-cleaning-test.json ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/html/json-cleaning-test.json b/test-resources/src/test/resources/html/json-cleaning-test.json new file mode 100644 index 0000000..09ec189 --- /dev/null +++ b/test-resources/src/test/resources/html/json-cleaning-test.json @@ -0,0 +1,26 @@ +{ /* " ' # //*/ + "a": { #comment <![CDATA[ + "b": 1234;'c': { + "d": [ + { + "f": { + + } "g": { + 'i':[1,2,3,//comment + ,4,5,, + 6] 'j':[ + 1 ]]> + + ] /**/ + }, /*} + */} { + "key" + : 'value',,, + } + + ] "e": { + + }, + } + } +} \ No newline at end of file
