[ 
https://issues.apache.org/jira/browse/ANY23-378?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Hans Brende updated ANY23-378:
------------------------------
    Description: 
On the page http://golfavisen.dk/golfavisen-award-2018/ I'm getting a 
JsonParseException in the EmbeddedJSONLDExtractor:
{noformat}
org.apache.any23.extractor.ExtractionException: Error while parsing RDF 
document.
        at 
org.apache.any23.extractor.rdf.BaseRDFExtractor.run(BaseRDFExtractor.java:175)
        at 
org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.extractJSONLDScript(EmbeddedJSONLDExtractor.java:149)
        at 
org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.run(EmbeddedJSONLDExtractor.java:83)
        at 
org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.run(EmbeddedJSONLDExtractor.java:54)
        at 
org.apache.any23.extractor.SingleDocumentExtraction.runExtractor(SingleDocumentExtraction.java:480)
        at 
org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:259)
        at 
org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:323)
        at 
org.apache.any23.extractor.html.AbstractExtractorTestCase.extract(AbstractExtractorTestCase.java:189)
        at 
org.apache.any23.extractor.html.AbstractExtractorTestCase.assertExtract(AbstractExtractorTestCase.java:204)
        ... 28 more
Caused by: org.eclipse.rdf4j.rio.RDFParseException: Could not parse JSONLD
        at org.eclipse.rdf4j.rio.jsonld.JSONLDParser.parse(JSONLDParser.java:77)
        at 
org.apache.any23.extractor.rdf.BaseRDFExtractor.run(BaseRDFExtractor.java:171)
        ... 36 more
Caused by: com.fasterxml.jackson.core.JsonParseException: Unexpected character 
('}' (code 125)): was expecting double-quote to start field name
 at [Source: (BufferedReader); line: 9, column: 10]
        at 
com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:1804)
        at 
com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:663)
        at 
com.fasterxml.jackson.core.base.ParserMinimalBase._reportUnexpectedChar(ParserMinimalBase.java:561)
        at 
com.fasterxml.jackson.core.json.ReaderBasedJsonParser._handleOddName(ReaderBasedJsonParser.java:1757)
        at 
com.fasterxml.jackson.core.json.ReaderBasedJsonParser.nextFieldName(ReaderBasedJsonParser.java:907)
        at 
com.fasterxml.jackson.databind.deser.std.MapDeserializer._readAndBindStringKeyMap(MapDeserializer.java:512)
        at 
com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:364)
        at 
com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:29)
        at 
com.fasterxml.jackson.databind.ObjectMapper._readValue(ObjectMapper.java:3972)
        at 
com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2264)
        at 
com.fasterxml.jackson.core.JsonParser.readValueAs(JsonParser.java:1729)
        at 
com.github.jsonldjava.utils.JsonUtils.fromJsonParser(JsonUtils.java:196)
        at com.github.jsonldjava.utils.JsonUtils.fromReader(JsonUtils.java:173)
        at 
com.github.jsonldjava.utils.JsonUtils.fromInputStream(JsonUtils.java:154)
        at 
com.github.jsonldjava.utils.JsonUtils.fromInputStream(JsonUtils.java:111)
        at org.eclipse.rdf4j.rio.jsonld.JSONLDParser.parse(JSONLDParser.java:71)
        ... 37 more

{noformat}


caused by the following json:

{noformat}
{       "@context": "http://schema.org",
        "@type": "Event",
        "name": "PINNACLE BANK CHAMPIONSHIP",
        "startDate": "2018-7-19T00-00-00-00",
        "endDate": "2018-7-19T23-23-59-00",
        "image":"http://golfavisen.dk/wp-content/uploads/2017/03/WEB.png",
        "description":"PINNACLE BANK CHAMPIONSHIP",
}
{noformat}

  was:
On the page http://golfavisen.dk/golfavisen-award-2018/ I'm getting a 
JsonParseException in the EmbeddedJSONLDExtractor:
{noformat}
java.lang.RuntimeException: org.apache.any23.extractor.ExtractionException: 
Error while parsing RDF document.

        at 
org.apache.any23.extractor.html.AbstractExtractorTestCase.assertExtract(AbstractExtractorTestCase.java:208)
        at 
org.apache.any23.extractor.html.AbstractExtractorTestCase.assertExtract(AbstractExtractorTestCase.java:221)
        at 
org.apache.any23.extractor.html.EmbeddedJSONLDExtractorTest.testJSONLD(EmbeddedJSONLDExtractorTest.java:68)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
        at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
        at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
        at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
        at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
        at org.junit.rules.RunRules.evaluate(RunRules.java:20)
        at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
        at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
        at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
        at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
        at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
        at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
        at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
        at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
        at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
        at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
        at 
com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
        at 
com.intellij.rt.execution.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:47)
        at 
com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:242)
        at 
com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:70)
Caused by: org.apache.any23.extractor.ExtractionException: Error while parsing 
RDF document.
        at 
org.apache.any23.extractor.rdf.BaseRDFExtractor.run(BaseRDFExtractor.java:175)
        at 
org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.extractJSONLDScript(EmbeddedJSONLDExtractor.java:149)
        at 
org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.run(EmbeddedJSONLDExtractor.java:83)
        at 
org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.run(EmbeddedJSONLDExtractor.java:54)
        at 
org.apache.any23.extractor.SingleDocumentExtraction.runExtractor(SingleDocumentExtraction.java:480)
        at 
org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:259)
        at 
org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:323)
        at 
org.apache.any23.extractor.html.AbstractExtractorTestCase.extract(AbstractExtractorTestCase.java:189)
        at 
org.apache.any23.extractor.html.AbstractExtractorTestCase.assertExtract(AbstractExtractorTestCase.java:204)
        ... 28 more
Caused by: org.eclipse.rdf4j.rio.RDFParseException: Could not parse JSONLD
        at org.eclipse.rdf4j.rio.jsonld.JSONLDParser.parse(JSONLDParser.java:77)
        at 
org.apache.any23.extractor.rdf.BaseRDFExtractor.run(BaseRDFExtractor.java:171)
        ... 36 more
Caused by: com.fasterxml.jackson.core.JsonParseException: Unexpected character 
('}' (code 125)): was expecting double-quote to start field name
 at [Source: (BufferedReader); line: 9, column: 10]
        at 
com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:1804)
        at 
com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:663)
        at 
com.fasterxml.jackson.core.base.ParserMinimalBase._reportUnexpectedChar(ParserMinimalBase.java:561)
        at 
com.fasterxml.jackson.core.json.ReaderBasedJsonParser._handleOddName(ReaderBasedJsonParser.java:1757)
        at 
com.fasterxml.jackson.core.json.ReaderBasedJsonParser.nextFieldName(ReaderBasedJsonParser.java:907)
        at 
com.fasterxml.jackson.databind.deser.std.MapDeserializer._readAndBindStringKeyMap(MapDeserializer.java:512)
        at 
com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:364)
        at 
com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:29)
        at 
com.fasterxml.jackson.databind.ObjectMapper._readValue(ObjectMapper.java:3972)
        at 
com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2264)
        at 
com.fasterxml.jackson.core.JsonParser.readValueAs(JsonParser.java:1729)
        at 
com.github.jsonldjava.utils.JsonUtils.fromJsonParser(JsonUtils.java:196)
        at com.github.jsonldjava.utils.JsonUtils.fromReader(JsonUtils.java:173)
        at 
com.github.jsonldjava.utils.JsonUtils.fromInputStream(JsonUtils.java:154)
        at 
com.github.jsonldjava.utils.JsonUtils.fromInputStream(JsonUtils.java:111)
        at org.eclipse.rdf4j.rio.jsonld.JSONLDParser.parse(JSONLDParser.java:71)
        ... 37 more

{noformat}


caused by the following json:

{noformat}
{       "@context": "http://schema.org",
        "@type": "Event",
        "name": "PINNACLE BANK CHAMPIONSHIP",
        "startDate": "2018-7-19T00-00-00-00",
        "endDate": "2018-7-19T23-23-59-00",
        "image":"http://golfavisen.dk/wp-content/uploads/2017/03/WEB.png",
        "description":"PINNACLE BANK CHAMPIONSHIP",
}
{noformat}


> JsonParseException
> ------------------
>
>                 Key: ANY23-378
>                 URL: https://issues.apache.org/jira/browse/ANY23-378
>             Project: Apache Any23
>          Issue Type: Bug
>          Components: extractors
>    Affects Versions: 2.3
>            Reporter: Hans Brende
>            Priority: Major
>             Fix For: 2.3
>
>
> On the page http://golfavisen.dk/golfavisen-award-2018/ I'm getting a 
> JsonParseException in the EmbeddedJSONLDExtractor:
> {noformat}
> org.apache.any23.extractor.ExtractionException: Error while parsing RDF 
> document.
>       at 
> org.apache.any23.extractor.rdf.BaseRDFExtractor.run(BaseRDFExtractor.java:175)
>       at 
> org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.extractJSONLDScript(EmbeddedJSONLDExtractor.java:149)
>       at 
> org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.run(EmbeddedJSONLDExtractor.java:83)
>       at 
> org.apache.any23.extractor.html.EmbeddedJSONLDExtractor.run(EmbeddedJSONLDExtractor.java:54)
>       at 
> org.apache.any23.extractor.SingleDocumentExtraction.runExtractor(SingleDocumentExtraction.java:480)
>       at 
> org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:259)
>       at 
> org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:323)
>       at 
> org.apache.any23.extractor.html.AbstractExtractorTestCase.extract(AbstractExtractorTestCase.java:189)
>       at 
> org.apache.any23.extractor.html.AbstractExtractorTestCase.assertExtract(AbstractExtractorTestCase.java:204)
>       ... 28 more
> Caused by: org.eclipse.rdf4j.rio.RDFParseException: Could not parse JSONLD
>       at org.eclipse.rdf4j.rio.jsonld.JSONLDParser.parse(JSONLDParser.java:77)
>       at 
> org.apache.any23.extractor.rdf.BaseRDFExtractor.run(BaseRDFExtractor.java:171)
>       ... 36 more
> Caused by: com.fasterxml.jackson.core.JsonParseException: Unexpected 
> character ('}' (code 125)): was expecting double-quote to start field name
>  at [Source: (BufferedReader); line: 9, column: 10]
>       at 
> com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:1804)
>       at 
> com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:663)
>       at 
> com.fasterxml.jackson.core.base.ParserMinimalBase._reportUnexpectedChar(ParserMinimalBase.java:561)
>       at 
> com.fasterxml.jackson.core.json.ReaderBasedJsonParser._handleOddName(ReaderBasedJsonParser.java:1757)
>       at 
> com.fasterxml.jackson.core.json.ReaderBasedJsonParser.nextFieldName(ReaderBasedJsonParser.java:907)
>       at 
> com.fasterxml.jackson.databind.deser.std.MapDeserializer._readAndBindStringKeyMap(MapDeserializer.java:512)
>       at 
> com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:364)
>       at 
> com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:29)
>       at 
> com.fasterxml.jackson.databind.ObjectMapper._readValue(ObjectMapper.java:3972)
>       at 
> com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2264)
>       at 
> com.fasterxml.jackson.core.JsonParser.readValueAs(JsonParser.java:1729)
>       at 
> com.github.jsonldjava.utils.JsonUtils.fromJsonParser(JsonUtils.java:196)
>       at com.github.jsonldjava.utils.JsonUtils.fromReader(JsonUtils.java:173)
>       at 
> com.github.jsonldjava.utils.JsonUtils.fromInputStream(JsonUtils.java:154)
>       at 
> com.github.jsonldjava.utils.JsonUtils.fromInputStream(JsonUtils.java:111)
>       at org.eclipse.rdf4j.rio.jsonld.JSONLDParser.parse(JSONLDParser.java:71)
>       ... 37 more
> {noformat}
> caused by the following json:
> {noformat}
> {     "@context": "http://schema.org",
>       "@type": "Event",
>       "name": "PINNACLE BANK CHAMPIONSHIP",
>       "startDate": "2018-7-19T00-00-00-00",
>       "endDate": "2018-7-19T23-23-59-00",
>       "image":"http://golfavisen.dk/wp-content/uploads/2017/03/WEB.png",
>       "description":"PINNACLE BANK CHAMPIONSHIP",
> }
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to