Repository: any23
Updated Branches:
  refs/heads/master eefa208db -> e35bff451
ANY23-291 Allow JSONLD scripts to be located anywhere in document

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/d6955826
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/d6955826
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/d6955826

Branch: refs/heads/master
Commit: d69558268b5d8e8d57f00d94b864c54ec2eaf75f
Parents: 07f7421
Author: Hans <[email protected]>
Authored: Wed Jan 24 19:58:25 2018 -0600
Committer: Hans <[email protected]>
Committed: Wed Jan 24 21:20:27 2018 -0600

----------------------------------------------------------------------
 .../extractor/html/EmbeddedJSONLDExtractor.java |  2 +-
 .../html/EmbeddedJSONLDExtractorTest.java       | 14 ++++++
 .../html-body-embedded-jsonld-extractor.html    | 37 +++++++++++++++
 ...head-and-body-embedded-jsonld-extractor.html | 47 ++++++++++++++++++++
 4 files changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index 1e6efdf..aeffdda 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -137,7 +137,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
             String baseProfile, ExtractionParameters extractionParameters,
             ExtractionContext extractionContext, ExtractionResult out)
             throws IOException, ExtractionException {
-        List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
+        List<Node> scriptNodes = DomUtils.findAll(in, "//SCRIPT");
         Set<JSONLDScript> result = new HashSet<>();
         extractor = new JSONLDExtractorFactory().createExtractor();
         for (Node jsonldNode : scriptNodes) {

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
index 70baa30..6e7bfa4 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
@@ -39,6 +39,20 @@ public class EmbeddedJSONLDExtractorTest extends AbstractExtractorTestCase {
         assertStatementsSize(null, null, null, 7);
     }
 
+    @Test
+    public void testEmbeddedJSONLDInBody() throws Exception {
+        assertExtract("/html/html-body-embedded-jsonld-extractor.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 3);
+    }
+
+    @Test
+    public void testEmbeddedJSONLDInHeadAndBody() throws Exception {
+        assertExtract("/html/html-head-and-body-embedded-jsonld-extractor.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 7);
+    }
+
     @Override
     protected ExtractorFactory<?> getExtractorFactory() {
         return new EmbeddedJSONLDExtractorFactory();

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..7efce2d
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
@@ -0,0 +1,37 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+    <title>Hello World!</title>
+    <meta name="title" content="Embedded JSONLD extractor"/>
+</head>
+<body>
+    <h1>Embedded JSONLD Extractor</h1>
+    <p>It extracts only the embedded JSON-LD elements.
+    <div>
+        <script type="application/ld+json">
+        {
+            "@context": "http://json-ld.org/contexts/person.jsonld",
+            "@id": "http://dbpedia.org/resource/Robert_Millar",
+            "@type": "Person",
+            "name": "Robert Millar",
+            "born": "1958-09-13T00:00:00"
+        }
+        </script>
+    </div>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..f8ce071
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
@@ -0,0 +1,47 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+    <title>Hello World!</title>
+    <meta name="title" content="Embedded JSONLD extractor"/>
+    <!-- As per spec in http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents -->
+    <script type="application/ld+json">
+    {
+        "@context": "http://json-ld.org/contexts/person.jsonld",
+        "@id": "http://dbpedia.org/resource/Robert_Millar",
+        "@type": "Person",
+        "name": "Robert Millar",
+        "born": "1958-09-13T00:00:00"
+    }
+    </script>
+
+</head>
+<h1>Embedded JSONLD Extractor</h1>
+<p>It extracts only the embedded JSON-LD elements.
+<body>
+    <script type="application/ld+json">
+    {
+        "@context": "http://json-ld.org/contexts/person.jsonld",
+        "@id": "http://dbpedia.org/resource/Robert_Frost",
+        "@type": "Person",
+        "name": "Robert Frost",
+        "born": "1874-03-26T00:00:00",
+        "died": "1963-01-29T00:00:00"
+    }
+    </script>
+</body>
+</html>
\ No newline at end of file
