Repository: any23
Updated Branches:
  refs/heads/master eefa208db -> e35bff451


ANY23-291 Allow JSONLD scripts to be located anywhere in document


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/d6955826
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/d6955826
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/d6955826

Branch: refs/heads/master
Commit: d69558268b5d8e8d57f00d94b864c54ec2eaf75f
Parents: 07f7421
Author: Hans <[email protected]>
Authored: Wed Jan 24 19:58:25 2018 -0600
Committer: Hans <[email protected]>
Committed: Wed Jan 24 21:20:27 2018 -0600

----------------------------------------------------------------------
 .../extractor/html/EmbeddedJSONLDExtractor.java |  2 +-
 .../html/EmbeddedJSONLDExtractorTest.java       | 14 ++++++
 .../html-body-embedded-jsonld-extractor.html    | 37 +++++++++++++++
 ...head-and-body-embedded-jsonld-extractor.html | 47 ++++++++++++++++++++
 4 files changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index 1e6efdf..aeffdda 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -137,7 +137,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
           String baseProfile, ExtractionParameters extractionParameters,
           ExtractionContext extractionContext, ExtractionResult out)
                   throws IOException, ExtractionException {
-    List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
+    List<Node> scriptNodes = DomUtils.findAll(in, "//SCRIPT");
     Set<JSONLDScript> result = new HashSet<>();
     extractor = new JSONLDExtractorFactory().createExtractor();
     for (Node jsonldNode : scriptNodes) {

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
index 70baa30..6e7bfa4 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
@@ -39,6 +39,20 @@ public class EmbeddedJSONLDExtractorTest extends AbstractExtractorTestCase {
                assertStatementsSize(null, null, null, 7);
        }
 
+       @Test
+       public void testEmbeddedJSONLDInBody() throws Exception {
+               assertExtract("/html/html-body-embedded-jsonld-extractor.html");
+               assertModelNotEmpty();
+               assertStatementsSize(null, null, null, 3);
+       }
+
+       @Test
+       public void testEmbeddedJSONLDInHeadAndBody() throws Exception {
+               assertExtract("/html/html-head-and-body-embedded-jsonld-extractor.html");
+               assertModelNotEmpty();
+               assertStatementsSize(null, null, null, 7);
+       }
+
        @Override
        protected ExtractorFactory<?> getExtractorFactory() {
                return new EmbeddedJSONLDExtractorFactory();

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..7efce2d
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
@@ -0,0 +1,37 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+    <title>Hello World!</title>
+    <meta name="title" content="Embedded JSONLD extractor"/>
+</head>
+<body>
+    <h1>Embedded JSONLD Extractor</h1>
+    <p>It extracts only the embedded JSON-LD elements.
+    <div>
+    <script type="application/ld+json">
+    {
+      "@context": "http://json-ld.org/contexts/person.jsonld",
+      "@id": "http://dbpedia.org/resource/Robert_Millar",
+      "@type": "Person",
+      "name": "Robert Millar",
+      "born": "1958-09-13T00:00:00"
+    }
+    </script>
+    </div>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..f8ce071
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
@@ -0,0 +1,47 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+    <title>Hello World!</title>
+    <meta name="title" content="Embedded JSONLD extractor"/>
+    <!-- As per spec in http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents -->
+    <script type="application/ld+json">
+    {
+      "@context": "http://json-ld.org/contexts/person.jsonld",
+      "@id": "http://dbpedia.org/resource/Robert_Millar",
+      "@type": "Person",
+      "name": "Robert Millar",
+      "born": "1958-09-13T00:00:00"
+    }
+    </script>
+
+</head>
+<h1>Embedded JSONLD Extractor</h1>
+<p>It extracts only the embedded JSON-LD elements.
+<body>
+    <script type="application/ld+json">
+    {
+      "@context": "http://json-ld.org/contexts/person.jsonld",
+      "@id": "http://dbpedia.org/resource/Robert_Frost",
+      "@type": "Person",
+      "name": "Robert Frost",
+      "born": "1874-03-26T00:00:00",
+      "died": "1963-01-29T00:00:00"
+    }
+    </script>
+</body>
+</html>
\ No newline at end of file

Reply via email to