Repository: any23
Updated Branches:
  refs/heads/master 5b93f21ec -> 36682ccdf


ANY23-154 allow unused itemprops


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/36682ccd
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/36682ccd
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/36682ccd

Branch: refs/heads/master
Commit: 36682ccdfbddcd924cb5840e25d956f581e7125f
Parents: 5b93f21
Author: Hans <[email protected]>
Authored: Sun Oct 28 20:13:31 2018 -0500
Committer: Hans <[email protected]>
Committed: Sun Oct 28 20:13:31 2018 -0500

----------------------------------------------------------------------
 .../extractor/microdata/ItemPropValue.java      |  3 +-
 .../extractor/microdata/MicrodataParser.java    | 68 ++++++++++++++++----
 .../microdata/MicrodataExtractorTest.java       |  7 ++
 .../resources/microdata/unused-itemprop.html    | 30 +++++++++
 4 files changed, 93 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/36682ccd/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
index 2b6659a..8b5bffd 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
@@ -97,6 +97,7 @@ public class ItemPropValue {
     public ItemPropValue(Object content, Type type) {
         this.type = Objects.requireNonNull(type, "type cannot be null");
         this.content = type.checkClass(content);
+        this.literal = null;
     }
 
     ItemPropValue(Literal literal) {
@@ -122,7 +123,7 @@ public class ItemPropValue {
         this.content = content;
     }
 
-    Literal literal;
+    final Literal literal;
 
     /**
      * @return the content object.

http://git-wip-us.apache.org/repos/asf/any23/blob/36682ccd/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
index f59bbdb..8964b32 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
@@ -41,6 +41,7 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 /**
  * This class provides utility methods for handling <b>Microdata</b>
@@ -162,6 +163,27 @@ public class MicrodataParser {
         return DomUtils.readAttribute(node, ITEMPROP_ATTRIBUTE, null) != null;
     }
 
+    private static boolean isContainedInItemScope(Node node) {
+        for (Node p = node.getParentNode(); p != null; p = p.getParentNode()) {
+            NamedNodeMap attrs = p.getAttributes();
+            if (attrs != null && attrs.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private static boolean isContainedInId(Node node, Set<String> ids) {
+        do {
+            String id = DomUtils.readAttribute(node, "id", null);
+            if (id != null && ids.contains(id)) {
+                return true;
+            }
+            node = node.getParentNode();
+        } while (node != null);
+        return false;
+    }
+
     /**
      * Returns only the <i>itemScope</i>s that are top level items.
      *
@@ -171,13 +193,27 @@ public class MicrodataParser {
     public static List<Node> getTopLevelItemScopeNodes(Node node)  {
         final List<Node> itemScopes = getItemScopeNodes(node);
         final List<Node> topLevelItemScopes = new ArrayList<>();
-        for(Node itemScope : itemScopes) {
-            if( ! isItemProp(itemScope) ) {
+        final List<Node> possibles = new ArrayList<>();
+        for (Node itemScope : itemScopes) {
+            if (!isItemProp(itemScope)) {
                 topLevelItemScopes.add(itemScope);
+            } else if (!isContainedInItemScope(itemScope)) {
+                possibles.add(itemScope);
+            }
+        }
+
+        if (!possibles.isEmpty()) {
+            Set<String> refIds = itemScopes.stream()
+                    .flatMap(n -> Arrays.stream(itemrefIds(n)))
+                    .collect(Collectors.toSet());
+
+            for (Node itemScope : possibles) {
+                if (!isContainedInId(itemScope, refIds)) {
+                    topLevelItemScopes.add(itemScope);
+                }
             }
         }
-        // ANY23-131 Nested Microdata are not extracted
-        //return getUnnestedNodes( topLevelItemScopes );
+
         return topLevelItemScopes;
     }
 
@@ -470,15 +506,14 @@ public class MicrodataParser {
                 continue;
             }
 
-            final String[] propertyNames = itemProp.trim().split("\\s+");
             ItemPropValue itemPropValue;
-            for (String propertyName : propertyNames) {
-                try {
-                    itemPropValue = getPropertyValue(itemPropNode);
-                } catch (MicrodataParserException mpe) {
-                    manageError(mpe);
-                    continue;
-                }
+            try {
+                itemPropValue = getPropertyValue(itemPropNode);
+            } catch (MicrodataParserException mpe) {
+                manageError(mpe);
+                continue;
+            }
+            for (String propertyName : itemProp.trim().split("\\s+")) {
                 result.add(
                         new ItemProp(
                                 DomUtils.getXPathForNode(itemPropNode),
@@ -537,6 +572,12 @@ public class MicrodataParser {
         return result.toArray( new ItemProp[result.size()] );
     }
 
+    private static final String[] EMPTY_STRINGS = new String[0];
+    private static String[] itemrefIds(Node node) {
+        String itemref = DomUtils.readAttribute(node, "itemref" , null);
+        return StringUtils.isBlank(itemref) ? EMPTY_STRINGS : itemref.trim().split("\\s+");
+    }
+
     /**
      * Returns the {@link ItemScope} instance described within the specified <code>node</code>.
      *
@@ -550,12 +591,11 @@ public class MicrodataParser {
             return itemScope;
 
         final String id       = DomUtils.readAttribute(node, "id"      , null);
-        final String itemref  = DomUtils.readAttribute(node, "itemref" , null);
         final String itemType = DomUtils.readAttribute(node, "itemtype", null);
         final String itemId   = DomUtils.readAttribute(node, "itemid"  , null);
 
         final List<ItemProp> itemProps = getItemProps(node, true);
-        final String[] itemrefIDs = itemref == null ? new String[0] : itemref.split("\\s+");
+        final String[] itemrefIDs = itemrefIds(node);
         final ItemProp[] deferredProperties;
         try {
             deferredProperties = deferProperties(itemrefIDs);

http://git-wip-us.apache.org/repos/asf/any23/blob/36682ccd/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
index 9d7a079..11aa353 100644
--- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
@@ -99,6 +99,13 @@ public class MicrodataExtractorTest extends AbstractExtractorTestCase {
     }
 
     @Test
+    public void testUnusedItemprop() {
+        //Test for ANY23-154
+        assertExtract("/microdata/unused-itemprop.html");
+        assertContains(null, RDF.TYPE, RDFUtils.iri("http://schema.org/Offer"));
+    }
+
+    @Test
     public void testExample2() {
         //Property URI generation for hcard
         assertExtract("/microdata/example2.html");

http://git-wip-us.apache.org/repos/asf/any23/blob/36682ccd/test-resources/src/test/resources/microdata/unused-itemprop.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/unused-itemprop.html b/test-resources/src/test/resources/microdata/unused-itemprop.html
new file mode 100644
index 0000000..ca50180
--- /dev/null
+++ b/test-resources/src/test/resources/microdata/unused-itemprop.html
@@ -0,0 +1,30 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!-- Test for ANY23-154 -->
+
+<!DOCTYPE html>
+<html lang="en">
+<head>
+</head>
+<body>
+
+<div id="someid" itemprop="offer" itemscope itemtype="http://schema.org/Offer">
+</div>
+
+</body>
+</html>
\ No newline at end of file

Reply via email to