require strict isomorphism w/online microdata tests' expected models

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/2175c2d3
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/2175c2d3
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/2175c2d3

Branch: refs/heads/master
Commit: 2175c2d376712e2bb7a18c2c17f6527a46aa35ae
Parents: 4f28040
Author: Hans <[email protected]>
Authored: Mon Oct 29 22:28:45 2018 -0500
Committer: Hans <[email protected]>
Committed: Mon Oct 29 22:28:45 2018 -0500

----------------------------------------------------------------------
 .../extractor/microdata/MicrodataExtractor.java |  2 +-
 .../extractor/microdata/MicrodataParser.java    | 73 ++++++++------------
 .../microdata/MicrodataExtractorTest.java       |  4 +-
 ....2.1-non-normative-example-2-expected.nquads |  2 -
 4 files changed, 30 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
index cac6689..50f880f 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
@@ -49,7 +49,7 @@ import java.util.Optional;
  */
 public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
 
-    private static final IRI MICRODATA_ITEM
+    static final IRI MICRODATA_ITEM
             = RDFUtils.iri("http://www.w3.org/1999/xhtml/microdata#item");
 
     private static final ParsedIRI EMPTY_FRAG = ParsedIRI.create("#");

http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java 
b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
index c086d3f..8c3c641 100644
--- 
a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
+++ 
b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
@@ -300,32 +300,6 @@ public class MicrodataParser {
         ps.append("}");
     }
 
-    /**
-     * Returns only nodes that are <b>not</b> nested one each other.
-     *
-     * @param candidates list of candidate nodes.
-     * @return list of unnested nodes.
-     */
-    @SuppressWarnings("unused")
-    private static List<Node> getUnnestedNodes(List<Node> candidates) {
-        final List<Node> unnesteds  = new ArrayList<>();
-        for(int i = 0; i < candidates.size(); i++) {
-            boolean skip = false;
-            for(int j = 0; j < candidates.size(); j++) {
-                if(i == j)
-                    continue;
-                if( DomUtils.isAncestorOf(candidates.get(j), 
candidates.get(i), true) ) {
-                    skip = true;
-                    break;
-                }
-            }
-            if(!skip) {
-                unnesteds.add( candidates.get(i) );
-            }
-        }
-        return unnesteds;
-    }
-
     public void setErrorMode(ErrorMode errorMode) {
         if(errorMode == null)
             throw new IllegalArgumentException("errorMode must be not null.");
@@ -527,35 +501,42 @@ public class MicrodataParser {
     public List<ItemProp> getItemProps(final Node scopeNode, boolean skipRoot) throws MicrodataParserException {
         final Set<Node> accepted = new LinkedHashSet<>();
 
+        boolean skipRootChildren = false;
         if (!skipRoot) {
             NamedNodeMap attributes = scopeNode.getAttributes();
             if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null) {
                 accepted.add(scopeNode);
             }
+            if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
+                skipRootChildren = true;
+            }
         }
 
-        // TreeWalker to walk DOM tree starting with the scopeNode. Nodes 
maybe visited multiple times.
-        TreeWalker treeWalker = ((DocumentTraversal) 
scopeNode.getOwnerDocument())
-            .createTreeWalker(scopeNode, NodeFilter.SHOW_ELEMENT, new 
NodeFilter() {
-            @Override
-            public short acceptNode(Node node) {
-                if (node.getNodeType() == Node.ELEMENT_NODE) {
-                    NamedNodeMap attributes = node.getAttributes();
-                    if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && 
!scopeNode.equals(node)) {
-                        accepted.add(node);
-                    }
+        if (!skipRootChildren) {
+            // TreeWalker to walk DOM tree starting with the scopeNode. Nodes 
maybe visited multiple times.
+            TreeWalker treeWalker = ((DocumentTraversal) 
scopeNode.getOwnerDocument())
+                    .createTreeWalker(scopeNode, NodeFilter.SHOW_ELEMENT, new 
NodeFilter() {
+                        @Override
+                        public short acceptNode(Node node) {
+                            if (node.getNodeType() == Node.ELEMENT_NODE) {
+                                NamedNodeMap attributes = node.getAttributes();
+                                if 
(attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && 
!scopeNode.equals(node)) {
+                                    accepted.add(node);
+                                }
+
+                                if 
(attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
+                                    // Don't visit descendants of nodes that 
define a new scope
+                                    return FILTER_REJECT;
+                                }
+                            }
+                            return FILTER_ACCEPT;
+                        }
+                    }, false);
 
-                    if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
-                        // Don't visit descendants of nodes that define a new 
scope
-                        return FILTER_REJECT;
-                    }
-                }
-                return FILTER_ACCEPT;
-            }
-        }, false);
 
-        // To populate accepted we only need to walk the tree.
-        while (treeWalker.nextNode() != null);
+            // To populate accepted we only need to walk the tree.
+            while (treeWalker.nextNode() != null) ;
+        }
 
         final List<ItemProp> result = new ArrayList<>();
         for (Node itemPropNode : accepted) {

http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
 
b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
index 11aa353..0e634de 100644
--- 
a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
+++ 
b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
@@ -197,6 +197,7 @@ public class MicrodataExtractorTest extends 
AbstractExtractorTestCase {
                 TreeModel actual = new TreeModel();
                 
createRunner(MicrodataExtractorFactory.NAME).extract(action.stringValue(), new 
TripleWriterHandler() {
                     public void writeTriple(Resource s, IRI p, Value o, 
Resource g) {
+                        if (MicrodataExtractor.MICRODATA_ITEM.equals(p)) return;
                         actual.add(s, p, o);
                     }
                     public void writeNamespace(String prefix, String uri) { }
@@ -214,8 +215,7 @@ public class MicrodataExtractorTest extends 
AbstractExtractorTestCase {
                     });
                 }
 
-                boolean testPassed = positive ? (expected.isEmpty() ? 
actual.isEmpty()
-                        : Models.isSubset(expected, actual)) : 
!Models.isomorphic(expected, actual);
+                boolean testPassed = positive == Models.isomorphic(expected, actual);
                 if (testPassed) {
                     passedTests.incrementAndGet();
                 } else {

http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
----------------------------------------------------------------------
diff --git 
a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
 
b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
index b3e99b0..8eedf33 100644
--- 
a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
+++ 
b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
@@ -16,7 +16,6 @@
 #
 
 _:nodebdb2c525cf8095abb6954b51432e6 
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> 
<http://microformats.org/profile/hcard> <http://bob.example.com/> .
-_:nodebdb2c525cf8095abb6954b51432e6 
<http://microformats.org/profile/hcard#street-address> "Avenue Q" 
<http://bob.example.com/> .
 _:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard#fn> 
"Princeton" <http://bob.example.com/> .
 _:nodebdb2c525cf8095abb6954b51432e6 
<http://microformats.org/profile/hcard#given-name> "Princeton" 
<http://bob.example.com/> .
 _:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard#n> 
_:node5194c3bb9d7f53e4759c6f393d95f88 <http://bob.example.com/> .
@@ -24,7 +23,6 @@ _:node1ffeb2699b75ba7aca5ee3d72adb55a8 
<http://microformats.org/profile/hcard#st
 _:nodebdb2c525cf8095abb6954b51432e6 
<http://microformats.org/profile/hcard#adr> 
_:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://bob.example.com/> .
 <http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> 
_:nodebdb2c525cf8095abb6954b51432e6 <http://bob.example.com/> .
 _:node7a12e48e321d29211c8b7c2ce396854 
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> 
<http://microformats.org/profile/hcard> <http://bob.example.com/> .
-_:node7a12e48e321d29211c8b7c2ce396854 
<http://microformats.org/profile/hcard#street-address> "Avenue Q" 
<http://bob.example.com/> .
 _:node7a12e48e321d29211c8b7c2ce396854 
<http://microformats.org/profile/hcard#fn> "Trekkie" <http://bob.example.com/> .
 _:node7a12e48e321d29211c8b7c2ce396854 
<http://microformats.org/profile/hcard#given-name> "Trekkie" 
<http://bob.example.com/> .
 _:node7a12e48e321d29211c8b7c2ce396854 
<http://microformats.org/profile/hcard#n> 
_:node45173ea18b736c2e9c3136e52ed3727e <http://bob.example.com/> .

Reply via email to