Require strict isomorphism with the expected models in the online microdata tests
Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/2175c2d3 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/2175c2d3 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/2175c2d3 Branch: refs/heads/master Commit: 2175c2d376712e2bb7a18c2c17f6527a46aa35ae Parents: 4f28040 Author: Hans <[email protected]> Authored: Mon Oct 29 22:28:45 2018 -0500 Committer: Hans <[email protected]> Committed: Mon Oct 29 22:28:45 2018 -0500 ---------------------------------------------------------------------- .../extractor/microdata/MicrodataExtractor.java | 2 +- .../extractor/microdata/MicrodataParser.java | 73 ++++++++------------ .../microdata/MicrodataExtractorTest.java | 4 +- ....2.1-non-normative-example-2-expected.nquads | 2 - 4 files changed, 30 insertions(+), 51 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java index cac6689..50f880f 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java @@ -49,7 +49,7 @@ import java.util.Optional; */ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor { - private static final IRI MICRODATA_ITEM + static final IRI MICRODATA_ITEM = RDFUtils.iri("http://www.w3.org/1999/xhtml/microdata#item"); private static final ParsedIRI EMPTY_FRAG = ParsedIRI.create("#"); http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java index c086d3f..8c3c641 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java @@ -300,32 +300,6 @@ public class MicrodataParser { ps.append("}"); } - /** - * Returns only nodes that are <b>not</b> nested one each other. - * - * @param candidates list of candidate nodes. - * @return list of unnested nodes. - */ - @SuppressWarnings("unused") - private static List<Node> getUnnestedNodes(List<Node> candidates) { - final List<Node> unnesteds = new ArrayList<>(); - for(int i = 0; i < candidates.size(); i++) { - boolean skip = false; - for(int j = 0; j < candidates.size(); j++) { - if(i == j) - continue; - if( DomUtils.isAncestorOf(candidates.get(j), candidates.get(i), true) ) { - skip = true; - break; - } - } - if(!skip) { - unnesteds.add( candidates.get(i) ); - } - } - return unnesteds; - } - public void setErrorMode(ErrorMode errorMode) { if(errorMode == null) throw new IllegalArgumentException("errorMode must be not null."); @@ -527,35 +501,42 @@ public class MicrodataParser { public List<ItemProp> getItemProps(final Node scopeNode, boolean skipRoot) throws MicrodataParserException { final Set<Node> accepted = new LinkedHashSet<>(); + boolean skipRootChildren = false; if (!skipRoot) { NamedNodeMap attributes = scopeNode.getAttributes(); if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null) { accepted.add(scopeNode); } + if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) { + skipRootChildren = true; + } } - // TreeWalker to walk DOM tree starting with the scopeNode. Nodes maybe visited multiple times. 
- TreeWalker treeWalker = ((DocumentTraversal) scopeNode.getOwnerDocument()) - .createTreeWalker(scopeNode, NodeFilter.SHOW_ELEMENT, new NodeFilter() { - @Override - public short acceptNode(Node node) { - if (node.getNodeType() == Node.ELEMENT_NODE) { - NamedNodeMap attributes = node.getAttributes(); - if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && !scopeNode.equals(node)) { - accepted.add(node); - } + if (!skipRootChildren) { + // TreeWalker to walk DOM tree starting with the scopeNode. Nodes maybe visited multiple times. + TreeWalker treeWalker = ((DocumentTraversal) scopeNode.getOwnerDocument()) + .createTreeWalker(scopeNode, NodeFilter.SHOW_ELEMENT, new NodeFilter() { + @Override + public short acceptNode(Node node) { + if (node.getNodeType() == Node.ELEMENT_NODE) { + NamedNodeMap attributes = node.getAttributes(); + if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && !scopeNode.equals(node)) { + accepted.add(node); + } + + if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) { + // Don't visit descendants of nodes that define a new scope + return FILTER_REJECT; + } + } + return FILTER_ACCEPT; + } + }, false); - if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) { - // Don't visit descendants of nodes that define a new scope - return FILTER_REJECT; - } - } - return FILTER_ACCEPT; - } - }, false); - // To populate accepted we only need to walk the tree. - while (treeWalker.nextNode() != null); + // To populate accepted we only need to walk the tree. 
+ while (treeWalker.nextNode() != null) ; + } final List<ItemProp> result = new ArrayList<>(); for (Node itemPropNode : accepted) { http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java index 11aa353..0e634de 100644 --- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java @@ -197,6 +197,7 @@ public class MicrodataExtractorTest extends AbstractExtractorTestCase { TreeModel actual = new TreeModel(); createRunner(MicrodataExtractorFactory.NAME).extract(action.stringValue(), new TripleWriterHandler() { public void writeTriple(Resource s, IRI p, Value o, Resource g) { + if (MicrodataExtractor.MICRODATA_ITEM.equals(p)) return; actual.add(s, p, o); } public void writeNamespace(String prefix, String uri) { } @@ -214,8 +215,7 @@ public class MicrodataExtractorTest extends AbstractExtractorTestCase { }); } - boolean testPassed = positive ? (expected.isEmpty() ? 
actual.isEmpty() - : Models.isSubset(expected, actual)) : !Models.isomorphic(expected, actual); + boolean testPassed = positive == Models.isomorphic(expected, actual); if (testPassed) { passedTests.incrementAndGet(); } else { http://git-wip-us.apache.org/repos/asf/any23/blob/2175c2d3/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads index b3e99b0..8eedf33 100644 --- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads +++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads @@ -16,7 +16,6 @@ # _:nodebdb2c525cf8095abb6954b51432e6 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> . -_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard#street-address> "Avenue Q" <http://bob.example.com/> . _:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard#fn> "Princeton" <http://bob.example.com/> . _:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard#given-name> "Princeton" <http://bob.example.com/> . _:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard#n> _:node5194c3bb9d7f53e4759c6f393d95f88 <http://bob.example.com/> . @@ -24,7 +23,6 @@ _:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://microformats.org/profile/hcard#st _:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard#adr> _:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://bob.example.com/> . <http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:nodebdb2c525cf8095abb6954b51432e6 <http://bob.example.com/> . 
_:node7a12e48e321d29211c8b7c2ce396854 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> . -_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard#street-address> "Avenue Q" <http://bob.example.com/> . _:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard#fn> "Trekkie" <http://bob.example.com/> . _:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard#given-name> "Trekkie" <http://bob.example.com/> . _:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard#n> _:node45173ea18b736c2e9c3136e52ed3727e <http://bob.example.com/> .
