Repository: any23
Updated Branches:
  refs/heads/master 0270bb0cf -> 6a5471916
ANY23-414 support reverse itemprops in microdata

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6a547191
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6a547191
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6a547191

Branch: refs/heads/master
Commit: 6a5471916c917d2c332985311fef5b40f3f894bd
Parents: 0270bb0
Author: Hans <[email protected]>
Authored: Tue Oct 30 13:56:06 2018 -0500
Committer: Hans <[email protected]>
Committed: Tue Oct 30 13:56:06 2018 -0500

----------------------------------------------------------------------
 .../any23/extractor/microdata/ItemProp.java | 6 +++
 .../extractor/microdata/MicrodataExtractor.java | 12 ++++-
 .../extractor/microdata/MicrodataParser.java | 52 +++++++++++++++-----
 .../microdata/MicrodataExtractorTest.java | 5 +-
 4 files changed, 57 insertions(+), 18 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/6a547191/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
index d516c88..6b8072f 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
@@ -42,6 +42,11 @@ public class ItemProp extends Item {
      * @param value item property value.
*/ public ItemProp(String xpath, String name, ItemPropValue value) { + this(xpath, name, value, false); + } + + final boolean reverse; + ItemProp(String xpath, String name, ItemPropValue value, boolean reverse) { super(xpath); if(name == null) { @@ -55,6 +60,7 @@ public class ItemProp extends Item { } this.name = name; this.value = value; + this.reverse = reverse; } /** http://git-wip-us.apache.org/repos/asf/any23/blob/6a547191/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java index 50f880f..829866d 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java @@ -190,13 +190,21 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor { value = toAbsoluteIRI(documentIRI, (String)propValue); //TODO: support registries so hardcoding not needed if (predicate.stringValue().equals("http://schema.org/additionalType")) { - out.writeTriple(subject, RDF.TYPE, value); + if (itemProp.reverse) { + out.writeTriple((Resource)value, RDF.TYPE, subject); + } else { + out.writeTriple(subject, RDF.TYPE, value); + } } } else { throw new RuntimeException("Invalid Type '" + propType + "' for ItemPropValue with name: '" + predicate + "'"); } - out.writeTriple(subject, predicate, value); + if (itemProp.reverse) { + out.writeTriple((Resource)value, predicate, subject); + } else { + out.writeTriple(subject, predicate, value); + } } private static final String hcardPrefix = "http://microformats.org/profile/hcard"; http://git-wip-us.apache.org/repos/asf/any23/blob/6a547191/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java index 8c3c641..0c993e1 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java @@ -103,6 +103,7 @@ public class MicrodataParser { public static final String ITEMSCOPE_ATTRIBUTE = "itemscope"; public static final String ITEMPROP_ATTRIBUTE = "itemprop"; + private static final String REVERSE_ITEMPROP_ATTRIBUTE = "itemprop-reverse"; /** * List of tags providing the <code>src</code> property. @@ -198,7 +199,8 @@ public class MicrodataParser { final List<Node> topLevelItemScopes = new ArrayList<>(); final List<Node> possibles = new ArrayList<>(); for (Node itemScope : itemScopes) { - if (!isItemProp(itemScope)) { + if (!isItemProp(itemScope) + && DomUtils.readAttribute(itemScope, REVERSE_ITEMPROP_ATTRIBUTE, null) == null) { topLevelItemScopes.add(itemScope); } else if (!isContainedInItemScope(itemScope)) { possibles.add(itemScope); @@ -504,7 +506,8 @@ public class MicrodataParser { boolean skipRootChildren = false; if (!skipRoot) { NamedNodeMap attributes = scopeNode.getAttributes(); - if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null) { + if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null + || attributes.getNamedItem(REVERSE_ITEMPROP_ATTRIBUTE) != null) { accepted.add(scopeNode); } if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) { @@ -520,7 +523,8 @@ public class MicrodataParser { public short acceptNode(Node node) { if (node.getNodeType() == Node.ELEMENT_NODE) { NamedNodeMap attributes = node.getAttributes(); - if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && !scopeNode.equals(node)) { + if ((attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null + || 
attributes.getNamedItem(REVERSE_ITEMPROP_ATTRIBUTE) != null) && scopeNode != node) { accepted.add(node); } @@ -541,8 +545,12 @@ public class MicrodataParser { final List<ItemProp> result = new ArrayList<>(); for (Node itemPropNode : accepted) { final String itemProp = DomUtils.readAttribute(itemPropNode, ITEMPROP_ATTRIBUTE, null); + final String reverseProp = DomUtils.readAttribute(itemPropNode, REVERSE_ITEMPROP_ATTRIBUTE, null); - if (StringUtils.isBlank(itemProp)) { + boolean hasItemProp = StringUtils.isNotBlank(itemProp); + boolean hasReverseProp = StringUtils.isNotBlank(reverseProp); + + if (!hasItemProp && !hasReverseProp) { manageError(new MicrodataParserException("invalid property name '" + itemProp + "'", itemPropNode)); continue; } @@ -554,14 +562,34 @@ public class MicrodataParser { manageError(mpe); continue; } - for (String propertyName : itemProp.trim().split("\\s+")) { - result.add( - new ItemProp( - DomUtils.getXPathForNode(itemPropNode), - propertyName, - itemPropValue - ) - ); + if (hasItemProp) { + for (String propertyName : itemProp.trim().split("\\s+")) { + result.add( + new ItemProp( + DomUtils.getXPathForNode(itemPropNode), + propertyName, + itemPropValue, + false + ) + ); + } + } + if (hasReverseProp) { + if (itemPropValue.literal != null) { + manageError(new MicrodataParserException(REVERSE_ITEMPROP_ATTRIBUTE + + " cannot point to a literal", itemPropNode)); + continue; + } + for (String propertyName : reverseProp.trim().split("\\s+")) { + result.add( + new ItemProp( + DomUtils.getXPathForNode(itemPropNode), + propertyName, + itemPropValue, + true + ) + ); + } } } return result; http://git-wip-us.apache.org/repos/asf/any23/blob/6a547191/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java 
b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java index 01a0585..9d27ffa 100644 --- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java @@ -127,10 +127,7 @@ public class MicrodataExtractorTest extends AbstractExtractorTestCase { private static final List<String> ignoredOnlineTestNames = Arrays.asList( "Test 0073", //Vocabulary Expansion test with rdfs:subPropertyOf - "Test 0074", //Vocabulary Expansion test with owl:equivalentProperty - "Test 0081", //Simple @itemprop-reverse (experimental) - "Test 0082", //@itemprop-reverse with @itemscope value (experimental) - "Test 0084" //@itemprop-reverse with @itemprop (experimental) + "Test 0074" //Vocabulary Expansion test with owl:equivalentProperty ); private static Any23 createRunner(String extractorName) {
