Repository: any23 Updated Branches: refs/heads/master dae72f3ed -> 244c15858
ANY23-348 handle malformed microdata types gracefully Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/61d91b55 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/61d91b55 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/61d91b55 Branch: refs/heads/master Commit: 61d91b55053bb8e3b087216b082594fa1db85a85 Parents: dae72f3 Author: Hans <[email protected]> Authored: Fri May 18 17:38:28 2018 -0500 Committer: Hans <[email protected]> Committed: Fri May 18 17:38:28 2018 -0500 ---------------------------------------------------------------------- .../any23/extractor/microdata/ItemScope.java | 26 +++++++++++++----- .../extractor/microdata/MicrodataParser.java | 29 +++++++++++++------- 2 files changed, 38 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/61d91b55/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java index f36828a..0ab0fee 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java @@ -17,6 +17,9 @@ package org.apache.any23.extractor.microdata; +import org.apache.commons.lang.StringUtils; +import org.eclipse.rdf4j.common.net.ParsedIRI; + import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -69,20 +72,29 @@ public class ItemScope extends Item { * @param itemId <i>itemscope</i> id. Can be <code>null</code>. */ public ItemScope(String xpath, ItemProp[] itemProps, String id, String[] refs, String type, String itemId) { - super(xpath); + this(xpath, itemProps, id, refs, stringToUrl(type), itemId); + } - if (itemProps == null) { - throw new NullPointerException("itemProps list cannot be null."); - } - if (type != null) { + static URL stringToUrl(String type) { + if (StringUtils.isNotBlank(type)) { try { - this.type = new URL(type); + return new URL(ParsedIRI.create(type.trim()).toString()); } catch (MalformedURLException murle) { throw new IllegalArgumentException("Invalid type '" + type + "', must be a valid URL."); } } else { - this.type = null; + return null; + } + } + + ItemScope(String xpath, ItemProp[] itemProps, String id, String[] refs, URL type, String itemId) { + super(xpath); + + if (itemProps == null) { + throw new NullPointerException("itemProps list cannot be null."); } + + this.type = type; this.id = id; this.refs = refs; this.itemId = itemId; http://git-wip-us.apache.org/repos/asf/any23/blob/61d91b55/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java index e54a533..32faec3 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java @@ -26,6 +26,7 @@ import org.w3c.dom.traversal.NodeFilter; import org.w3c.dom.traversal.TreeWalker; import java.io.PrintStream; +import java.net.URL; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; @@ -90,7 +91,7 @@ public class MicrodataParser { /** * List of collected errors. Used when {@link #errorMode} <code>==</code> {@link ErrorMode#FULL_REPORT}. */ - private List<MicrodataParserException> errors = new ArrayList<>(); + private final List<MicrodataParserException> errors = new ArrayList<>(); public static final String ITEMSCOPE_ATTRIBUTE = "itemscope"; public static final String ITEMPROP_ATTRIBUTE = "itemprop"; @@ -504,12 +505,20 @@ public class MicrodataParser { itemProps.add(deferredProperty); } + URL type; + try { + type = ItemScope.stringToUrl(itemType); + } catch (IllegalArgumentException e) { + manageError(new MicrodataParserException(e.getMessage(), node)); + type = null; + } + final ItemScope newItemScope = new ItemScope( DomUtils.getXPathForNode(node), itemProps.toArray(new ItemProp[itemProps.size()]), id, itemrefIDs, - itemType, + type, itemId ); itemScopes.put(node, newItemScope); @@ -517,15 +526,15 @@ public class MicrodataParser { } private void manageError(MicrodataParserException mpe) throws MicrodataParserException { - if(errorMode == ErrorMode.STOP_AT_FIRST_ERROR) { - throw mpe; - } - if(errorMode != ErrorMode.FULL_REPORT) - throw new IllegalStateException("Unsupported mode " + errorMode); - if(errors == null) { - errors = new ArrayList<>(); + switch (errorMode) { + case FULL_REPORT: + errors.add(mpe); + break; + case STOP_AT_FIRST_ERROR: + throw mpe; + default: + throw new IllegalStateException("Unsupported mode " + errorMode); } - errors.add(mpe); } }
