Repository: any23 Updated Branches: refs/heads/master 0adafd175 -> 15571d45f
ANY23-318 ExtractionException handling in BaseRDFExtractor.java kills entire extraction Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/4c81edde Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/4c81edde Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/4c81edde Branch: refs/heads/master Commit: 4c81edde390b6b6e91566f490ca5d915ca0b0945 Parents: 7c68688 Author: Lewis John McGibbney <[email protected]> Authored: Wed Dec 27 20:06:08 2017 +0000 Committer: Lewis John McGibbney <[email protected]> Committed: Wed Dec 27 20:06:08 2017 +0000 ---------------------------------------------------------------------- .../any23/extractor/ExtractionParameters.java | 22 ++-- .../main/java/org/apache/any23/cli/Rover.java | 4 +- .../extractor/SingleDocumentExtraction.java | 20 +-- .../any23/extractor/rdf/BaseRDFExtractor.java | 19 +-- .../DefaultValidationReportBuilder.java | 12 +- .../any23/validator/ValidationReport.java | 28 +++- .../any23/validator/rule/AboutNotURIRule.java | 8 +- .../any23/validator/rule/MetaNameMisuseFix.java | 2 + .../validator/rule/MetaNameMisuseRule.java | 6 +- .../rule/MissingItemscopeAttributeValueFix.java | 27 ++-- .../MissingItemscopeAttributeValueRule.java | 6 +- .../rule/MissingOpenGraphNamespaceRule.java | 4 +- .../validator/rule/OpenGraphNamespaceFix.java | 5 +- .../test/java/org/apache/any23/Any23Test.java | 8 +- .../any23/validator/DefaultValidatorTest.java | 17 +-- .../XMLValidationReportSerializerTest.java | 2 +- .../apache/any23/servlet/RedirectServlet.java | 51 ++++++-- .../java/org/apache/any23/servlet/Servlet.java | 16 +-- .../org/apache/any23/servlet/WebResponder.java | 2 +- .../resources/microdata/microdata-basic.html | 129 +++++++++++-------- 20 files changed, 244 insertions(+), 144 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java b/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java index 2bd7e2b..96a6218 100644 --- a/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java +++ b/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java @@ -73,15 +73,15 @@ public class ExtractionParameters { this.extractionFlags = extractionFlags == null ? - new HashMap<String,Boolean>() + new HashMap<>() : - new HashMap<String,Boolean>(extractionFlags); + new HashMap<>(extractionFlags); this.extractionProperties = extractionProperties == null ? - new HashMap<String,String>() + new HashMap<>() : - new HashMap<String,String>(extractionProperties); + new HashMap<>(extractionProperties); } /** @@ -122,7 +122,7 @@ public class ExtractionParameters { * @return the default extraction parameters. */ public static final ExtractionParameters newDefault(Configuration c) { - return new ExtractionParameters(c, ValidationMode.None); + return new ExtractionParameters(c, ValidationMode.NONE); } /** @@ -131,30 +131,30 @@ public class ExtractionParameters { * @return the default extraction parameters. */ public static final ExtractionParameters newDefault() { - return new ExtractionParameters(DefaultConfiguration.singleton(), ValidationMode.None); + return new ExtractionParameters(DefaultConfiguration.singleton(), ValidationMode.NONE); } /** * Declares the supported validation actions. */ public enum ValidationMode { - None, - Validate, - ValidateAndFix + NONE, + VALIDATE, + VALIDATE_AND_FIX } /** * @return <code>true</code> if validation is active. */ public boolean isValidate() { - return extractionMode == ValidationMode.Validate || extractionMode == ValidationMode.ValidateAndFix; + return extractionMode == ValidationMode.VALIDATE || extractionMode == ValidationMode.VALIDATE_AND_FIX; } /** * @return <code>true</code> if fix is active. */ public boolean isFix() { - return extractionMode == ValidationMode.ValidateAndFix; + return extractionMode == ValidationMode.VALIDATE_AND_FIX; } /** http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/cli/src/main/java/org/apache/any23/cli/Rover.java ---------------------------------------------------------------------- diff --git a/cli/src/main/java/org/apache/any23/cli/Rover.java b/cli/src/main/java/org/apache/any23/cli/Rover.java index ffa1de0..18f0c06 100644 --- a/cli/src/main/java/org/apache/any23/cli/Rover.java +++ b/cli/src/main/java/org/apache/any23/cli/Rover.java @@ -154,9 +154,9 @@ public class Rover implements Tool { extractionParameters = pedantic ? - new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled) + new ExtractionParameters(configuration, ValidationMode.VALIDATE_AND_FIX, nestingDisabled) : - new ExtractionParameters(configuration, ValidationMode.None , nestingDisabled); + new ExtractionParameters(configuration, ValidationMode.NONE , nestingDisabled); if (defaultns != null) { extractionParameters.setProperty(ExtractionParameters.EXTRACTION_CONTEXT_IRI_PROPERTY, defaultns); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java index cd6fea7..9cee7a4 100644 --- a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java +++ b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java @@ -73,7 +73,7 @@ public class SingleDocumentExtraction { private static final SINDICE vSINDICE = SINDICE.getInstance(); - private final static Logger log = LoggerFactory.getLogger(SingleDocumentExtraction.class); + private static final Logger log = LoggerFactory.getLogger(SingleDocumentExtraction.class); private final Configuration configuration; @@ -115,13 +115,15 @@ public class SingleDocumentExtraction { public SingleDocumentExtraction( Configuration configuration, DocumentSource in, ExtractorGroup extractors, TripleHandler output ) { - if(configuration == null) throw new NullPointerException("configuration cannot be null."); - if(in == null) throw new NullPointerException("in cannot be null."); + if(configuration == null) + throw new NullPointerException("configuration cannot be null."); + if(in == null) + throw new NullPointerException("in cannot be null."); this.configuration = configuration; this.in = in; this.extractors = extractors; - List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>(); + List<TripleHandler> tripleHandlers = new ArrayList<>(); tripleHandlers.add(output); tripleHandlers.add(new CountingTripleHandler()); this.output = new CompositeTripleHandler(tripleHandlers); @@ -222,7 +224,7 @@ public class SingleDocumentExtraction { filterExtractorsByMIMEType(); if(log.isDebugEnabled()) { - StringBuffer sb = new StringBuffer("Extractors "); + StringBuilder sb = new StringBuilder("Extractors "); for (ExtractorFactory<?> factory : matchingExtractors) { sb.append(factory.getExtractorName()); sb.append(' '); @@ -347,7 +349,7 @@ public class SingleDocumentExtraction { */ @SuppressWarnings("rawtypes") public List<Extractor> getMatchingExtractors() { - final List<Extractor> extractorsList = new ArrayList<Extractor>(); + final List<Extractor> extractorsList = new ArrayList<>(); for(ExtractorFactory extractorFactory : matchingExtractors) { extractorsList.add( extractorFactory.createExtractor() ); } @@ -415,7 +417,8 @@ public class SingleDocumentExtraction { */ private void filterExtractorsByMIMEType() throws IOException { - if (matchingExtractors != null) return; // has already been run. + if (matchingExtractors != null) + return; // has already been run. if (detector == null || extractors.allExtractorsSupportAllContentTypes()) { matchingExtractors = extractors; @@ -515,7 +518,8 @@ public class SingleDocumentExtraction { * @throws IOException */ private void ensureHasLocalCopy() throws IOException { - if (localDocumentSource != null) return; + if (localDocumentSource != null) + return; if (in.isLocal()) { localDocumentSource = in; return; http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java index 549cc1a..6b9377e 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java @@ -22,12 +22,14 @@ import org.apache.any23.extractor.ExtractionException; import org.apache.any23.extractor.ExtractionParameters; import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.Extractor; -import org.apache.any23.extractor.ExtractorDescription; import org.eclipse.rdf4j.rio.RDFHandlerException; import org.eclipse.rdf4j.rio.RDFParseException; import org.eclipse.rdf4j.rio.RDFParser; import org.eclipse.rdf4j.rio.RioSetting; import org.eclipse.rdf4j.rio.helpers.BasicParserSettings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.io.InputStream; import java.util.HashSet; @@ -40,9 +42,14 @@ import java.util.HashSet; */ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { + private static final Logger LOG = LoggerFactory.getLogger(BaseRDFExtractor.class); private boolean verifyDataType; private boolean stopAtFirstError; + public BaseRDFExtractor() { + this(false, false); + } + /** * Constructor, allows to specify the validation and error handling policies. * @@ -56,17 +63,11 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { this.stopAtFirstError = stopAtFirstError; } - public abstract ExtractorDescription getDescription(); - protected abstract RDFParser getParser( ExtractionContext extractionContext, ExtractionResult extractionResult ); - public BaseRDFExtractor() { - this(false, false); - } - public boolean isVerifyDataType() { return verifyDataType; } @@ -79,10 +80,12 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { return stopAtFirstError; } + @Override public void setStopAtFirstError(boolean b) { stopAtFirstError = b; } + @Override public void run( ExtractionParameters extractionParameters, ExtractionContext extractionContext, @@ -106,7 +109,7 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { } catch (RDFHandlerException ex) { throw new IllegalStateException("Unexpected exception.", ex); } catch (RDFParseException ex) { - throw new ExtractionException("Error while parsing RDF document.", ex, extractionResult); + LOG.error("Error while parsing RDF document.", ex, extractionResult); } } http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/DefaultValidationReportBuilder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/DefaultValidationReportBuilder.java b/core/src/main/java/org/apache/any23/validator/DefaultValidationReportBuilder.java index 8f73d8d..bbab1b4 100644 --- a/core/src/main/java/org/apache/any23/validator/DefaultValidationReportBuilder.java +++ b/core/src/main/java/org/apache/any23/validator/DefaultValidationReportBuilder.java @@ -35,7 +35,9 @@ public class DefaultValidationReportBuilder implements ValidationReportBuilder { private List<ValidationReport.RuleActivation> ruleActivations; private List<ValidationReport.Error> errors; - public DefaultValidationReportBuilder() {} + public DefaultValidationReportBuilder() { + //default constructor + } public ValidationReport getReport() { return new DefaultValidationReport( @@ -47,7 +49,7 @@ public class DefaultValidationReportBuilder implements ValidationReportBuilder { public void reportIssue(ValidationReport.IssueLevel issueLevel, String message, Node n) { if(issues == null) { - issues = new ArrayList<ValidationReport.Issue>(); + issues = new ArrayList<>(); } issues.add( new ValidationReport.Issue(issueLevel, message, n) ); } @@ -58,21 +60,21 @@ public class DefaultValidationReportBuilder implements ValidationReportBuilder { public void traceRuleActivation(Rule r) { if(ruleActivations == null) { - ruleActivations = new ArrayList<ValidationReport.RuleActivation>(); + ruleActivations = new ArrayList<>(); } ruleActivations.add( new ValidationReport.RuleActivation(r) ); } public void reportRuleError(Rule r, Exception e, String msg) { if(errors == null) { - errors = new ArrayList<ValidationReport.Error>(); + errors = new ArrayList<>(); } errors.add( new ValidationReport.RuleError(r, e, msg) ); } public void reportFixError(Fix f, Exception e, String msg) { if(errors == null) { - errors = new ArrayList<ValidationReport.Error>(); + errors = new ArrayList<>(); } errors.add( new ValidationReport.FixError(f, e, msg) ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/ValidationReport.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/ValidationReport.java b/core/src/main/java/org/apache/any23/validator/ValidationReport.java index 7b47809..56b3f10 100644 --- a/core/src/main/java/org/apache/any23/validator/ValidationReport.java +++ b/core/src/main/java/org/apache/any23/validator/ValidationReport.java @@ -39,9 +39,9 @@ public interface ValidationReport extends Serializable { * Defines the different issue levels. */ enum IssueLevel { - error, - warning, - info + ERROR, + WARNING, + INFO } /** @@ -70,9 +70,13 @@ public interface ValidationReport extends Serializable { */ class Issue implements Serializable { + /** + * + */ + private static final long serialVersionUID = 1L; private final IssueLevel level; private final String message; - private final Node origin; + private final transient Node origin; public Issue(IssueLevel level, String message, Node origin) { if(level == null) { @@ -117,6 +121,10 @@ public interface ValidationReport extends Serializable { */ class RuleActivation implements Serializable { + /** + * + */ + private static final long serialVersionUID = 1L; private final String ruleStr; public RuleActivation(Rule r) { @@ -141,6 +149,10 @@ public interface ValidationReport extends Serializable { */ abstract class Error implements Serializable { + /** + * + */ + private static final long serialVersionUID = 1L; private final Exception cause; private final String message; @@ -174,6 +186,10 @@ public interface ValidationReport extends Serializable { */ class RuleError extends Error { + /** + * + */ + private static final long serialVersionUID = 1L; private final Rule origin; public RuleError(Rule r, Exception e, String msg) { @@ -199,6 +215,10 @@ public interface ValidationReport extends Serializable { */ class FixError extends Error { + /** + * + */ + private static final long serialVersionUID = 1L; private final Fix origin; public FixError(Fix f, Exception e, String msg) { http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java b/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java index 2e709ed..0275c4e 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java +++ b/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java @@ -40,21 +40,23 @@ public class AboutNotURIRule implements Rule { public static final String NODES_WITH_INVALID_ABOUT = "nodes-with-invalid-about"; + @Override public String getHRName() { return "about-not-uri-rule"; } + @Override public boolean applyOn( DOMDocument document, - RuleContext context, + @SuppressWarnings("rawtypes") RuleContext context, ValidationReportBuilder validationReportBuilder ) { final List<Node> nodesWithAbout = document.getNodesWithAttribute("about"); - final List<Node> nodesWithInvalidAbout = new ArrayList<Node>(); + final List<Node> nodesWithInvalidAbout = new ArrayList<>(); for(Node nodeWithAbout : nodesWithAbout) { if ( ! aboutIsValid(nodeWithAbout) ) { validationReportBuilder.reportIssue( - ValidationReport.IssueLevel.error, + ValidationReport.IssueLevel.ERROR, "Invalid about value for node, expected valid URL.", nodeWithAbout ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java index 5a0bfae..149ce5f 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java @@ -34,10 +34,12 @@ import java.util.List; */ public class MetaNameMisuseFix implements Fix { + @Override public String getHRName() { return "meta-name-misuse-fix"; } + @Override @SuppressWarnings("unchecked") public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) { List<Node> nodes = (List<Node>) context.getData(MetaNameMisuseRule.ERRORED_META_NODES); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java index a803107..1b965ec 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java @@ -38,10 +38,12 @@ public class MetaNameMisuseRule implements Rule { public static final String ERRORED_META_NODES = "errored-meta-nodes"; + @Override public String getHRName() { return "meta-name-misuse-rule"; } + @Override public boolean applyOn( DOMDocument document, @SuppressWarnings("rawtypes") RuleContext context, @@ -49,14 +51,14 @@ public class MetaNameMisuseRule implements Rule { ) { List<Node> metaNodes = document.getNodes("/HTML/HEAD/META"); boolean foundIssue = false; - final List<Node> wrongMetaNodes = new ArrayList<Node>(); + final List<Node> wrongMetaNodes = new ArrayList<>(); for(Node metaNode : metaNodes) { Node nameNode = metaNode.getAttributes().getNamedItem("name"); if(nameNode != null && nameNode.getTextContent().contains(":")) { foundIssue = true; wrongMetaNodes.add(metaNode); validationReportBuilder.reportIssue( - ValidationReport.IssueLevel.error, + ValidationReport.IssueLevel.ERROR, "Error detected in meta node: name property contains a prefixed value.", metaNode ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java index 909a33a..58e4f11 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java @@ -18,6 +18,7 @@ package org.apache.any23.validator.rule; import java.util.List; +import org.apache.any23.extractor.html.DomUtils; import org.apache.any23.validator.DOMDocument; import org.apache.any23.validator.Fix; import org.apache.any23.validator.Rule; @@ -30,27 +31,37 @@ import org.w3c.dom.Node; */ public class MissingItemscopeAttributeValueFix implements Fix { + private static final String EMPTY_ITEMSCOPE_VALUE = ""; + + private static final String ITEMSCOPE = "itemscope"; + /** * Default constructor */ public MissingItemscopeAttributeValueFix() { + //default constructor } - public static final String EMPTY_ITEMSCOPE_VALUE = "=\"itemscope\""; - + @Override public String getHRName() { return "missing-itemscope-value-fix"; } + @Override public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) { - List<Node> itemNodes = document.getNodesWithAttribute("itemscope"); - for(Node itemNode : itemNodes) { - Node itemScopeNode = itemNode.getAttributes().getNamedItem("itemscope"); - if(itemScopeNode.getNodeValue().contentEquals("")) { - itemNode.getAttributes().getNamedItem("itemscope").setNodeValue(EMPTY_ITEMSCOPE_VALUE); + List<Node> itemScopeContainerElements = document.getNodesWithAttribute(ITEMSCOPE); + for(Node itemScopeContainerElement : itemScopeContainerElements) { + Node newItemScopeContainerElement = itemScopeContainerElement; + Node itemScopeNode = newItemScopeContainerElement.getAttributes().getNamedItem(ITEMSCOPE); + if (itemScopeNode.getTextContent() == null || itemScopeNode.getTextContent() == "") { + String node = DomUtils.getXPathForNode(itemScopeContainerElement); + document.addAttribute(node, ITEMSCOPE, EMPTY_ITEMSCOPE_VALUE); + //newItemScopeContainerElement.getAttributes().removeNamedItem(ITEMSCOPE); + //Attr newItemScopeNode = document.getOriginalDocument().createAttribute(ITEMSCOPE); + //newItemScopeNode.setNodeValue(EMPTY_ITEMSCOPE_VALUE); + //newItemScopeContainerElement.getAttributes().setNamedItem(newItemScopeNode); } } } - } http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java index 67d44b2..415b2dc 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java @@ -18,6 +18,7 @@ package org.apache.any23.validator.rule; import java.util.List; +import org.apache.any23.extractor.html.DomUtils; import org.apache.any23.validator.DOMDocument; import org.apache.any23.validator.Rule; import org.apache.any23.validator.RuleContext; @@ -45,6 +46,7 @@ public class MissingItemscopeAttributeValueRule implements Rule { * Default constructor */ public MissingItemscopeAttributeValueRule() { + //default costructor } @Override @@ -60,7 +62,7 @@ public class MissingItemscopeAttributeValueRule implements Rule { ValidationReportBuilder validationReportBuilder) { List<Node> itemNodes = document.getNodesWithAttribute("itemscope"); boolean foundPrecondition = false; - String propertyNode = null; + String propertyNode; Node iNode = null; for(Node itemNode : itemNodes) { iNode = itemNode; @@ -72,7 +74,7 @@ public class MissingItemscopeAttributeValueRule implements Rule { } if(foundPrecondition) { validationReportBuilder.reportIssue( - ValidationReport.IssueLevel.error, + ValidationReport.IssueLevel.ERROR, "Located absence of an accompanying value for the the 'itemscope' attribute of element with hashcode: " + iNode.hashCode(), iNode ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java index 8229525..3ab99f5 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java @@ -35,10 +35,12 @@ import java.util.List; */ public class MissingOpenGraphNamespaceRule implements Rule { + @Override public String getHRName() { return "missing-opengraph-namespace-rule"; } + @Override public boolean applyOn( DOMDocument document, @SuppressWarnings("rawtypes") RuleContext context, @@ -57,7 +59,7 @@ public class MissingOpenGraphNamespaceRule implements Rule { Node htmlNode = document.getNode("/HTML"); if( htmlNode.getAttributes().getNamedItem("xmlns:og") == null) { validationReportBuilder.reportIssue( - ValidationReport.IssueLevel.error, + ValidationReport.IssueLevel.ERROR, "Missing OpenGraph namespace declaration.", htmlNode ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java index 6975991..6492590 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java +++ b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java @@ -21,6 +21,7 @@ import org.apache.any23.validator.DOMDocument; import org.apache.any23.validator.Fix; import org.apache.any23.validator.Rule; import org.apache.any23.validator.RuleContext; +import org.apache.any23.vocab.OGP; /** * This fixes the missing <i>Open Graph</i> protocol. @@ -31,12 +32,14 @@ import org.apache.any23.validator.RuleContext; */ public class OpenGraphNamespaceFix implements Fix { - public static final String OPENGRAPH_PROTOCOL_NS = "http://opengraphprotocol.org/schema/"; + public static final String OPENGRAPH_PROTOCOL_NS = OGP.NS; + @Override public String getHRName() { return "opengraph-namespace-fix"; } + @Override public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) { document.addAttribute("/HTML", "xmlns:og", OPENGRAPH_PROTOCOL_NS); } http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/test/java/org/apache/any23/Any23Test.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java index 3f11087..32c4951 100644 --- a/core/src/test/java/org/apache/any23/Any23Test.java +++ b/core/src/test/java/org/apache/any23/Any23Test.java @@ -343,7 +343,7 @@ public class Any23Test extends Any23OnlineTestBase { try { runner.extract( new ExtractionParameters(DefaultConfiguration.singleton(), - ValidationMode.None), source, compositeTH1); + ValidationMode.NONE), source, compositeTH1); } finally { compositeTH1.close(); } @@ -383,7 +383,7 @@ public class Any23Test extends Any23OnlineTestBase { compositeTH1.addChild(ctw1); runner.extract( new ExtractionParameters(DefaultConfiguration.singleton(), - ValidationMode.None, true), source, compositeTH1); + ValidationMode.NONE, true), source, compositeTH1); compositeTH1.close(); logger.debug("Out1: " + baos.toString()); Assert.assertEquals("Unexpected number of triples.", @@ -397,7 +397,7 @@ public class Any23Test extends Any23OnlineTestBase { compositeTH2.addChild(ctw2); runner.extract( new ExtractionParameters(DefaultConfiguration.singleton(), - ValidationMode.ValidateAndFix, false), source, + ValidationMode.VALIDATE_AND_FIX, false), source, compositeTH2); compositeTH2.close(); logger.debug("Out2: " + baos.toString()); @@ -558,7 +558,7 @@ public class Any23Test extends Any23OnlineTestBase { ReportingTripleHandler outputHandler = new ReportingTripleHandler( new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments( new NTriplesWriter(out)))); - return any23.extract(new ExtractionParameters(conf, ValidationMode.ValidateAndFix, null, null), + return any23.extract(new ExtractionParameters(conf, ValidationMode.VALIDATE_AND_FIX, null, null), new StringDocumentSource(in, "http://host.com/path"), outputHandler, "UTF-8"); } http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java index c389f17..7917670 100644 --- a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java +++ b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java @@ -23,7 +23,6 @@ import org.apache.xml.serialize.XMLSerializer; import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,23 +76,21 @@ public class DefaultValidatorTest { logger.debug( validationReport.toString() ); } } - - @Ignore("Itemscope parsing issue") + @Test public void testMissingItemscopeAttributeValue() throws IOException, URISyntaxException, ValidatorException { DOMDocument document = loadDocument("microdata-basic.html"); - List<Node> brokenItemScopeNodes = document.getNodesWithAttribute("itemscope"); - for (Node node : brokenItemScopeNodes) { + List<Node> nullItemScopeNodes = document.getNodesWithAttribute("itemscope"); + for (Node node : nullItemScopeNodes) { // all nodes with itemscope have an empty string value Assert.assertEquals("", node.getAttributes().getNamedItem("itemscope").getNodeValue() ); } ValidationReport validationReport = validator.validate(document, true); List<Node> fixedItemScopeNodes = document.getNodesWithAttribute("itemscope"); for (Node node : fixedItemScopeNodes) { - // all nodes with itemscope now have a default value of "itemscope" + // all nodes with itemscope now have a default value of "" e.g. empty string Assert.assertNotNull(node.getAttributes().getNamedItem("itemscope").getNodeValue() ); - Assert.assertNotEquals("", node.getAttributes().getNamedItem("itemscope").getNodeValue() ); - Assert.assertEquals("itemscope", node.getAttributes().getNamedItem("itemscope").getNodeValue()); + Assert.assertEquals("", node.getAttributes().getNamedItem("itemscope").getNodeValue() ); } if(logger.isDebugEnabled()) { logger.debug( validationReport.toString() ); @@ -126,8 +123,8 @@ public class DefaultValidatorTest { Assert.assertEquals( "Unexpected number of issues.", 1, validationReport.getIssues().size() ); } - private DOMDocument loadDocument(String document) throws IOException, URISyntaxException { - InputStream is = this.getClass().getResourceAsStream(document); + public static DOMDocument loadDocument(String document) throws IOException, URISyntaxException { + InputStream is = DefaultValidatorTest.class.getResourceAsStream(document); final String documentIRI = "http://test.com"; TagSoupParser tsp = new TagSoupParser(is, documentIRI); return new DefaultDOMDocument( new URI(documentIRI), tsp.getDOM() ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/core/src/test/java/org/apache/any23/validator/XMLValidationReportSerializerTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/validator/XMLValidationReportSerializerTest.java b/core/src/test/java/org/apache/any23/validator/XMLValidationReportSerializerTest.java index 237d14f..5c3683e 100644 --- a/core/src/test/java/org/apache/any23/validator/XMLValidationReportSerializerTest.java +++ b/core/src/test/java/org/apache/any23/validator/XMLValidationReportSerializerTest.java @@ -68,7 +68,7 @@ public class XMLValidationReportSerializerTest { Document document = new DocumentImpl(); Element element = document.createElement("html"); - validationReportBuilder.reportIssue(ValidationReport.IssueLevel.info, "Test message", element); + validationReportBuilder.reportIssue(ValidationReport.IssueLevel.INFO, "Test message", element); validationReportBuilder.traceRuleActivation( new MetaNameMisuseRule() ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java b/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java index ede383d..ea87e00 100644 --- a/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java +++ b/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java @@ -17,10 +17,15 @@ package org.apache.any23.servlet; +import javax.servlet.RequestDispatcher; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; /** @@ -30,11 +35,22 @@ import java.io.IOException; * @author Davide Palmisano ( [email protected] ) */ public class RedirectServlet extends HttpServlet { - + + private static final Logger LOG = LoggerFactory.getLogger(RedirectServlet.class); + + /** + * + */ + private static final long serialVersionUID = 1L; + @Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - doGet(request, response); + try { + doGet(request, response); + } catch (ServletException | IOException e) { + LOG.error("Error executing GET request.", e); + } } @Override @@ -44,21 +60,36 @@ public class RedirectServlet extends HttpServlet { final String pathInfo = request.getPathInfo(); final String queryString = request.getQueryString(); - if (("/".equals(pathInfo) && queryString == null)) { - getServletContext().getRequestDispatcher("/resources/form.html").forward(request, response); + if ("/".equals(pathInfo) && queryString == null) { + RequestDispatcher dispatcher = getServletContext().getRequestDispatcher("/resources/form.html"); + try { + dispatcher.forward(request, response); + } catch (ServletException | IOException e) { + LOG.error("Error in request dispatcher forwarding.", e); + } return; } // forward requests to /resources/* to the default servlet, this is // where we can put static files if (pathInfo.startsWith("/resources/")) { - getServletContext().getNamedDispatcher("default").forward(request, response); + RequestDispatcher dispatcher = getServletContext().getNamedDispatcher("default"); + try { + dispatcher.forward(request, response); + } catch (ServletException | IOException e) { + LOG.error("Error in named request dispatcher forwarding.", e); + } return; } - response.sendRedirect( - request.getContextPath() + "/any23" + - request.getPathInfo() + - (queryString == null ? "" : "?" + queryString) - ); + try { + response.sendRedirect( + request.getContextPath() + "/any23" + + request.getPathInfo() + + (queryString == null ? "" : "?" + queryString) + ); + } catch (IOException e) { + LOG.error("Error in sending HttpServletResponse Redirect.", e); + } + } } http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/service/src/main/java/org/apache/any23/servlet/Servlet.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java index b93662e..b63d052 100644 --- a/service/src/main/java/org/apache/any23/servlet/Servlet.java +++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java @@ -56,11 +56,11 @@ public class Servlet extends HttpServlet { private static final long serialVersionUID = 8207685628715421336L; - private final static Pattern schemeAndSingleSlashRegex = + private static final Pattern schemeAndSingleSlashRegex = Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:/[^/]"); // RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - private final static Pattern schemeRegex = + private static final Pattern schemeRegex = Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:"); @Override @@ -142,8 +142,6 @@ public class Servlet extends HttpServlet { MediaRangeSpec result = Any23Negotiator.getNegotiator().getBestMatch(request.getHeader("Accept")); if (result == null) { return null; - } else if (RDFFormat.TURTLE.hasMIMEType(result.getMediaType())) { - return "turtle"; } else if (RDFFormat.N3.hasMIMEType(result.getMediaType())) { return "n3"; } else if (RDFFormat.NQUADS.hasMIMEType(result.getMediaType())) { @@ -155,7 +153,7 @@ public class Servlet extends HttpServlet { } else if (RDFFormat.JSONLD.hasMIMEType(result.getMediaType())) { return "ld+json"; } else { - return "turtle"; // shouldn't happen + return "turtle"; // shouldn't happen however default is turtle } } @@ -260,13 +258,13 @@ public class Servlet extends HttpServlet { final String parameter = "validation-mode"; final String validationMode = request.getParameter(parameter); if (validationMode == null) - return ValidationMode.None; + return ValidationMode.NONE; if ("none".equalsIgnoreCase(validationMode)) - return ValidationMode.None; + return ValidationMode.NONE; if ("validate".equalsIgnoreCase(validationMode)) - return ValidationMode.Validate; + return ValidationMode.VALIDATE; if ("validate-fix".equalsIgnoreCase(validationMode)) - return ValidationMode.ValidateAndFix; + return ValidationMode.VALIDATE_AND_FIX; throw new IllegalArgumentException( String.format("Invalid value '%s' for '%s' parameter.", validationMode, parameter) ); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/service/src/main/java/org/apache/any23/servlet/WebResponder.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/WebResponder.java b/service/src/main/java/org/apache/any23/servlet/WebResponder.java index 3101e09..5b16070 100644 --- a/service/src/main/java/org/apache/any23/servlet/WebResponder.java +++ b/service/src/main/java/org/apache/any23/servlet/WebResponder.java @@ -329,7 +329,7 @@ class WebResponder { FormatWriter fw = factory.getRdfWriter(byteOutStream); fw.setAnnotated(annotate); outputMediaType = factory.getMimeType(); - List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>(); + List<TripleHandler> tripleHandlers = new ArrayList<>(); tripleHandlers.add(new IgnoreAccidentalRDFa(fw)); tripleHandlers.add(new CountingTripleHandler()); rdfWriter = new CompositeTripleHandler(tripleHandlers); http://git-wip-us.apache.org/repos/asf/any23/blob/4c81edde/test-resources/src/test/resources/microdata/microdata-basic.html ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-basic.html b/test-resources/src/test/resources/microdata/microdata-basic.html index 3ffca84..e7d4dba 100644 --- a/test-resources/src/test/resources/microdata/microdata-basic.html +++ b/test-resources/src/test/resources/microdata/microdata-basic.html @@ -19,70 +19,87 @@ <head> <body> -<!-- result0 --> -<div itemscope> - <p>My name is <span itemprop="name">Elizabeth</span>.</p> -</div> -<!-- result1 --> -<div itemscope> - <p>My name is <span itemprop="name">Daniel</span>.</p> -</div> + <!-- result0 --> + <div itemscope> + <p> + My name is <span itemprop="name">Elizabeth</span>. + </p> + </div> + <!-- result1 --> + <div itemscope> + <p> + My name is <span itemprop="name">Daniel</span>. + </p> + </div> -<!-- result2 --> -<div itemscope> - <p>My name is <span itemprop="name">Neil</span>.</p> - <p>My band is called <span itemprop="band">Four Parts Water</span>.</p> - <p>I am <span itemprop="nationality">British</span>.</p> -</div> + <!-- result2 --> + <div itemscope> + <p> + My name is <span itemprop="name">Neil</span>. + </p> + <p> + My band is called <span itemprop="band">Four Parts Water</span>. + </p> + <p> + I am <span itemprop="nationality">British</span>. + </p> + </div> -<!-- result3 --> -<div itemscope> + <!-- result3 --> + <div itemscope> <img itemprop="image" src="google-logo.png" alt="Google"> -</div> + </div> -<!-- result4 --> -<div itemscope> - I was born on <time itemprop="birthday" datetime="2009-05-10">May 10th 2009</time>. -</div> + <!-- result4 --> + <div itemscope> + I was born on + <time itemprop="birthday" datetime="2009-05-10">May 10th 2009</time> + . + </div> -<!-- result5 --> -<div itemscope> + <!-- result5 --> + <div itemscope> <p>Flavors in my favorite ice cream:</p> <ul> - <li itemprop="flavor">Lemon sorbet</li> - <li itemprop="flavor">Apricot sorbet</li> + <li itemprop="flavor">Lemon sorbet</li> + <li itemprop="flavor">Apricot sorbet</li> </ul> -</div> + </div> -<!-- result6 --> -<div itemscope> + <!-- result6 --> + <div itemscope> <span itemprop="favorite-color favorite-fruit">orange</span> -</div> + </div> -<!-- result7 --> -<figure> + <!-- result7 --> + <figure> <img src="castle.jpeg"> - <figcaption><span itemscope><span itemprop="name">The Castle</span></span> (1986)</figcaption> -</figure> + <figcaption> + <span itemscope> + <span itemprop="name">The Castle</span> + </span> + (1986) + </figcaption> + </figure> -<!-- result8 --> -<span itemscope><meta itemprop="name" content="The Castle"></span> -<figure> + <!-- result8 --> + <span itemscope><meta itemprop="name" content="The Castle"></span> + <figure> <img src="castle.jpeg"> <figcaption>The Castle (1986)</figcaption> -</figure> + </figure> -<!-- result9 --> -<section itemscope itemtype="http://example.org/animals#cat"> + <!-- result9 --> + <section itemscope itemtype="http://example.org/animals#cat"> <h1 itemprop="name">Hedral</h1> <p itemprop="desc">Hedral is a male american domestic shorthair, - with a fluffy black fur with white paws and belly.</p> - <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months"> -</section> + with a fluffy black fur with white paws and belly.</p> + <img itemprop="img" src="hedral.jpeg" alt="" + title="Hedral, age 18 months"> + </section> -<!-- result10 --> -<dl itemscope - itemtype="http://vocab.example.net/book" + <!-- result10 --> + <dl itemscope itemtype="http://vocab.example.net/book" itemid="urn:isbn:0-330-34032-8"> <dt>Title <dd itemprop="title">The Reality Dysfunction @@ -90,17 +107,21 @@ <dd itemprop="author">Peter F. Hamilton <dt>Publication date <dd> - <time itemprop="pubdate" datetime="1996-01-26">26 January 1996</time> -</dl> + <time itemprop="pubdate" datetime="1996-01-26">26 January + 1996</time> + </dl> -<!-- result11 --> -<section itemscope itemtype="http://example.org/animals#cat"> + <!-- result11 --> + <section itemscope itemtype="http://example.org/animals#cat"> <h1 itemprop="name http://example.com/fn">Hedral</h1> - <p itemprop="desc">Hedral is a male american domestic shorthair, with a fluffy - <span itemprop="http://example.com/color">black</span> fur with - <span itemprop="http://example.com/color">white</span> paws and belly.</p> - <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months"> -</section> + <p itemprop="desc"> + Hedral is a male american domestic shorthair, with a fluffy <span + itemprop="http://example.com/color">black</span> fur with <span + itemprop="http://example.com/color">white</span> paws and belly. + </p> + <img itemprop="img" src="hedral.jpeg" alt="" + title="Hedral, age 18 months"> + </section> </body> </head>
