Repository: any23 Updated Branches: refs/heads/master 108d87eef -> 57afcd1a3
ANY23-247 FIX Attribute name itemscope associated with an element type html must be followed by the ' = ' character Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/fc459327 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/fc459327 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/fc459327 Branch: refs/heads/master Commit: fc4593272a2e331ac5abfbe8ef1c46713a2b6f7f Parents: 8de1df6 Author: Lewis John McGibbney <[email protected]> Authored: Fri Mar 25 14:52:05 2016 -0700 Committer: Lewis John McGibbney <[email protected]> Committed: Fri Mar 25 14:52:05 2016 -0700 ---------------------------------------------------------------------- .../extractor/SingleDocumentExtraction.java | 5 +- .../any23/extractor/rdf/BaseRDFExtractor.java | 6 +- .../any23/validator/DefaultValidator.java | 6 +- .../any23/validator/rule/MetaNameMisuseFix.java | 2 +- .../validator/rule/MetaNameMisuseRule.java | 2 +- .../rule/MissingItemscopeAttributeValueFix.java | 56 ++++++++++ .../MissingItemscopeAttributeValueRule.java | 84 +++++++++++++++ .../rule/MissingOpenGraphNamespaceRule.java | 2 +- .../validator/rule/OpenGraphNamespaceFix.java | 2 +- .../test/java/org/apache/any23/Any23Test.java | 10 +- .../any23/validator/DefaultValidatorTest.java | 25 ++++- core/src/test/resources/log4j.properties | 53 +++++---- src/site/apt/index.apt | 13 ++- .../apache/any23/validator/microdata-basic.html | 107 +++++++++++++++++++ 14 files changed, 334 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java index 
009a604..e05c6b7 100644 --- a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java +++ b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java @@ -17,7 +17,6 @@ package org.apache.any23.extractor; -import org.apache.any23.extractor.ExtractionParameters.ValidationMode; import org.apache.any23.configuration.Configuration; import org.apache.any23.configuration.DefaultConfiguration; import org.apache.any23.encoding.EncodingDetector; @@ -251,6 +250,7 @@ public class SingleDocumentExtraction { try { final String documentLanguage = extractDocumentLanguage(extractionParameters); for (ExtractorFactory<?> factory : matchingExtractors) { + @SuppressWarnings("rawtypes") final Extractor extractor = factory.createExtractor(); final SingleExtractionReport er = runExtractor( extractionParameters, @@ -343,6 +343,7 @@ public class SingleDocumentExtraction { /** * @return the list of all the activated extractors for the given {@link org.apache.any23.source.DocumentSource}. 
*/ + @SuppressWarnings("rawtypes") public List<Extractor> getMatchingExtractors() { final List<Extractor> extractorsList = new ArrayList<Extractor>(); for(ExtractorFactory extractorFactory : matchingExtractors) { @@ -444,7 +445,7 @@ public class SingleDocumentExtraction { final Extractor<?> extractor ) throws ExtractionException, IOException, ValidatorException { if(log.isDebugEnabled()) { - log.debug("Running " + extractor.getDescription().getExtractorName() + " on " + documentURI); + log.debug("Running {} on {}", extractor.getDescription().getExtractorName(), documentURI); } long startTime = System.currentTimeMillis(); final ExtractionContext extractionContext = new ExtractionContext( http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java index e32ec51..be01d3f 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java @@ -94,11 +94,11 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>()); // Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes - parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true); + parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true); parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES); - parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); + parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); 
parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES); - parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false); + parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false); parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES); //ByteBuffer seems to represent incorrect content. Need to make sure it is the content //of the <script> node and not anything else! http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/DefaultValidator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/DefaultValidator.java b/core/src/main/java/org/apache/any23/validator/DefaultValidator.java index 828ef1a..0094e54 100644 --- a/core/src/main/java/org/apache/any23/validator/DefaultValidator.java +++ b/core/src/main/java/org/apache/any23/validator/DefaultValidator.java @@ -20,6 +20,8 @@ package org.apache.any23.validator; import org.apache.any23.validator.rule.AboutNotURIRule; import org.apache.any23.validator.rule.MetaNameMisuseFix; import org.apache.any23.validator.rule.MetaNameMisuseRule; +import org.apache.any23.validator.rule.MissingItemscopeAttributeValueFix; +import org.apache.any23.validator.rule.MissingItemscopeAttributeValueRule; import org.apache.any23.validator.rule.MissingOpenGraphNamespaceRule; import org.apache.any23.validator.rule.OpenGraphNamespaceFix; import org.w3c.dom.Document; @@ -54,7 +56,8 @@ public class DefaultValidator implements Validator { final ValidationReportBuilder validationReportBuilder = new DefaultValidationReportBuilder(); for(Class<? 
extends Rule> cRule : rulesOrder) { Rule rule = newRuleInstance(cRule); - final RuleContext ruleContext = new DefaultRuleContext(); + @SuppressWarnings("rawtypes") + final RuleContext ruleContext = new DefaultRuleContext(); boolean applyOn; try { applyOn = rule.applyOn(document, ruleContext, validationReportBuilder); @@ -121,6 +124,7 @@ public class DefaultValidator implements Validator { addRule(MetaNameMisuseRule.class, MetaNameMisuseFix.class); addRule(MissingOpenGraphNamespaceRule.class, OpenGraphNamespaceFix.class); addRule(AboutNotURIRule.class); + addRule(MissingItemscopeAttributeValueRule.class, MissingItemscopeAttributeValueFix.class); } private Fix newFixInstance(Class<? extends Fix> cFix) throws ValidatorException { http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java index dacde1b..5a0bfae 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java @@ -39,7 +39,7 @@ public class MetaNameMisuseFix implements Fix { } @SuppressWarnings("unchecked") - public void execute(Rule rule, RuleContext context, DOMDocument document) { + public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) { List<Node> nodes = (List<Node>) context.getData(MetaNameMisuseRule.ERRORED_META_NODES); for(Node node : nodes) { final String nameValue = node.getAttributes().getNamedItem("name").getTextContent(); http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java index cc9c886..a803107 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java @@ -44,7 +44,7 @@ public class MetaNameMisuseRule implements Rule { public boolean applyOn( DOMDocument document, - RuleContext context, + @SuppressWarnings("rawtypes") RuleContext context, ValidationReportBuilder validationReportBuilder ) { List<Node> metaNodes = document.getNodes("/HTML/HEAD/META"); http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java new file mode 100644 index 0000000..909a33a --- /dev/null +++ b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.any23.validator.rule; + +import java.util.List; + +import org.apache.any23.validator.DOMDocument; +import org.apache.any23.validator.Fix; +import org.apache.any23.validator.Rule; +import org.apache.any23.validator.RuleContext; +import org.w3c.dom.Node; + +/** + * Fix for the issue described within + * {@link org.apache.any23.validator.rule.MissingItemscopeAttributeValueRule} + */ +public class MissingItemscopeAttributeValueFix implements Fix { + + /** + * Default constructor + */ + public MissingItemscopeAttributeValueFix() { + } + + public static final String EMPTY_ITEMSCOPE_VALUE = "=\"itemscope\""; + + public String getHRName() { + return "missing-itemscope-value-fix"; + } + + public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) { + + List<Node> itemNodes = document.getNodesWithAttribute("itemscope"); + for(Node itemNode : itemNodes) { + Node itemScopeNode = itemNode.getAttributes().getNamedItem("itemscope"); + if(itemScopeNode.getNodeValue().contentEquals("")) { + itemNode.getAttributes().getNamedItem("itemscope").setNodeValue(EMPTY_ITEMSCOPE_VALUE); + } + } + } + +} http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java new file mode 100644 index 0000000..b0ecd9b --- /dev/null +++ b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.any23.validator.rule; + +import java.util.List; + +import org.apache.any23.validator.DOMDocument; +import org.apache.any23.validator.Rule; +import org.apache.any23.validator.RuleContext; +import org.apache.any23.validator.ValidationReport; +import org.apache.any23.validator.ValidationReportBuilder; +import org.w3c.dom.Node; + +/** + * This fixes missing attribute values for the 'itemscope' attribute + * Typically when such a snippet of XHTML is fed through the + * {@link org.apache.any23.extractor.rdfa.RDFa11Extractor}, and + * subsequently to Sesame's {@link org.semarglproject.sesame.rdf.rdfa.SesameRDFaParser}, + * it will result in the following behavior. + * <pre> + * {@code + * [Fatal Error] :23:15: Attribute name "itemscope" associated with an element type "div" must be followed by the ' = ' character. + * } + * </pre> + * This Rule identifies that happening. 
+ * + */ +public class MissingItemscopeAttributeValueRule implements Rule { + + /** + * Default constructor + */ + public MissingItemscopeAttributeValueRule() { + } + + @Override + public String getHRName() { + return "missing-itemscope-value-rule"; + } + + /** + * @see org.apache.any23.validator.Rule#applyOn(org.apache.any23.validator.DOMDocument, org.apache.any23.validator.RuleContext, org.apache.any23.validator.ValidationReportBuilder) + */ + @Override + public boolean applyOn(DOMDocument document, @SuppressWarnings("rawtypes") RuleContext context, + ValidationReportBuilder validationReportBuilder) { + List<Node> itemNodes = document.getNodesWithAttribute("itemscope"); + boolean foundPrecondition = false; + String propertyNode = null; + Node iNode = null; + for(Node itemNode : itemNodes) { + iNode = itemNode; + propertyNode = iNode.getAttributes().getNamedItem("itemscope").getNodeValue(); + if( propertyNode == null || propertyNode.contentEquals("")) { + foundPrecondition = true; + break; + } + } + if(foundPrecondition) { + validationReportBuilder.reportIssue( + ValidationReport.IssueLevel.error, + "Located absence of an accompanying value for the the 'itemscope' attribute of element with hashcode: " + iNode.hashCode(), + iNode + ); + return true; + } + return false; + } + +} http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java index f814016..8229525 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java +++ b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java @@ -41,7 +41,7 @@ public class MissingOpenGraphNamespaceRule implements Rule { 
public boolean applyOn( DOMDocument document, - RuleContext context, + @SuppressWarnings("rawtypes") RuleContext context, ValidationReportBuilder validationReportBuilder ) { List<Node> metas = document.getNodes("/HTML/HEAD/META"); http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java index c0b394b..6975991 100644 --- a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java +++ b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java @@ -37,7 +37,7 @@ public class OpenGraphNamespaceFix implements Fix { return "opengraph-namespace-fix"; } - public void execute(Rule rule, RuleContext context, DOMDocument document) { + public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) { document.addAttribute("/HTML", "xmlns:og", OPENGRAPH_PROTOCOL_NS); } http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/test/java/org/apache/any23/Any23Test.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java index c487ee8..4e0d9c2 100644 --- a/core/src/test/java/org/apache/any23/Any23Test.java +++ b/core/src/test/java/org/apache/any23/Any23Test.java @@ -18,6 +18,7 @@ package org.apache.any23; import org.junit.Assert; +import org.apache.any23.configuration.Configuration; import org.apache.any23.configuration.DefaultConfiguration; import org.apache.any23.configuration.ModifiableConfiguration; import org.apache.any23.extractor.ExtractionException; @@ -53,7 +54,6 @@ import org.openrdf.repository.RepositoryException; import 
org.openrdf.repository.RepositoryResult; import org.openrdf.repository.sail.SailRepository; import org.openrdf.rio.RDFParseException; -import org.openrdf.sail.Sail; import org.openrdf.sail.SailException; import org.openrdf.sail.memory.MemoryStore; import org.slf4j.Logger; @@ -552,11 +552,13 @@ public class Any23Test extends Any23OnlineTestBase { */ private ExtractionReport detectAndExtract(String in) throws Exception { Any23 any23 = new Any23(); + Configuration conf = DefaultConfiguration.copy(); ByteArrayOutputStream out = new ByteArrayOutputStream(); ReportingTripleHandler outputHandler = new ReportingTripleHandler( new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments( new NTriplesWriter(out)))); - return any23.extract(in, "http://host.com/path", outputHandler); + return any23.extract(new ExtractionParameters(conf, ValidationMode.ValidateAndFix, null, null), + new StringDocumentSource(in, "http://host.com/path"), outputHandler, "UTF-8"); } /** @@ -586,9 +588,9 @@ public class Any23Test extends Any23OnlineTestBase { * @throws ExtractionException */ private void assertExtractorActivation(String in, - Class<? extends Extractor>... expectedExtractors) throws Exception { + @SuppressWarnings("rawtypes") Class<? extends Extractor>... expectedExtractors) throws Exception { final ExtractionReport extractionReport = detectAndExtract(in); - for (Class<? extends Extractor> expectedExtractorClass : expectedExtractors) { + for (@SuppressWarnings("rawtypes") Class<? 
extends Extractor> expectedExtractorClass : expectedExtractors) { Assert.assertTrue( String.format( "Detection and extraction failed, expected extractor [%s] not found.", http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java index efef2f2..f31b846 100644 --- a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java +++ b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java @@ -76,6 +76,27 @@ public class DefaultValidatorTest { logger.debug( validationReport.toString() ); } } + + @Test + public void testMissingItemscopeAttributeValue() throws IOException, URISyntaxException, ValidatorException { + DOMDocument document = loadDocument("microdata-basic.html"); + List<Node> brokenItemScopeNodes = document.getNodesWithAttribute("itemscope"); + for (Node node : brokenItemScopeNodes) { + // all nodes with itemscope have an empty string value + Assert.assertEquals("", node.getAttributes().getNamedItem("itemscope").getNodeValue() ); + } + ValidationReport validationReport = validator.validate(document, true); + List<Node> fixedItemScopeNodes = document.getNodesWithAttribute("itemscope"); + for (Node node : fixedItemScopeNodes) { + // all nodes with itemscope now have a default value of "itemscope" + Assert.assertNotNull(node.getAttributes().getNamedItem("itemscope").getNodeValue() ); + Assert.assertNotEquals("", node.getAttributes().getNamedItem("itemscope").getNodeValue() ); + Assert.assertEquals("itemscope", node.getAttributes().getNamedItem("itemscope").getNodeValue()); + } + if(logger.isDebugEnabled()) { + logger.debug( validationReport.toString() ); + } + } @Test public void testMetaNameMisuse() throws Exception { @@ -133,7 +154,7 @@ 
public class DefaultValidatorTest { public boolean applyOn( DOMDocument document, - RuleContext context, + @SuppressWarnings("rawtypes") RuleContext context, ValidationReportBuilder validationReportBuilder ) { throw new UnsupportedOperationException(); @@ -145,7 +166,7 @@ public class DefaultValidatorTest { return "fake-fix"; } - public void execute(Rule rule, RuleContext context, DOMDocument document) { + public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext context, DOMDocument document) { throw new UnsupportedOperationException(); } } http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/core/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties index 4aa0d92..4634d6b 100644 --- a/core/src/test/resources/log4j.properties +++ b/core/src/test/resources/log4j.properties @@ -1,20 +1,35 @@ -log4j.rootCategory=INFO, O - -# Stdout +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +log4j.rootCategory=DEBUG, O + +# Stdout log4j.appender.O=org.apache.log4j.ConsoleAppender - -# File -#log4j.appender.R=org.apache.log4j.RollingFileAppender -#log4j.appender.R.File=log4j.log - -# Control the maximum log file size -#log4j.appender.R.MaxFileSize=100KB - -# Archive log files (one backup file here) -log4j.appender.R.MaxBackupIndex=1 - -log4j.appender.R.layout=org.apache.log4j.PatternLayout -log4j.appender.O.layout=org.apache.log4j.PatternLayout - -log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n -log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n + +# File +#log4j.appender.R=org.apache.log4j.RollingFileAppender +#log4j.appender.R.File=log4j.log + +# Control the maximum log file size +#log4j.appender.R.MaxFileSize=100KB + +# Archive log files (one backup file here) +log4j.appender.R.MaxBackupIndex=1 + +log4j.appender.R.layout=org.apache.log4j.PatternLayout +log4j.appender.O.layout=org.apache.log4j.PatternLayout + +log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n +log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/src/site/apt/index.apt ---------------------------------------------------------------------- diff --git a/src/site/apt/index.apt b/src/site/apt/index.apt index 5769466..f81da17 100644 --- a/src/site/apt/index.apt +++ b/src/site/apt/index.apt @@ -3,7 +3,6 @@ ------ The Apache Software Foundation ------ - 2011-2012 ~~ Licensed to the Apache Software Foundation (ASF) under one or more ~~ contributor license agreements. 
See the NOTICE file distributed with @@ -31,12 +30,18 @@ Introduction to Apache Any23 * {{{http://www.w3.org/TR/xhtml-rdfa-primer/}RDFa}} with {{{http://www.w3.org/TR/2010/WD-rdfa-core-20100422/#scoping-of-prefix-mappings}RDFa1.1 prefix mechanism}} - * {{{http://microformats.org/}Microformats}}: Adr, Geo, hCalendar, hCard, hListing, hResume, hReview, License, XFN and Species + * {{{http://microformats.org/}Microformats1}} and {{{http://microformats.org/wiki/microformats-2}Microformats2}}: hAdr, hCard, hCalendar, hEntry, hEvent, hGeo, hItem, hListing, hProduct, hRecipe, hResume, hReview, License, Species, XFN, etc. + + * {{{http://json-ld.org/}JSON-LD}}: JSON for Linking Data, a lightweight Linked Data format that is based on the already successful JSON format and provides a way to help JSON data interoperate at Web-scale. * {{{http://dev.w3.org/html5/md/}HTML5 Microdata}}: (such as {{{http://schema.org}Schema.org}}) * {{{http://www.ietf.org/rfc/rfc4180.txt}CSV}}: Comma Separated Values with separator autodetection. + * Vocabularies: Extraction support for {{{http://dublincore.org/}Dublin Core Terms}}, {{{http://www.w3.org/wiki/DescriptionOfACareerVocabulary}Description of a Career}}, {{{https://github.com/edumbill/doap/wiki}Description Of A Project}}, {{{http://xmlns.com/foaf/spec/}Friend Of A Friend}}, {{{http://www.geonames.org/ontology/}GEO Names}}, {{{http://www.w3.org/2002/12/cal/icaltzd#}ICAL}}, {{{https://github.com/RinkeHoekstra/lkif-core}lkif-core}}, {{{http://ogp.me/}Open Graph Protocol}}, {{{http://purl.org/ontology/po/}BBC Programmes Ontology}}, {{{http://vocab.org/review/terms.html}RDF Review Vocabulary}}, {{{http://schema.org/}schema.org}}, {{{http://www.w3.org/2006/vcard/ns}VCard}}, {{{http://purl.org/ontology/wo/}BBC Wildlife Ontology}} and {{{http://www.w3.org/1999/xhtml/vocab/}XHTML}}... and more! + + + A detailed description of available extractors is {{{./extractors.html}here}}.
<<Apache Any23>> is used in major Web of Data applications such as {{{http://sindice.com/}sindice.com}} and {{{http://sig.ma/}sig.ma}}. It is written in Java and licensed under the {{{http://any23.googlecode.com/svn/trunk/LICENSE.txt}Apache License}}. @@ -45,9 +50,9 @@ Introduction to Apache Any23 * As a command-line tool for extracting and converting between the supported formats. * As online service API available at {{{http://any23.org/}any23.org}}. - You can <<download>> the latest release from {{{./download.html}Apache Mirrors}}. + You can <<download>> the latest release from our {{{./download.html}Apache Mirrors}}. - Previous versions are available from the {{{http://code.google.com/p/any23/downloads/list}download site at Google Code}}. + Previous versions are available from the {{{http://archive.apache.org/dist/any23/}Apache Archives site}}. * Documentation Content http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html b/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html new file mode 100644 index 0000000..3ffca84 --- /dev/null +++ b/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html @@ -0,0 +1,107 @@ +<!DOCTYPE html> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License.
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<html> +<head> +<body> + +<!-- result0 --> +<div itemscope> + <p>My name is <span itemprop="name">Elizabeth</span>.</p> +</div> +<!-- result1 --> +<div itemscope> + <p>My name is <span itemprop="name">Daniel</span>.</p> +</div> + +<!-- result2 --> +<div itemscope> + <p>My name is <span itemprop="name">Neil</span>.</p> + <p>My band is called <span itemprop="band">Four Parts Water</span>.</p> + <p>I am <span itemprop="nationality">British</span>.</p> +</div> + +<!-- result3 --> +<div itemscope> + <img itemprop="image" src="google-logo.png" alt="Google"> +</div> + +<!-- result4 --> +<div itemscope> + I was born on <time itemprop="birthday" datetime="2009-05-10">May 10th 2009</time>. 
+</div> + +<!-- result5 --> +<div itemscope> + <p>Flavors in my favorite ice cream:</p> + <ul> + <li itemprop="flavor">Lemon sorbet</li> + <li itemprop="flavor">Apricot sorbet</li> + </ul> +</div> + +<!-- result6 --> +<div itemscope> + <span itemprop="favorite-color favorite-fruit">orange</span> +</div> + +<!-- result7 --> +<figure> + <img src="castle.jpeg"> + <figcaption><span itemscope><span itemprop="name">The Castle</span></span> (1986)</figcaption> +</figure> + +<!-- result8 --> +<span itemscope><meta itemprop="name" content="The Castle"></span> +<figure> + <img src="castle.jpeg"> + <figcaption>The Castle (1986)</figcaption> +</figure> + +<!-- result9 --> +<section itemscope itemtype="http://example.org/animals#cat"> + <h1 itemprop="name">Hedral</h1> + <p itemprop="desc">Hedral is a male american domestic shorthair, + with a fluffy black fur with white paws and belly.</p> + <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months"> +</section> + +<!-- result10 --> +<dl itemscope + itemtype="http://vocab.example.net/book" + itemid="urn:isbn:0-330-34032-8"> + <dt>Title + <dd itemprop="title">The Reality Dysfunction + <dt>Author + <dd itemprop="author">Peter F. Hamilton + <dt>Publication date + <dd> + <time itemprop="pubdate" datetime="1996-01-26">26 January 1996</time> +</dl> + +<!-- result11 --> +<section itemscope itemtype="http://example.org/animals#cat"> + <h1 itemprop="name http://example.com/fn">Hedral</h1> + <p itemprop="desc">Hedral is a male american domestic shorthair, with a fluffy + <span itemprop="http://example.com/color">black</span> fur with + <span itemprop="http://example.com/color">white</span> paws and belly.</p> + <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months"> +</section> + +</body> +</head> +</html> \ No newline at end of file
