Repository: any23
Updated Branches:
  refs/heads/master 108d87eef -> 57afcd1a3


ANY23-247 FIX Attribute name itemscope associated with an element type html 
must be followed by the ' = ' character


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/fc459327
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/fc459327
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/fc459327

Branch: refs/heads/master
Commit: fc4593272a2e331ac5abfbe8ef1c46713a2b6f7f
Parents: 8de1df6
Author: Lewis John McGibbney <[email protected]>
Authored: Fri Mar 25 14:52:05 2016 -0700
Committer: Lewis John McGibbney <[email protected]>
Committed: Fri Mar 25 14:52:05 2016 -0700

----------------------------------------------------------------------
 .../extractor/SingleDocumentExtraction.java     |   5 +-
 .../any23/extractor/rdf/BaseRDFExtractor.java   |   6 +-
 .../any23/validator/DefaultValidator.java       |   6 +-
 .../any23/validator/rule/MetaNameMisuseFix.java |   2 +-
 .../validator/rule/MetaNameMisuseRule.java      |   2 +-
 .../rule/MissingItemscopeAttributeValueFix.java |  56 ++++++++++
 .../MissingItemscopeAttributeValueRule.java     |  84 +++++++++++++++
 .../rule/MissingOpenGraphNamespaceRule.java     |   2 +-
 .../validator/rule/OpenGraphNamespaceFix.java   |   2 +-
 .../test/java/org/apache/any23/Any23Test.java   |  10 +-
 .../any23/validator/DefaultValidatorTest.java   |  25 ++++-
 core/src/test/resources/log4j.properties        |  53 +++++----
 src/site/apt/index.apt                          |  13 ++-
 .../apache/any23/validator/microdata-basic.html | 107 +++++++++++++++++++
 14 files changed, 334 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java 
b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
index 009a604..e05c6b7 100644
--- 
a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
+++ 
b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
@@ -17,7 +17,6 @@
 
 package org.apache.any23.extractor;
 
-import org.apache.any23.extractor.ExtractionParameters.ValidationMode;
 import org.apache.any23.configuration.Configuration;
 import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.encoding.EncodingDetector;
@@ -251,6 +250,7 @@ public class SingleDocumentExtraction {
         try {
             final String documentLanguage = 
extractDocumentLanguage(extractionParameters);
             for (ExtractorFactory<?> factory : matchingExtractors) {
+                @SuppressWarnings("rawtypes")
                 final Extractor extractor = factory.createExtractor();
                 final SingleExtractionReport er = runExtractor(
                         extractionParameters,
@@ -343,6 +343,7 @@ public class SingleDocumentExtraction {
     /**
      * @return the list of all the activated extractors for the given {@link 
org.apache.any23.source.DocumentSource}.
      */
+    @SuppressWarnings("rawtypes")
     public List<Extractor> getMatchingExtractors() {
         final List<Extractor> extractorsList = new ArrayList<Extractor>();
         for(ExtractorFactory extractorFactory : matchingExtractors) {
@@ -444,7 +445,7 @@ public class SingleDocumentExtraction {
             final Extractor<?> extractor
     ) throws ExtractionException, IOException, ValidatorException {
         if(log.isDebugEnabled()) {
-            log.debug("Running " + 
extractor.getDescription().getExtractorName() + " on " + documentURI);
+            log.debug("Running {} on {}", 
extractor.getDescription().getExtractorName(), documentURI);
         }
         long startTime = System.currentTimeMillis();
         final ExtractionContext extractionContext = new ExtractionContext(

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java 
b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index e32ec51..be01d3f 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -94,11 +94,11 @@ public abstract class BaseRDFExtractor implements 
Extractor.ContentExtractor {
             parser.getParserConfig().setNonFatalErrors(new 
HashSet<RioSetting<?>>());
 
             // Disable verification to ensure that DBPedia is accessible, 
given it uses so many custom datatypes
-            
parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, 
true);                
+            
parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, 
true);
             
parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
-            
parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); 
               
+            
parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
             
parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
-            
parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, 
false);                
+            
parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, 
false);
             
parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
             //ByteBuffer seems to represent incorrect content. Need to make 
sure it is the content
             //of the <script> node and not anything else!

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/DefaultValidator.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/validator/DefaultValidator.java 
b/core/src/main/java/org/apache/any23/validator/DefaultValidator.java
index 828ef1a..0094e54 100644
--- a/core/src/main/java/org/apache/any23/validator/DefaultValidator.java
+++ b/core/src/main/java/org/apache/any23/validator/DefaultValidator.java
@@ -20,6 +20,8 @@ package org.apache.any23.validator;
 import org.apache.any23.validator.rule.AboutNotURIRule;
 import org.apache.any23.validator.rule.MetaNameMisuseFix;
 import org.apache.any23.validator.rule.MetaNameMisuseRule;
+import org.apache.any23.validator.rule.MissingItemscopeAttributeValueFix;
+import org.apache.any23.validator.rule.MissingItemscopeAttributeValueRule;
 import org.apache.any23.validator.rule.MissingOpenGraphNamespaceRule;
 import org.apache.any23.validator.rule.OpenGraphNamespaceFix;
 import org.w3c.dom.Document;
@@ -54,7 +56,8 @@ public class DefaultValidator implements Validator {
         final ValidationReportBuilder validationReportBuilder = new 
DefaultValidationReportBuilder();
         for(Class<? extends Rule> cRule : rulesOrder) {
             Rule rule = newRuleInstance(cRule);
-            final RuleContext ruleContext = new DefaultRuleContext();          
  
+            @SuppressWarnings("rawtypes")
+            final RuleContext ruleContext = new DefaultRuleContext();
             boolean applyOn;
             try {
                 applyOn = rule.applyOn(document, ruleContext, 
validationReportBuilder);
@@ -121,6 +124,7 @@ public class DefaultValidator implements Validator {
         addRule(MetaNameMisuseRule.class, MetaNameMisuseFix.class);
         addRule(MissingOpenGraphNamespaceRule.class, 
OpenGraphNamespaceFix.class);
         addRule(AboutNotURIRule.class);
+        addRule(MissingItemscopeAttributeValueRule.class, 
MissingItemscopeAttributeValueFix.class);
     }
 
     private Fix newFixInstance(Class<? extends Fix> cFix) throws 
ValidatorException {

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java 
b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java
index dacde1b..5a0bfae 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseFix.java
@@ -39,7 +39,7 @@ public class MetaNameMisuseFix implements Fix {
     }
 
     @SuppressWarnings("unchecked")
-    public void execute(Rule rule, RuleContext context, DOMDocument document) {
+    public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext 
context, DOMDocument document) {
         List<Node> nodes = (List<Node>) 
context.getData(MetaNameMisuseRule.ERRORED_META_NODES);
         for(Node node : nodes) {
             final String nameValue = 
node.getAttributes().getNamedItem("name").getTextContent();

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java 
b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
index cc9c886..a803107 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
@@ -44,7 +44,7 @@ public class MetaNameMisuseRule implements Rule {
 
     public boolean applyOn(
             DOMDocument document,
-            RuleContext context,
+            @SuppressWarnings("rawtypes") RuleContext context,
             ValidationReportBuilder validationReportBuilder
     ) {
         List<Node> metaNodes = document.getNodes("/HTML/HEAD/META");

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java
 
b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java
new file mode 100644
index 0000000..909a33a
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueFix.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.validator.rule;
+
+import java.util.List;
+
+import org.apache.any23.validator.DOMDocument;
+import org.apache.any23.validator.Fix;
+import org.apache.any23.validator.Rule;
+import org.apache.any23.validator.RuleContext;
+import org.w3c.dom.Node;
+
+/**
+ * Fix for the issue described within 
+ * {@link org.apache.any23.validator.rule.MissingItemscopeAttributeValueRule}
+ */
+public class MissingItemscopeAttributeValueFix implements Fix {
+
+  /**
+   * Default constructor
+   */
+  public MissingItemscopeAttributeValueFix() {
+  }
+
+  public static final String EMPTY_ITEMSCOPE_VALUE = "=\"itemscope\"";
+
+  public String getHRName() {
+    return "missing-itemscope-value-fix";
+  }
+
+  public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext 
context, DOMDocument document) {
+
+    List<Node> itemNodes = document.getNodesWithAttribute("itemscope");
+    for(Node itemNode : itemNodes) {
+      Node itemScopeNode = itemNode.getAttributes().getNamedItem("itemscope");
+      if(itemScopeNode.getNodeValue().contentEquals("")) {
+        
itemNode.getAttributes().getNamedItem("itemscope").setNodeValue(EMPTY_ITEMSCOPE_VALUE);
+      }
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java
 
b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java
new file mode 100644
index 0000000..b0ecd9b
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/validator/rule/MissingItemscopeAttributeValueRule.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.validator.rule;
+
+import java.util.List;
+
+import org.apache.any23.validator.DOMDocument;
+import org.apache.any23.validator.Rule;
+import org.apache.any23.validator.RuleContext;
+import org.apache.any23.validator.ValidationReport;
+import org.apache.any23.validator.ValidationReportBuilder;
+import org.w3c.dom.Node;
+
+/**
+ * Detects 'itemscope' attributes that have no accompanying value.
+ * Typically, when such a snippet of XHTML is fed through the 
+ * {@link org.apache.any23.extractor.rdfa.RDFa11Extractor}, and
+ * subsequently to Sesame's {@link 
org.semarglproject.sesame.rdf.rdfa.SesameRDFaParser},
+ * it results in the following fatal parse error:
+ * <pre>
+ * {@code
+ * [Fatal Error] :23:15: Attribute name "itemscope" associated with an element 
type "div" must be followed by the ' = ' character.
+ * }
+ * </pre>
+ * This Rule reports an issue when it finds an 'itemscope' attribute in that state.
+ *
+ */
+public class MissingItemscopeAttributeValueRule implements Rule {
+
+  /**
+   * Default constructor
+   */
+  public MissingItemscopeAttributeValueRule() {
+  }
+
+  @Override
+  public String getHRName() {
+    return "missing-itemscope-value-rule";
+  }
+
+  /**
+   * @see 
org.apache.any23.validator.Rule#applyOn(org.apache.any23.validator.DOMDocument, 
org.apache.any23.validator.RuleContext, 
org.apache.any23.validator.ValidationReportBuilder)
+   */
+  @Override
+  public boolean applyOn(DOMDocument document, @SuppressWarnings("rawtypes") 
RuleContext context,
+      ValidationReportBuilder validationReportBuilder) {
+    List<Node> itemNodes = document.getNodesWithAttribute("itemscope");
+    boolean foundPrecondition = false;
+    String propertyNode = null;
+    Node iNode = null;
+    for(Node itemNode : itemNodes) {
+      iNode = itemNode;
+      propertyNode = 
iNode.getAttributes().getNamedItem("itemscope").getNodeValue();
+      if( propertyNode == null || propertyNode.contentEquals("")) {
+        foundPrecondition = true;
+        break;
+      }
+    }
+    if(foundPrecondition) {
+      validationReportBuilder.reportIssue(
+          ValidationReport.IssueLevel.error,
+          "Located absence of an accompanying value for the the 'itemscope' 
attribute of element with hashcode: " + iNode.hashCode(),
+          iNode
+          );
+      return true;
+    }
+    return false;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
 
b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
index f814016..8229525 100644
--- 
a/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
+++ 
b/core/src/main/java/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.java
@@ -41,7 +41,7 @@ public class MissingOpenGraphNamespaceRule implements Rule {
 
     public boolean applyOn(
             DOMDocument document,
-            RuleContext context,
+            @SuppressWarnings("rawtypes") RuleContext context,
             ValidationReportBuilder validationReportBuilder
     ) {
         List<Node> metas = document.getNodes("/HTML/HEAD/META");

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java 
b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java
index c0b394b..6975991 100644
--- 
a/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java
+++ 
b/core/src/main/java/org/apache/any23/validator/rule/OpenGraphNamespaceFix.java
@@ -37,7 +37,7 @@ public class OpenGraphNamespaceFix implements Fix {
         return "opengraph-namespace-fix";
     }
 
-    public void execute(Rule rule, RuleContext context, DOMDocument document) {
+    public void execute(Rule rule, @SuppressWarnings("rawtypes") RuleContext 
context, DOMDocument document) {
         document.addAttribute("/HTML", "xmlns:og", OPENGRAPH_PROTOCOL_NS);
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/test/java/org/apache/any23/Any23Test.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/Any23Test.java 
b/core/src/test/java/org/apache/any23/Any23Test.java
index c487ee8..4e0d9c2 100644
--- a/core/src/test/java/org/apache/any23/Any23Test.java
+++ b/core/src/test/java/org/apache/any23/Any23Test.java
@@ -18,6 +18,7 @@
 package org.apache.any23;
 
 import org.junit.Assert;
+import org.apache.any23.configuration.Configuration;
 import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.configuration.ModifiableConfiguration;
 import org.apache.any23.extractor.ExtractionException;
@@ -53,7 +54,6 @@ import org.openrdf.repository.RepositoryException;
 import org.openrdf.repository.RepositoryResult;
 import org.openrdf.repository.sail.SailRepository;
 import org.openrdf.rio.RDFParseException;
-import org.openrdf.sail.Sail;
 import org.openrdf.sail.SailException;
 import org.openrdf.sail.memory.MemoryStore;
 import org.slf4j.Logger;
@@ -552,11 +552,13 @@ public class Any23Test extends Any23OnlineTestBase {
      */
     private ExtractionReport detectAndExtract(String in) throws Exception {
         Any23 any23 = new Any23();
+        Configuration conf = DefaultConfiguration.copy();
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         ReportingTripleHandler outputHandler = new ReportingTripleHandler(
                 new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(
                         new NTriplesWriter(out))));
-        return any23.extract(in, "http://host.com/path";, outputHandler);
+        return any23.extract(new ExtractionParameters(conf, 
ValidationMode.ValidateAndFix, null, null), 
+            new StringDocumentSource(in, "http://host.com/path";), 
outputHandler, "UTF-8");
     }
 
     /**
@@ -586,9 +588,9 @@ public class Any23Test extends Any23OnlineTestBase {
      * @throws ExtractionException
      */
     private void assertExtractorActivation(String in,
-            Class<? extends Extractor>... expectedExtractors) throws Exception 
{
+            @SuppressWarnings("rawtypes") Class<? extends Extractor>... 
expectedExtractors) throws Exception {
         final ExtractionReport extractionReport = detectAndExtract(in);
-        for (Class<? extends Extractor> expectedExtractorClass : 
expectedExtractors) {
+        for (@SuppressWarnings("rawtypes") Class<? extends Extractor> 
expectedExtractorClass : expectedExtractors) {
             Assert.assertTrue(
                     String.format(
                             "Detection and extraction failed, expected 
extractor [%s] not found.",

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java 
b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java
index efef2f2..f31b846 100644
--- a/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java
+++ b/core/src/test/java/org/apache/any23/validator/DefaultValidatorTest.java
@@ -76,6 +76,27 @@ public class DefaultValidatorTest {
             logger.debug( validationReport.toString() );
         }
     }
+    
+    @Test
+    public void testMissingItemscopeAttributeValue() throws IOException, 
URISyntaxException, ValidatorException {
+      DOMDocument document = loadDocument("microdata-basic.html");
+      List<Node> brokenItemScopeNodes = 
document.getNodesWithAttribute("itemscope");
+      for (Node node : brokenItemScopeNodes) {
+        // all nodes with itemscope have an empty string value
+        Assert.assertEquals("", 
node.getAttributes().getNamedItem("itemscope").getNodeValue() );
+      }
+      ValidationReport validationReport = validator.validate(document, true);
+      List<Node> fixedItemScopeNodes = 
document.getNodesWithAttribute("itemscope");
+      for (Node node : fixedItemScopeNodes) {
+        // all nodes with itemscope now have a default value of "itemscope"
+        
Assert.assertNotNull(node.getAttributes().getNamedItem("itemscope").getNodeValue()
 );
+        Assert.assertNotEquals("", 
node.getAttributes().getNamedItem("itemscope").getNodeValue() );
+        Assert.assertEquals("itemscope", 
node.getAttributes().getNamedItem("itemscope").getNodeValue());
+      }
+      if(logger.isDebugEnabled()) {
+          logger.debug( validationReport.toString() );
+      }
+  }
 
     @Test
     public void testMetaNameMisuse() throws Exception {
@@ -133,7 +154,7 @@ public class DefaultValidatorTest {
 
         public boolean applyOn(
                 DOMDocument document,
-                RuleContext context,
+                @SuppressWarnings("rawtypes") RuleContext context,
                 ValidationReportBuilder validationReportBuilder
         ) {
             throw new UnsupportedOperationException();
@@ -145,7 +166,7 @@ public class DefaultValidatorTest {
             return "fake-fix";
         }
 
-        public void execute(Rule rule, RuleContext context, DOMDocument 
document) {
+        public void execute(Rule rule, @SuppressWarnings("rawtypes") 
RuleContext context, DOMDocument document) {
               throw new UnsupportedOperationException();
         }
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/core/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/core/src/test/resources/log4j.properties 
b/core/src/test/resources/log4j.properties
index 4aa0d92..4634d6b 100644
--- a/core/src/test/resources/log4j.properties
+++ b/core/src/test/resources/log4j.properties
@@ -1,20 +1,35 @@
-log4j.rootCategory=INFO, O  
-      
-# Stdout  
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootCategory=DEBUG, O
+
+# Stdout
 log4j.appender.O=org.apache.log4j.ConsoleAppender  
-      
-# File  
-#log4j.appender.R=org.apache.log4j.RollingFileAppender  
-#log4j.appender.R.File=log4j.log  
-      
-# Control the maximum log file size  
-#log4j.appender.R.MaxFileSize=100KB  
-      
-# Archive log files (one backup file here)  
-log4j.appender.R.MaxBackupIndex=1  
-      
-log4j.appender.R.layout=org.apache.log4j.PatternLayout  
-log4j.appender.O.layout=org.apache.log4j.PatternLayout  
-      
-log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - 
%C.%M(%F:%L) - %m%n  
-log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - 
%C.%M(%F:%L) - %m%n  
+
+# File
+#log4j.appender.R=org.apache.log4j.RollingFileAppender
+#log4j.appender.R.File=log4j.log
+
+# Control the maximum log file size
+#log4j.appender.R.MaxFileSize=100KB
+
+# Archive log files (one backup file here)
+log4j.appender.R.MaxBackupIndex=1
+
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+log4j.appender.O.layout=org.apache.log4j.PatternLayout
+
+log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - 
%C.%M(%F:%L) - %m%n
+log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - 
%C.%M(%F:%L) - %m%n

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/src/site/apt/index.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/index.apt b/src/site/apt/index.apt
index 5769466..f81da17 100644
--- a/src/site/apt/index.apt
+++ b/src/site/apt/index.apt
@@ -3,7 +3,6 @@
                                     ------
                               The Apache Software Foundation
                                     ------
-                                     2011-2012
 
 ~~  Licensed to the Apache Software Foundation (ASF) under one or more
 ~~  contributor license agreements.  See the NOTICE file distributed with
@@ -31,12 +30,18 @@ Introduction to Apache Any23
 
      * {{{http://www.w3.org/TR/xhtml-rdfa-primer/}RDFa}} with 
{{{http://www.w3.org/TR/2010/WD-rdfa-core-20100422/#scoping-of-prefix-mappings}RDFa1.1
 prefix mechanism}}
 
-     * {{{http://microformats.org/}Microformats}}: Adr, Geo, hCalendar, hCard, 
hListing, hResume, hReview, License, XFN and Species
+     * {{{http://microformats.org/}Microformats1}} and 
{{{http://microformats.org/wiki/microformats-2}Microformats2}}: hAdr, hCard, 
hCalendar, hEntry, hEvent, hGeo, hItem, hListing, hProduct, hRecipe, 
hResume, hReview, License, Species, XFN, etc.
+
+     * {{{http://json-ld.org/}JSON-LD}}: JSON for Linking Data, a lightweight 
Linked Data format that builds on the already successful JSON format and 
provides a way to help JSON data interoperate at Web scale.
 
      * {{{http://dev.w3.org/html5/md/}HTML5 Microdata}}: (such as 
{{{http://schema.org}Schema.org}})
 
      * {{{http://www.ietf.org/rfc/rfc4180.txt}CSV}}: Comma Separated Values 
with separator autodetection.
 
+     * Vocabularies: Extraction support for {{{http://dublincore.org/}Dublin 
Core Terms}}, 
{{{http://www.w3.org/wiki/DescriptionOfACareerVocabulary}Description of a 
Career}}, {{{https://github.com/edumbill/doap/wiki}Description Of A Project}}, 
{{{http://xmlns.com/foaf/spec/}Friend Of A Friend}}, 
{{{http://www.geonames.org/ontology/}GEO Names}}, 
{{{http://www.w3.org/2002/12/cal/icaltzd#}ICAL}}, 
{{{https://github.com/RinkeHoekstra/lkif-core}lkif-core}}, 
{{{http://ogp.me/}Open Graph Protocol}}, {{{http://purl.org/ontology/po/}BBC 
Programmes Ontology}}, {{{http://vocab.org/review/terms.html}RDF Review 
Vocabulary}}, {{{http://schema.org/}schema.org}}, 
{{{http://www.w3.org/2006/vcard/ns}VCard}}, {{{http://purl.org/ontology/wo/}BBC 
Wildlife Ontology}} and {{{http://www.w3.org/1999/xhtml/vocab/}XHTML}}... and 
more!
+
+
+
     A detailed description of available extractors is 
{{{./extractors.html}here}}.
 
     <<Apache Any23>> is used in major Web of Data applications such as 
{{{http://sindice.com/}sindice.com}} and {{{http://sig.ma/}sig.ma}}. It is 
written in Java and licensed under the 
{{{http://any23.googlecode.com/svn/trunk/LICENSE.txt}Apache License}}.
@@ -45,9 +50,9 @@ Introduction to Apache Any23
      * As a command-line tool for extracting and converting between the 
supported formats.
      * As online service API available at {{{http://any23.org/}any23.org}}.
 
-    You can <<download>> the latest release from {{{./download.html}Apache 
Mirrors}}.
+    You can <<download>> the latest release from our {{{./download.html}Apache 
Mirrors}}.
 
-    Previous versions are available from the 
{{{http://code.google.com/p/any23/downloads/list}download site at Google Code}}.
+    Previous versions are available from the 
{{{http://archive.apache.org/dist/any23/}Apache Archives site}}.
 
 * Documentation Content
 

http://git-wip-us.apache.org/repos/asf/any23/blob/fc459327/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html
----------------------------------------------------------------------
diff --git 
a/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html
 
b/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html
new file mode 100644
index 0000000..3ffca84
--- /dev/null
+++ 
b/test-resources/src/test/resources/org/apache/any23/validator/microdata-basic.html
@@ -0,0 +1,107 @@
+<!DOCTYPE html>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+<body>
+
+<!--  result0 -->
+<div itemscope>
+    <p>My name is <span itemprop="name">Elizabeth</span>.</p>
+</div>
+<!--  result1 -->
+<div itemscope>
+    <p>My name is <span itemprop="name">Daniel</span>.</p>
+</div>
+
+<!--  result2 -->
+<div itemscope>
+    <p>My name is <span itemprop="name">Neil</span>.</p>
+    <p>My band is called <span itemprop="band">Four Parts Water</span>.</p>
+    <p>I am <span itemprop="nationality">British</span>.</p>
+</div>
+
+<!--  result3 -->
+<div itemscope>
+    <img itemprop="image" src="google-logo.png" alt="Google">
+</div>
+
+<!--  result4 -->
+<div itemscope>
+ I was born on <time itemprop="birthday" datetime="2009-05-10">May 10th 
2009</time>.
+</div>
+
+<!--  result5 -->
+<div itemscope>
+    <p>Flavors in my favorite ice cream:</p>
+    <ul>
+        <li itemprop="flavor">Lemon sorbet</li>
+        <li itemprop="flavor">Apricot sorbet</li>
+    </ul>
+</div>
+
+<!--  result6 -->
+<div itemscope>
+    <span itemprop="favorite-color favorite-fruit">orange</span>
+</div>
+
+<!--  result7 -->
+<figure>
+    <img src="castle.jpeg">
+    <figcaption><span itemscope><span itemprop="name">The Castle</span></span> 
(1986)</figcaption>
+</figure>
+
+<!--  result8 -->
+<span itemscope><meta itemprop="name" content="The Castle"></span>
+<figure>
+    <img src="castle.jpeg">
+    <figcaption>The Castle (1986)</figcaption>
+</figure>
+
+<!--  result9 -->
+<section itemscope itemtype="http://example.org/animals#cat">
+    <h1 itemprop="name">Hedral</h1>
+    <p itemprop="desc">Hedral is a male american domestic shorthair,
+     with a fluffy black fur with white paws and belly.</p>
+    <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months">
+</section>
+
+<!--  result10 -->
+<dl itemscope
+    itemtype="http://vocab.example.net/book"
+    itemid="urn:isbn:0-330-34032-8">
+    <dt>Title
+    <dd itemprop="title">The Reality Dysfunction
+    <dt>Author
+    <dd itemprop="author">Peter F. Hamilton
+    <dt>Publication date
+    <dd>
+    <time itemprop="pubdate" datetime="1996-01-26">26 January 1996</time>
+</dl>
+
+<!--  result11 -->
+<section itemscope itemtype="http://example.org/animals#cat">
+    <h1 itemprop="name http://example.com/fn">Hedral</h1>
+    <p itemprop="desc">Hedral is a male american domestic shorthair, with a 
fluffy
+        <span itemprop="http://example.com/color">black</span> fur with
+        <span itemprop="http://example.com/color">white</span> paws and 
belly.</p>
+    <img itemprop="img" src="hedral.jpeg" alt="" title="Hedral, age 18 months">
+</section>
+
+</body>
+</head>
+</html>
\ No newline at end of file

Reply via email to