added HAdr and HGeo Extractors support

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/5b10339b
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/5b10339b
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/5b10339b

Branch: refs/heads/master
Commit: 5b10339b55ea04e097a960fd722e8553573daccf
Parents: a03bafa
Author: nisalanirmana <[email protected]>
Authored: Mon Jun 22 00:09:17 2015 +0530
Committer: nisalanirmana <[email protected]>
Committed: Mon Jun 22 00:09:17 2015 +0530

----------------------------------------------------------------------
 .../main/java/org/apache/any23/vocab/VCard.java |   5 +
 .../html/microformats2/HAdrExtractor.java       | 120 +++++++++++++++++++
 .../microformats2/HAdrExtractorFactory.java     |  57 +++++++++
 .../html/microformats2/HGeoExtractor.java       |  84 +++++++++++++
 .../microformats2/HGeoExtractorFactory.java     |  57 +++++++++
 .../microformats2/annotations/Includes.java     |  41 +++++++
 .../microformats2/annotations/package-info.java |  24 ++++
 .../html/microformats2/package-info.java        |  24 ++++
 .../html/microformats2/example-mf2-h-adr.html   |  27 +++++
 .../html/microformats2/example-mf2-h-geo.html   |  22 ++++
 .../html/microformats2/HAdrExtractorTest.java   |  37 ++++++
 .../html/microformats2/HGeoExtractorTest.java   |  47 ++++++++
 .../apache/any23/vocab/RDFSchemaUtilsTest.java  |   4 +-
 .../microformats2/h-adr/h-adr-test.html         |  33 +++++
 .../microformats2/h-geo/h-geo-test.html         |  33 +++++
 15 files changed, 613 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/api/src/main/java/org/apache/any23/vocab/VCard.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/VCard.java 
b/api/src/main/java/org/apache/any23/vocab/VCard.java
index f43c5eb..10d3c94 100644
--- a/api/src/main/java/org/apache/any23/vocab/VCard.java
+++ b/api/src/main/java/org/apache/any23/vocab/VCard.java
@@ -59,6 +59,11 @@ public class VCard extends Vocabulary {
     public final URI agent = createProperty("agent");
 
     /**
+     * The altitude of a geographic location.
+     */
+    public final URI altitude = createProperty("altitude");
+
+    /**
      * The birthday of a person.
      */
     public final URI bday = createProperty("bday");

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
new file mode 100644
index 0000000..022bf47
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.microformats2.annotations.Includes;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-adr">h-adr</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+@Includes( extractors = HGeoExtractor.class )
+public class HAdrExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final VCard vVCARD = VCard.getInstance();
+
+    /** Class name whose text value is split into "latitude;longitude[;altitude]". */
+    private static final String GEO_FIELD = "p-geo";
+
+    /** Property class names read from each <code>h-adr</code> root, in processing order. */
+    private static final String[] addressFields = {
+            "p-street-address",
+            "p-extended-address",
+            "p-locality",
+            "p-region",
+            "p-postal-code",
+            "p-country-name",
+            GEO_FIELD
+    };
+
+    @Override
+    protected String getBaseClassName() {
+        return "h-adr";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    /**
+     * Types a blank node for the given <code>h-adr</code> root as a VCard
+     * <code>Address</code> and attaches every address field found under that root.
+     * @param node root element of the <code>h-adr</code> entity; may be <code>null</code>.
+     * @param out extraction result that receives the type triple.
+     * @return <code>false</code> if <code>node</code> is <code>null</code>, <code>true</code> otherwise.
+     */
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) {
+        if (null == node) return false;
+        final HTMLDocument document = new HTMLDocument(node);
+        final BNode adr = getBlankNodeFor(node);
+        out.writeTriple(adr, RDF.TYPE, vVCARD.Address);
+        for (String field : addressFields) {
+            for (HTMLDocument.TextField val : document.getPluralTextField(field)) {
+                if (GEO_FIELD.equals(field)) {
+                    addGeoProperties(val, adr);
+                } else {
+                    // The VCard property name is the class name without its "p-" prefix.
+                    conditionallyAddStringProperty(
+                            val.source(),
+                            adr, vVCARD.getProperty(field.replaceFirst("p-", "")), val.value()
+                    );
+                }
+            }
+        }
+
+        final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
+        tser.addResourceRoot( document.getPathToLocalRoot(), adr, this.getClass() );
+
+        return true;
+    }
+
+    /**
+     * Splits a <code>p-geo</code> value on ';' and adds latitude, longitude and, when a
+     * third component is present, altitude. Any other component count is discarded.
+     */
+    private void addGeoProperties(HTMLDocument.TextField geo, BNode adr) {
+        final String[] composed = geo.value().split(";");
+        if (composed.length != 2 && composed.length != 3) {
+            return;
+        }
+        conditionallyAddStringProperty(geo.source(), adr, vVCARD.latitude, composed[0]);
+        conditionallyAddStringProperty(geo.source(), adr, vVCARD.longitude, composed[1]);
+        if (composed.length == 3) {
+            conditionallyAddStringProperty(geo.source(), adr, vVCARD.altitude, composed[2]);
+        }
+    }
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HAdrExtractorFactory.getDescriptionInstance();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java
new file mode 100644
index 0000000..3b28fb5
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * Factory for {@link HAdrExtractor}; one shared instance also serves as its description.
+ * @author Nisala Nirmana
+ */
+public class HAdrExtractorFactory extends SimpleExtractorFactory<HAdrExtractor> implements
+        ExtractorFactory<HAdrExtractor> {
+
+    public static final String NAME = "html-mf2-h-adr";
+
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard");
+
+    // Must stay below NAME and PREFIXES: the constructor reads both during static initialization.
+    private static final ExtractorDescription DESCRIPTION_INSTANCE = new HAdrExtractorFactory();
+
+    public HAdrExtractorFactory() {
+        super(
+                NAME, PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-adr.html");
+    }
+
+    @Override
+    public HAdrExtractor createExtractor() {
+        return new HAdrExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return DESCRIPTION_INSTANCE;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
new file mode 100644
index 0000000..4a1fbfd
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-geo">h-geo</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HGeoExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final VCard vVCARD = VCard.getInstance();
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HGeoExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return "h-geo";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    /**
+     * Types a blank node for the given <code>h-geo</code> root as a VCard <code>Location</code>
+     * and adds its latitude, longitude and altitude text fields as string properties.
+     * @param node root element of the <code>h-geo</code> entity; may be <code>null</code>.
+     * @param out extraction result that receives the type triple.
+     * @return <code>false</code> if <code>node</code> is <code>null</code>, <code>true</code> otherwise.
+     */
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) {
+        if (null == node) return false;
+        final HTMLDocument document = new HTMLDocument(node);
+        final HTMLDocument.TextField latNode = document.getSingularTextField("p-latitude");
+        final HTMLDocument.TextField lonNode = document.getSingularTextField("p-longitude");
+        final HTMLDocument.TextField altNode = document.getSingularTextField("p-altitude");
+        // Read every value before anything is written to the output.
+        final String lat = latNode.value();
+        final String lon = lonNode.value();
+        final String alt = altNode.value();
+        final BNode geo = getBlankNodeFor(node);
+        out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
+        conditionallyAddStringProperty(latNode.source(), geo, vVCARD.latitude, lat);
+        conditionallyAddStringProperty(lonNode.source(), geo, vVCARD.longitude, lon);
+        conditionallyAddStringProperty(altNode.source(), geo, vVCARD.altitude, alt);
+
+        final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
+        tser.addResourceRoot( document.getPathToLocalRoot(), geo, this.getClass() );
+
+        return true;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java
new file mode 100644
index 0000000..5b60b7d
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * Factory for {@link HGeoExtractor}; one shared instance also serves as its description.
+ * @author Nisala Nirmana
+ */
+public class HGeoExtractorFactory extends SimpleExtractorFactory<HGeoExtractor> implements
+        ExtractorFactory<HGeoExtractor> {
+
+    public static final String NAME = "html-mf2-h-geo";
+
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard");
+
+    // Must stay below NAME and PREFIXES: the constructor reads both during static initialization.
+    private static final ExtractorDescription DESCRIPTION_INSTANCE = new HGeoExtractorFactory();
+
+    public HGeoExtractorFactory() {
+        super(
+                NAME, PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-geo.html");
+    }
+
+    @Override
+    public HGeoExtractor createExtractor() {
+        return new HGeoExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return DESCRIPTION_INSTANCE;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java
new file mode 100644
index 0000000..ff9d738
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2.annotations;
+
+import org.apache.any23.extractor.html.MicroformatExtractor;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Decorates a {@link MicroformatExtractor} to declare which of the other
+ * <i>Microformats</i> it can nest.
+ *
+ * @author Davide Palmisano ( [email protected] )
+ */
+@Documented
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface Includes {
+    /** Extractor classes of the microformats that the annotated extractor can nest. */
+    Class<? extends MicroformatExtractor>[] extractors();
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
new file mode 100644
index 0000000..3311c98
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *  This package contains the annotations needed to describe the
+ *  single nesting relations among different <a href="http://microformats.org/">Microformats</a>.
+ *
+ *  @see org.apache.any23.extractor.html.MicroformatExtractor
+ */
+package org.apache.any23.extractor.html.microformats2.annotations;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java
new file mode 100644
index 0000000..b961373
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ * All the various {@link org.apache.any23.extractor.Extractor} needed to distill <i>RDF</i>
+ * from <a href="http://microformats.org/">Microformats</a> in HTML pages are contained in this package.
+ *
+ */
+package org.apache.any23.extractor.html.microformats2;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html
----------------------------------------------------------------------
diff --git 
a/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html
 
b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html
new file mode 100644
index 0000000..d6f2c06
--- /dev/null
+++ 
b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html
@@ -0,0 +1,27 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<div class="h-adr">
+  <span class="p-street-address">349/B</span> 
+  <span class="p-extended-address">Batagama,North</span>   
+  <span class="p-locality">Jaela</span>
+  <span class="p-region">Western</span>
+  <span class="p-postal-code">11325</span>    
+  <span class="p-country-name">SL</span>
+  <span class="p-label">349/B,Jaela</span>
+  <span class="p-geo">51.526421;-0.081067;25</span>
+</div>

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html
----------------------------------------------------------------------
diff --git 
a/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html
 
b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html
new file mode 100644
index 0000000..c8b2607
--- /dev/null
+++ 
b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html
@@ -0,0 +1,22 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<div class="h-geo">
+  <span>Latitude</span><div class="p-latitude">7.066622</div> 
+  <span>Longitude</span><div class="p-longitude">79.903048</div>
+  <span>Altitude</span><div class="p-altitude">15</div>
+</div>

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
new file mode 100644
index 0000000..0fb3625
--- /dev/null
+++ 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HAdrExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HAdrExtractorFactory();
+    }
+    // Expected 10 statements: presumably rdf:type, six address fields and three p-geo parts.
+    @Test
+    public void testModelNotEmpty() throws RepositoryException , 
RDFHandlerException {
+        assertExtract("/microformats2/h-adr/h-adr-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 10);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
new file mode 100644
index 0000000..eba89de
--- /dev/null
+++ 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.apache.any23.extractor.html.microformats2.HGeoExtractorFactory;
+import org.apache.any23.vocab.VCard;
+import org.junit.Assert;
+import org.junit.Test;
+import org.openrdf.model.Resource;
+import org.openrdf.model.vocabulary.RDF;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+import java.util.List;
+
+
+public class HGeoExtractorTest extends AbstractExtractorTestCase {
+    // Supplies the factory whose extractor is run against the h-geo test page below.
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HGeoExtractorFactory();
+    }
+    // Expected 4 statements: presumably rdf:type plus latitude, longitude and altitude.
+    @Test
+    public void testModelNotEmpty() throws RepositoryException , 
RDFHandlerException {
+        assertExtract("/microformats2/h-geo/h-geo-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 4);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java 
b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
index 3971191..b4f8b7a 100644
--- a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
+++ b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
@@ -43,7 +43,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesNTriples() {
-        serializeVocabularies(RDFFormat.NTRIPLES, 1918);
+        serializeVocabularies(RDFFormat.NTRIPLES, 1920);
     }
 
     /**
@@ -53,7 +53,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesRDFXML() {
-        serializeVocabularies(RDFFormat.RDFXML, 4987); // Effective lines + 
separators.
+        serializeVocabularies(RDFFormat.RDFXML, 4992); // Effective lines + 
separators.
     }
 
     private void serializeVocabularies(RDFFormat format, int expectedLines) {

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
----------------------------------------------------------------------
diff --git 
a/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html 
b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
new file mode 100644
index 0000000..b5c095a
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
@@ -0,0 +1,33 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+  <body>
+    <!-- Microformats 2 -->
+    <div class="h-adr">
+      <span class="p-street-address">349/B</span> 
+      <span class="p-extended-address">Batagama,North</span>   
+      <span class="p-locality">Jaela</span>
+      <span class="p-region">Western</span>
+      <span class="p-postal-code">11325</span>    
+      <span class="p-country-name">SL</span>
+      <span class="p-geo">51.526421;-0.081067;25</span> 
+    </div>
+  </body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
----------------------------------------------------------------------
diff --git 
a/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html 
b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
new file mode 100644
index 0000000..c0181fe
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
@@ -0,0 +1,33 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+   <!-- Microformats 2 -->
+
+   <p>
+     <span class="h-geo">
+       <span class="p-latitude">54.155278</span>,
+       <span class="p-longitude">-2.249722</span>
+       <span class="p-altitude">694</span>
+     </span>
+   </p>
+
+</body>
+
+</html>

Reply via email to