Changes according to mentor Michele's feedback

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/ff816027
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/ff816027
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/ff816027

Branch: refs/heads/master
Commit: ff816027510f731f3e3f6a3c410feb5c48ffd972
Parents: 5b10339
Author: Nisala Nirmana <[email protected]>
Authored: Sun Jun 28 22:33:29 2015 +0530
Committer: Nisala Nirmana <[email protected]>
Committed: Sun Jun 28 22:33:29 2015 +0530

----------------------------------------------------------------------
 .../html/microformats2/HAdrExtractor.java       | 69 +++++++++-----------
 .../html/microformats2/HGeoExtractor.java       | 52 +++++++++------
 .../microformats2/Microformats2Prefixes.java    | 26 ++++++++
 .../html/microformats2/HAdrExtractorTest.java   |  2 +-
 .../html/microformats2/HGeoExtractorTest.java   |  2 +-
 .../microformats2/h-adr/h-adr-test.html         | 21 +++---
 .../microformats2/h-geo/h-geo-test.html         |  8 +--
 7 files changed, 105 insertions(+), 75 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
index 022bf47..d0d9257 100644
--- 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
@@ -17,12 +17,14 @@
 
 package org.apache.any23.extractor.html.microformats2;
 
+import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.extractor.TagSoupExtractionResult;
 import org.apache.any23.extractor.html.microformats2.annotations.Includes;
 import org.apache.any23.vocab.VCard;
 import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
 import org.openrdf.model.vocabulary.RDF;
 import org.w3c.dom.Node;
 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
@@ -40,17 +42,23 @@ public class HAdrExtractor extends 
EntityBasedMicroformatExtractor {
     private static final VCard vVCARD = VCard.getInstance();
 
     private static final String[] addressFields = {
-            "p-street-address",
-            "p-extended-address",
-            "p-locality",
-            "p-region",
-            "p-postal-code",
-            "p-country-name",
-            "p-geo"
+            "street-address",
+            "extended-address",
+            "locality",
+            "region",
+            "postal-code",
+            "country-name",
+            "geo"
+    };
+
+    private static final String[] geoFields = {
+            "latitude",
+            "longitude",
+            "altitude"
     };
 
     protected String getBaseClassName() {
-        return "h-adr";
+        return Microformats2Prefixes.CLASS_PREFIX+"adr";
     }
 
     @Override
@@ -58,60 +66,45 @@ public class HAdrExtractor extends 
EntityBasedMicroformatExtractor {
         // Empty.
     }
 
-    protected boolean extractEntity(Node node, ExtractionResult out) {
+    protected boolean extractEntity(Node node, ExtractionResult out) throws 
ExtractionException {
         if (null == node) return false;
         final HTMLDocument document = new HTMLDocument(node);
         BNode adr = getBlankNodeFor(node);
         out.writeTriple(adr, RDF.TYPE, vVCARD.Address);
         final String extractorName = getDescription().getExtractorName();
         for (String field : addressFields) {
-            HTMLDocument.TextField[] values = 
document.getPluralTextField(field);
+            HTMLDocument.TextField[] values = 
document.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX+field);
             for (HTMLDocument.TextField val : values) {
-               if(!field.equals("p-geo")) {
+               if(!field.equals("geo")) {
                         conditionallyAddStringProperty(
                                 val.source(),
-                                adr, 
vVCARD.getProperty(field.replaceFirst("p-", "")), val.value()
+                                adr, vVCARD.getProperty(field), val.value()
                         );
                }else {
                    String[] composed = val.value().split(";");
-                   if (composed.length == 3){
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.latitude, composed[0]
-                       );
+                   for(int counter=0;counter<composed.length;counter++){
                        conditionallyAddStringProperty(
                                val.source(),
-                               adr, vVCARD.longitude, composed[1]
-                       );
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.altitude, composed[2]
+                               adr, vVCARD.getProperty(geoFields[counter]), 
composed[counter]
                        );
 
-                   }else if (composed.length == 2){
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.latitude, composed[0]
-                       );
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.longitude, composed[1]
-                       );
-                   }else {
-                       //we discard if only length is 1
                    }
-
                }
-
             }
         }
-
+        addGeoAsUrlResource(adr,document);
         final TagSoupExtractionResult tser = (TagSoupExtractionResult) 
getCurrentExtractionResult();
-        tser.addResourceRoot( document.getPathToLocalRoot(), adr, 
this.getClass() );
-
+        tser.addResourceRoot( document.getPathToLocalRoot(), adr, 
this.getClass());
         return true;
     }
 
+    private void addGeoAsUrlResource(Resource card,HTMLDocument document) 
throws ExtractionException {
+        HTMLDocument.TextField[] links = 
document.getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX+"geo");
+        for (HTMLDocument.TextField link : links) {
+            conditionallyAddResourceProperty(card, vVCARD.geo, 
getHTMLDocument().resolveURI(link.value()));
+        }
+    }
+
     @Override
     public ExtractorDescription getDescription() {
         return HAdrExtractorFactory.getDescriptionInstance();

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
index 4a1fbfd..c9c061a 100644
--- 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
@@ -26,6 +26,9 @@ import org.openrdf.model.vocabulary.RDF;
 import org.w3c.dom.Node;
 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
 import org.apache.any23.extractor.html.HTMLDocument;
+
+import java.util.ArrayList;
+
 /**
  * Extractor for the <a href="http://microformats.org/wiki/h-geo">h-geo</a>
  * microformat.
@@ -36,13 +39,19 @@ public class HGeoExtractor extends 
EntityBasedMicroformatExtractor {
 
     private static final VCard vVCARD = VCard.getInstance();
 
+    private static final String[] geoFields = {
+            "latitude",
+            "longitude",
+            "altitude"
+    };
+
     @Override
     public ExtractorDescription getDescription() {
         return HGeoExtractorFactory.getDescriptionInstance();
     }
 
     protected String getBaseClassName() {
-        return "h-geo";
+        return Microformats2Prefixes.CLASS_PREFIX+"geo";
     }
 
     @Override
@@ -53,31 +62,32 @@ public class HGeoExtractor extends 
EntityBasedMicroformatExtractor {
     protected boolean extractEntity(Node node, ExtractionResult out) {
         if (null == node) return false;
         final HTMLDocument document = new HTMLDocument(node);
-        HTMLDocument.TextField latNode = 
document.getSingularTextField("p-latitude");
-        HTMLDocument.TextField lonNode = 
document.getSingularTextField("p-longitude");
-        HTMLDocument.TextField altNode = 
document.getSingularTextField("p-altitude");
-        String lat = latNode.value();
-        String lon = lonNode.value();
-        String alt = altNode.value();
         BNode geo = getBlankNodeFor(node);
         out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
         final String extractorName = getDescription().getExtractorName();
-        conditionallyAddStringProperty(
-                latNode.source(),
-                geo, vVCARD.latitude , lat
-        );
-        conditionallyAddStringProperty(
-                lonNode.source(),
-                geo, vVCARD.longitude, lon
-        );
-        conditionallyAddStringProperty(
-                altNode.source(),
-                geo, vVCARD.altitude, alt
-        );
-
+        ArrayList<HTMLDocument.TextField> geoNodes = new 
ArrayList<HTMLDocument.TextField>();
+        for(String field : geoFields){
+            
geoNodes.add(document.getSingularTextField(Microformats2Prefixes.PROPERTY_PREFIX+field));
+        }
+        if(geoNodes.get(0).source()==null){
+            String[] composed = 
document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX +"geo")
+                                        .value().split(";");
+            for(int counter=0;counter<composed.length;counter++){
+                conditionallyAddStringProperty(
+                        
document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX+"geo").source(),
+                        geo, vVCARD.getProperty(geoFields[counter]), 
composed[counter]
+                );
+            }
+        }else{
+            for(int counter=0;counter<geoNodes.size();counter++){
+                conditionallyAddStringProperty(
+                        geoNodes.get(counter).source(),
+                        geo, vVCARD.getProperty(geoFields[counter]) , 
geoNodes.get(counter).value()
+                );
+            }
+        }
         final TagSoupExtractionResult tser = (TagSoupExtractionResult) 
getCurrentExtractionResult();
         tser.addResourceRoot( document.getPathToLocalRoot(), geo, 
this.getClass() );
-
         return true;
     }
     

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
new file mode 100644
index 0000000..18ac1b1
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+public class Microformats2Prefixes {
+    public static final String CLASS_PREFIX = "h-";
+    public static final String PROPERTY_PREFIX = "p-";
+    public static final String URL_PROPERTY_PREFIX = "u-";
+    public static final String EMBEDDED_PROPERTY_PREFIX = "e-";
+    public static final String TIME_PROPERTY_PREFIX = "dt-";
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
index 0fb3625..69abb55 100644
--- 
a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
+++ 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
@@ -32,6 +32,6 @@ public class HAdrExtractorTest extends 
AbstractExtractorTestCase {
     public void testModelNotEmpty() throws RepositoryException , 
RDFHandlerException {
         assertExtract("/microformats2/h-adr/h-adr-test.html");
         assertModelNotEmpty();
-        assertStatementsSize(null, null, null, 10);
+        assertStatementsSize(null, null, null, 11);
     }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
index eba89de..0d29fda 100644
--- 
a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
+++ 
b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
@@ -41,7 +41,7 @@ public class HGeoExtractorTest extends 
AbstractExtractorTestCase {
     public void testModelNotEmpty() throws RepositoryException , 
RDFHandlerException {
         assertExtract("/microformats2/h-geo/h-geo-test.html");
         assertModelNotEmpty();
-        assertStatementsSize(null, null, null, 4);
+        assertStatementsSize(null, null, null, 8);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
----------------------------------------------------------------------
diff --git 
a/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html 
b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
index b5c095a..5438b90 100644
--- a/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
+++ b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
@@ -18,16 +18,17 @@
 <html>
 
   <body>
-    <!-- Microformats 2 -->
-    <div class="h-adr">
-      <span class="p-street-address">349/B</span> 
-      <span class="p-extended-address">Batagama,North</span>   
-      <span class="p-locality">Jaela</span>
-      <span class="p-region">Western</span>
-      <span class="p-postal-code">11325</span>    
-      <span class="p-country-name">SL</span></span>
-      <span class="p-geo">51.526421;-0.081067;25</span> 
-    </div>
+      <!-- Microformats 2 -->
+      <div class="h-adr">
+          <span class="p-street-address">349/B</span>
+          <span class="p-extended-address">Batagama,North</span>
+          <span class="p-locality">Jaela</span>
+          <span class="p-region">Western</span>
+          <span class="p-postal-code">11325</span>
+          <span class="p-country-name">SL</span></span>
+          <span class="p-geo">51.526421;-0.081067;25</span>
+          <a class="u-geo" 
href="geo:51.526421;-0.081067;crs=wgs84;u=40">Home</a>
+      </div>
   </body>
 
 </html>

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
----------------------------------------------------------------------
diff --git 
a/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html 
b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
index c0181fe..38d906f 100644
--- a/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
+++ b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
@@ -20,13 +20,13 @@
 <body>
    <!-- Microformats 2 -->
 
-   <p>
-     <span class="h-geo">
+   <span class="h-geo">
        <span class="p-latitude">54.155278</span>,
        <span class="p-longitude">-2.249722</span>
        <span class="p-altitude">694</span>
-     </span>
-   </p>
+   </span>
+
+   <span class="h-geo">51.513458;-0.14812;50</span>
 
 </body>
 

Reply via email to