Author: rwesten
Date: Thu Apr 18 06:05:58 2013
New Revision: 1469171

URL: http://svn.apache.org/r1469171
Log:
STANBOL-1040: Improved GeonamesEntityScoreProvider (see updated issue 
description); fixed a bug in the feature code generation of the 
GeonamesIndexingSource

Modified:
    
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
    
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java

Modified: 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java?rev=1469171&r1=1469170&r2=1469171&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
 Thu Apr 18 06:05:58 2013
@@ -1,5 +1,7 @@
 package org.apache.stanbol.entityhub.indexing.geonames;
 
+import static 
org.apache.stanbol.entityhub.indexing.geonames.GeonamesConstants.GEONAMES_ONTOLOGY_NS;
+
 import java.util.Map;
 
 import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
@@ -8,11 +10,14 @@ import org.apache.stanbol.entityhub.serv
 
 public class GeonamesEntityScoreProvider implements EntityScoreProvider {
 
-    private static final String FCLASS_A = 
GeonamesConstants.GEONAMES_ONTOLOGY_NS +"A";
-    private static final String FCLASS_P = 
GeonamesConstants.GEONAMES_ONTOLOGY_NS +"P";
-    private static final int MAX_POPULATION = 1000000;
-    private static final double FACT = Math.log1p(1000000);
-    private static final Float DEFAULT_SCORE = Float.valueOf(0.3f);
+    private static final String FCLASS_A = GEONAMES_ONTOLOGY_NS +"A";
+    private static final String FCLASS_P = GEONAMES_ONTOLOGY_NS +"P";
+    private static final int MAX_POPULATION = 10000000;
+    private static final int MIN_POPULATION = 1000;
+    // used to change the scale of the natural log
+    private static final double POPULATION_SCALE = 10000; //10k is one 
+    private static final double FACT = 
Math.log1p(MAX_POPULATION/POPULATION_SCALE);
+    private static final Float DEFAULT_SCORE = Float.valueOf(0.1f);
     
     @Override
     public void setConfiguration(Map<String,Object> config) {
@@ -49,18 +54,41 @@ public class GeonamesEntityScoreProvider
         //String fCode = ref == null ? null : ref.getReference();
         
         if(FCLASS_A.equals(fclass)){
-            return Float.valueOf(1f);
+            ref = 
entity.getFirstReference(GeonamesPropertyEnum.gn_featureCode.toString());
+            String fcode = ref == null ? null : ref.getReference();
+            if(fcode == null){
+                return DEFAULT_SCORE;
+            } else {
+                fcode = fcode.substring(GEONAMES_ONTOLOGY_NS.length()+2);
+                if(fcode.length() > 2 && fcode.startsWith("PC")){
+                    return Float.valueOf(1.0f);
+                } else if(fcode.length() > 3 && fcode.charAt(3) == '1'){
+                    return Float.valueOf(0.5f);
+                } else if(fcode.length() > 3 && fcode.charAt(3) == '2'){
+                    return Float.valueOf(0.25f);
+                } else if(fcode.length() > 3 && fcode.charAt(3) == '3'){
+                    return Float.valueOf(0.125f);
+                } else if(fcode.length() > 3 && (fcode.charAt(3) == '4' ||
+                        fcode.charAt(3) == 'D')){
+                    return Float.valueOf(0.062f);
+                } else if(fcode.length() > 3 && fcode.charAt(3) == '5'){
+                    return Float.valueOf(0.031f);
+                } else {
+                    return Float.valueOf(0.062f);
+                }
+            }
         } else if(FCLASS_P.equals(fclass)){
             Long population = 
entity.getFirst(GeonamesPropertyEnum.gn_population.toString(), Long.class);
             if(population == null){
-                return Float.valueOf(0.2f); //min population score
-            } else {
-                long p = Math.min(MAX_POPULATION, population.longValue());
-                double fact = Math.log1p(p);
-                //Normalised the score based on the population in the range
-                // [0.2..1.0]
-                return Float.valueOf((float)((fact/FACT*0.8)+0.2));
+                population = Long.valueOf(1); //use 1 to avoid creating a new 
instance
             }
+            //normalise the population
+            double p = Math.max(Math.min(MAX_POPULATION, 
population.longValue()),MIN_POPULATION);
+            //population factor
+            double fact = Math.log1p(p/POPULATION_SCALE);
+            //Normalised based on the maximum population
+            Float score = Float.valueOf((float)(fact/FACT));
+            return score;
         } else {
             return DEFAULT_SCORE;
         }

Modified: 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java?rev=1469171&r1=1469170&r2=1469171&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
 Thu Apr 18 06:05:58 2013
@@ -291,7 +291,7 @@ public class GeonamesIndexingSource impl
                 
doc.add(GeonamesPropertyEnum.gn_featureClass.toString(),getReference(featureClass));
                 //[7] featureCode (-> need to use 
<featureClass>.<featureCode>!!)
                 
doc.add(GeonamesPropertyEnum.gn_featureCode.toString(),getReference(
-                    new 
StringBuilder(featureClass).append(t.next()).toString()));
+                    new 
StringBuilder(featureClass).append('.').append(t.next()).toString()));
                 //countryCode
                 //  -> geonames uses here the link to an HTML Page showing the 
Country
                 //     We would like to use an Link to a SKOS:Concept 
representing the Country


Reply via email to