Author: rwesten
Date: Thu Apr 18 06:05:58 2013
New Revision: 1469171
URL: http://svn.apache.org/r1469171
Log:
STANBOL-1040: Improved GeonamesEntityScoreProvider (see updated issue
description); fixed a bug in the feature code generation of the
GeonamesEntityScoreProvider
Modified:
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
Modified:
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java?rev=1469171&r1=1469170&r2=1469171&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
(original)
+++
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
Thu Apr 18 06:05:58 2013
@@ -1,5 +1,7 @@
package org.apache.stanbol.entityhub.indexing.geonames;
+import static
org.apache.stanbol.entityhub.indexing.geonames.GeonamesConstants.GEONAMES_ONTOLOGY_NS;
+
import java.util.Map;
import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
@@ -8,11 +10,14 @@ import org.apache.stanbol.entityhub.serv
public class GeonamesEntityScoreProvider implements EntityScoreProvider {
- private static final String FCLASS_A =
GeonamesConstants.GEONAMES_ONTOLOGY_NS +"A";
- private static final String FCLASS_P =
GeonamesConstants.GEONAMES_ONTOLOGY_NS +"P";
- private static final int MAX_POPULATION = 1000000;
- private static final double FACT = Math.log1p(1000000);
- private static final Float DEFAULT_SCORE = Float.valueOf(0.3f);
+ private static final String FCLASS_A = GEONAMES_ONTOLOGY_NS +"A";
+ private static final String FCLASS_P = GEONAMES_ONTOLOGY_NS +"P";
+ private static final int MAX_POPULATION = 10000000;
+ private static final int MIN_POPULATION = 1000;
+ // used to change the scale of the natural log
+ private static final double POPULATION_SCALE = 10000; //10k is one
+ private static final double FACT =
Math.log1p(MAX_POPULATION/POPULATION_SCALE);
+ private static final Float DEFAULT_SCORE = Float.valueOf(0.1f);
@Override
public void setConfiguration(Map<String,Object> config) {
@@ -49,18 +54,41 @@ public class GeonamesEntityScoreProvider
//String fCode = ref == null ? null : ref.getReference();
if(FCLASS_A.equals(fclass)){
- return Float.valueOf(1f);
+ ref =
entity.getFirstReference(GeonamesPropertyEnum.gn_featureCode.toString());
+ String fcode = ref == null ? null : ref.getReference();
+ if(fcode == null){
+ return DEFAULT_SCORE;
+ } else {
+ fcode = fcode.substring(GEONAMES_ONTOLOGY_NS.length()+2);
+ if(fcode.length() > 2 && fcode.startsWith("PC")){
+ return Float.valueOf(1.0f);
+ } else if(fcode.length() > 3 && fcode.charAt(3) == '1'){
+ return Float.valueOf(0.5f);
+ } else if(fcode.length() > 3 && fcode.charAt(3) == '2'){
+ return Float.valueOf(0.25f);
+ } else if(fcode.length() > 3 && fcode.charAt(3) == '3'){
+ return Float.valueOf(0.125f);
+ } else if(fcode.length() > 3 && (fcode.charAt(3) == '4' ||
+ fcode.charAt(3) == 'D')){
+ return Float.valueOf(0.062f);
+ } else if(fcode.length() > 3 && fcode.charAt(3) == '5'){
+ return Float.valueOf(0.031f);
+ } else {
+ return Float.valueOf(0.062f);
+ }
+ }
} else if(FCLASS_P.equals(fclass)){
Long population =
entity.getFirst(GeonamesPropertyEnum.gn_population.toString(), Long.class);
if(population == null){
- return Float.valueOf(0.2f); //min population score
- } else {
- long p = Math.min(MAX_POPULATION, population.longValue());
- double fact = Math.log1p(p);
- //Normalised the score based on the population in the range
- // [0.2..1.0]
- return Float.valueOf((float)((fact/FACT*0.8)+0.2));
+ population = Long.valueOf(1); //use 1 to avoid creating a new
instance
}
+ //normalise the population
+ double p = Math.max(Math.min(MAX_POPULATION,
population.longValue()),MIN_POPULATION);
+ //population factor
+ double fact = Math.log1p(p/POPULATION_SCALE);
+ //Normalised based on the maximum population
+ Float score = Float.valueOf((float)(fact/FACT));
+ return score;
} else {
return DEFAULT_SCORE;
}
Modified:
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java?rev=1469171&r1=1469170&r2=1469171&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
(original)
+++
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
Thu Apr 18 06:05:58 2013
@@ -291,7 +291,7 @@ public class GeonamesIndexingSource impl
doc.add(GeonamesPropertyEnum.gn_featureClass.toString(),getReference(featureClass));
//[7] featureCode (-> need to use
<featureClass>.<featureCode>!!)
doc.add(GeonamesPropertyEnum.gn_featureCode.toString(),getReference(
- new
StringBuilder(featureClass).append(t.next()).toString()));
+ new
StringBuilder(featureClass).append('.').append(t.next()).toString()));
//countryCode
// -> geonames uses here the link to an HTML Page showing the
Country
// We would like to use an Link to a SKOS:Concept
representing the Country