Author: rwesten
Date: Wed Apr 17 10:38:48 2013
New Revision: 1468829

URL: http://svn.apache.org/r1468829
Log:
fixes STANBOL-1038; switches to shade plugin for runnable jar (STANBOL-1039); 
implemented simple EntityScoreProvider (STANBOL-1040); also updated the default 
configuration to use the new entity score provider and to provide an example 
for filtering Entities based on the feature class

Added:
    
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
   (with props)
    
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/entityTypes.properties
   (with props)
Removed:
    stanbol/trunk/entityhub/indexing/geonames/src/main/assembly/
Modified:
    stanbol/trunk/entityhub/indexing/geonames/pom.xml
    
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
    
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/HierarchyProcessor.java
    
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/indexing.properties

Modified: stanbol/trunk/entityhub/indexing/geonames/pom.xml
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/pom.xml?rev=1468829&r1=1468828&r2=1468829&view=diff
==============================================================================
--- stanbol/trunk/entityhub/indexing/geonames/pom.xml (original)
+++ stanbol/trunk/entityhub/indexing/geonames/pom.xml Wed Apr 17 10:38:48 2013
@@ -65,28 +65,38 @@
         </configuration>
       </plugin>
       <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <version>2.2</version>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
         <configuration>
-          <descriptors>
-            <descriptor>src/main/assembly/assembly.xml</descriptor>
-          </descriptors>
           <archive>
             <manifest>
-                <addClasspath>true</addClasspath>
+              <addClasspath>true</addClasspath>
               <mainClass>org.apache.stanbol.entityhub.indexing.Main</mainClass>
             </manifest>
           </archive>
         </configuration>
-<!--        <executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <configuration>
+            <artifactSet>
+              <includes>
+                <include>*</include>
+              </includes>
+            </artifactSet>
+            <transformers>
+              <transformer 
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+            </transformers>
+          </configuration>
+        <executions>
           <execution>
-            <id>make-assembly</id>
             <phase>package</phase>
             <goals>
-              <goal>single</goal>
+              <goal>shade</goal>
             </goals>
           </execution>
-        </executions>  -->
+        </executions>
       </plugin>
     </plugins>
   </build>

Added: 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java?rev=1468829&view=auto
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
 (added)
+++ 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
 Wed Apr 17 10:38:48 2013
@@ -0,0 +1,69 @@
+package org.apache.stanbol.entityhub.indexing.geonames;
+
+import java.util.Map;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+
+public class GeonamesEntityScoreProvider implements EntityScoreProvider {
+
+    private static final String FCLASS_A = 
GeonamesConstants.GEONAMES_ONTOLOGY_NS +"A";
+    private static final String FCLASS_P = 
GeonamesConstants.GEONAMES_ONTOLOGY_NS +"P";
+    private static final int MAX_POPULATION = 1000000;
+    private static final double FACT = Math.log1p(1000000);
+    private static final Float DEFAULT_SCORE = Float.valueOf(0.3f);
+    
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+    }
+
+    @Override
+    public boolean needsInitialisation() {
+        return false;
+    }
+
+    @Override
+    public void initialise() {
+    }
+
+    @Override
+    public void close() {
+    }
+
+    @Override
+    public boolean needsData() {
+        return true;
+    }
+
+    @Override
+    public Float process(String id) throws UnsupportedOperationException {
+        throw new UnsupportedOperationException("This implementation requries 
data to process the score");
+    }
+
+    @Override
+    public Float process(Representation entity) throws 
UnsupportedOperationException {
+        Reference ref = 
entity.getFirstReference(GeonamesPropertyEnum.gn_featureClass.toString());
+        String fclass = ref == null ? null : ref.getReference();
+        //ref = 
entity.getFirstReference(GeonamesPropertyEnum.gn_featureCode.toString());
+        //String fCode = ref == null ? null : ref.getReference();
+        
+        if(FCLASS_A.equals(fclass)){
+            return Float.valueOf(1f);
+        } else if(FCLASS_P.equals(fclass)){
+            Long population = 
entity.getFirst(GeonamesPropertyEnum.gn_population.toString(), Long.class);
+            if(population == null){
+                return Float.valueOf(0.2f); //min population score
+            } else {
+                long p = Math.min(MAX_POPULATION, population.longValue());
+                double fact = Math.log1p(p);
+                //Normalised the score based on the population in the range
+                // [0.2..1.0]
+                return Float.valueOf((float)((fact/FACT*0.8)+0.2));
+            }
+        } else {
+            return DEFAULT_SCORE;
+        }
+    }
+
+}

Propchange: 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesEntityScoreProvider.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java?rev=1468829&r1=1468828&r2=1468829&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/GeonamesIndexingSource.java
 Wed Apr 17 10:38:48 2013
@@ -266,7 +266,7 @@ public class GeonamesIndexingSource impl
                 Integer geoNamesId = Integer.parseInt(id);
                 //create a new Doc based on the first Element (geonamesID)
                 Representation doc = valueFactory.createRepresentation(
-                    new 
StringBuilder(GEONAMES_RESOURCE_NS).append(id).toString());
+                    new 
StringBuilder(GEONAMES_RESOURCE_NS).append(id).append('/').toString());
                 //add the Integer id so that we do not need to parse it from 
the subject URI
                 doc.add(GeonamesPropertyEnum.idx_id.toString(), geoNamesId);
                 //add the geonames:Feature type

Modified: 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/HierarchyProcessor.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/HierarchyProcessor.java?rev=1468829&r1=1468828&r2=1468829&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/HierarchyProcessor.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/geonames/src/main/java/org/apache/stanbol/entityhub/indexing/geonames/HierarchyProcessor.java
 Wed Apr 17 10:38:48 2013
@@ -359,7 +359,7 @@ public class HierarchyProcessor implemen
         //add country
         if(adminIds[0] != null){
             doc.add(GeonamesPropertyEnum.gn_parentCountry.toString(), 
vf.createReference(
-                new 
StringBuilder(GeonamesConstants.GEONAMES_RESOURCE_NS).append(adminIds[0]).toString()));
+                new 
StringBuilder(GeonamesConstants.GEONAMES_RESOURCE_NS).append(adminIds[0]).append('/').toString()));
             parentLevel = Collections.singleton(adminIds[0]);
         }  else {
             parentLevel = Collections.emptySet();
@@ -413,7 +413,7 @@ public class HierarchyProcessor implemen
             if(id != null){
                 refs.add(vf.createReference(
                     new StringBuilder(GeonamesConstants.GEONAMES_RESOURCE_NS)
-                    .append(id).toString()));
+                    .append(id).append('/').toString()));
             }
         }
         return refs;

Added: 
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/entityTypes.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/entityTypes.properties?rev=1468829&view=auto
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/entityTypes.properties
 (added)
+++ 
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/entityTypes.properties
 Wed Apr 17 10:38:48 2013
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+field=geonames:featureClass
+
+# Only index geonames features with feature class A or P
+values=geonames:A;geonames:P

Propchange: 
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/entityTypes.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: 
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/indexing.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/indexing.properties?rev=1468829&r1=1468828&r2=1468829&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/indexing.properties
 (original)
+++ 
stanbol/trunk/entityhub/indexing/geonames/src/main/resources/indexing/config/indexing.properties
 Wed Apr 17 10:38:48 2013
@@ -41,18 +41,19 @@ Synchronized=false
 #the configured source is also the default. You can also configure a directory
 #if you want to index from multiple dumps (e.g. only specific countries)
 
entityDataIterable=org.apache.stanbol.entityhub.indexing.geonames.GeonamesIndexingSource,source:geonames/allCountries.zip
-#no support for entity scores
-entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider
-
+#scores entities based on class (A -> 1.0, P -> based on population [0.2..1], 
rest -> 0.3)
+entityScoreProvider=org.apache.stanbol.entityhub.indexing.geonames.GeonamesEntityScoreProvider
 # ------------
 # EntityProcessor
 # ------------
 
 # Three processors
+# (0) index only some feature classes
 # (1) alternate labels
 # (2) hierarchy
 # (3) field mappings
 # Default Entity Processor configuration
+# (0) 
org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes;
 
entityProcessor=org.apache.stanbol.entityhub.indexing.geonames.AlternateLabelProcessor;org.apache.stanbol.entityhub.indexing.geonames.HierarchyProcessor;org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
 
 # ------------


Reply via email to