Author: markg
Date: Mon May 19 14:26:39 2014
New Revision: 1595913

URL: http://svn.apache.org/r1595913
Log:
OPENNLP-698
Fixed the cleanInput() method so that it handles multi-token names. There is
now a property, opennlp.geoentitylinker.gaz.doublequote (default: false), that
can be added to the entitylinker.properties file, with which the user can
define whether or not to put double quotes around names.

Modified:
    
opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java

Modified: 
opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
URL: 
http://svn.apache.org/viewvc/opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java?rev=1595913&r1=1595912&r2=1595913&view=diff
==============================================================================
--- 
opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
 (original)
+++ 
opennlp/addons/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
 Mon May 19 14:26:39 2014
@@ -19,6 +19,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.logging.Level;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
@@ -49,7 +50,8 @@ public class GazetteerSearcher {
 
   private final String REGEX_CLEAN = "[^\\p{L}\\p{Nd}]";
   private static final Logger LOGGER = 
Logger.getLogger(GazetteerSearcher.class);
-  private double scoreCutoff = .90;
+  private double scoreCutoff = .70;
+  private boolean doubleQuoteAllSearchTerms = false;
   private Directory geonamesIndex;//= new MMapDirectory(new File(indexloc));
   private IndexReader geonamesReader;// = DirectoryReader.open(geonamesIndex);
   private IndexSearcher geonamesSearcher;// = new 
IndexSearcher(geonamesReader);
@@ -61,7 +63,17 @@ public class GazetteerSearcher {
   private Analyzer usgsAnalyzer;
   private EntityLinkerProperties properties;
 
+  public static void main(String[] args) {
+    try {
+      boolean b = Boolean.valueOf("true");
 
+      new GazetteerSearcher(new EntityLinkerProperties(new 
File("c:\\temp\\entitylinker.properties"))).geonamesFind("townsville, 
queensland", 5, "");
+    } catch (IOException ex) {
+      
java.util.logging.Logger.getLogger(GazetteerSearcher.class.getName()).log(Level.SEVERE,
 null, ex);
+    } catch (Exception ex) {
+      
java.util.logging.Logger.getLogger(GazetteerSearcher.class.getName()).log(Level.SEVERE,
 null, ex);
+    }
+  }
 
   public GazetteerSearcher(EntityLinkerProperties properties) throws Exception 
{
     this.properties = properties;
@@ -272,14 +284,19 @@ public class GazetteerSearcher {
   }
 
   /**
-   * Replaces any noise chars with 
+   * Replaces any noise chars with a space, and depending on configuration 
adds double quotes to the string
+   *
    * @param input
-   * @return 
+   * @return
    */
   private String cleanInput(String input) {
     String output = input.replaceAll(REGEX_CLEAN, " ").trim();
-    System.out.println(output);
-    return "\"" + output + "\"";
+    if (doubleQuoteAllSearchTerms) {
+      return "\"" + output + "\"";
+    } else {
+      return output;
+    }
+
   }
 
   private void init() throws Exception {
@@ -290,7 +307,10 @@ public class GazetteerSearcher {
         LOGGER.error(new Exception("USGS Gaz location not found"));
       }
       String cutoff = 
properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", 
String.valueOf(scoreCutoff));
+
       scoreCutoff = Double.valueOf(cutoff);
+      String doubleQuote = 
properties.getProperty("opennlp.geoentitylinker.gaz.doublequote", 
String.valueOf(doubleQuoteAllSearchTerms));
+      doubleQuoteAllSearchTerms = Boolean.valueOf(doubleQuote);
       usgsIndex = new MMapDirectory(new File(indexloc));
       usgsReader = DirectoryReader.open(usgsIndex);
       usgsSearcher = new IndexSearcher(usgsReader);


Reply via email to