This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git


The following commit(s) were added to refs/heads/main by this push:
     new 23e45b63 Geocoder: scoring by population, parsing query text support 
AND,OR operators (#712)
23e45b63 is described below

commit 23e45b638b351ac5a643cc850e6a17ccf3f928ef
Author: Perdjesk <[email protected]>
AuthorDate: Wed Jun 28 20:22:10 2023 +0200

    Geocoder: scoring by population, parsing query text support AND,OR 
operators (#712)
---
 baremaps-core/pom.xml                              |  4 +
 .../baremaps/geocoder/GeonamesDocumentMapper.java  |  6 +-
 .../baremaps/geocoder/GeonamesQueryBuilder.java    | 54 +++++++++++-
 .../org/apache/baremaps/iploc/IpLocMapper.java     |  3 +-
 .../baremaps/geocoder/GeonamesIndexTest.java       | 95 ++++++++++++++++++++++
 .../apache/baremaps/server/GeocoderResource.java   | 49 +++++++----
 pom.xml                                            |  5 ++
 7 files changed, 194 insertions(+), 22 deletions(-)

diff --git a/baremaps-core/pom.xml b/baremaps-core/pom.xml
index 8502ed1d..e9bfe79f 100644
--- a/baremaps-core/pom.xml
+++ b/baremaps-core/pom.xml
@@ -93,6 +93,10 @@
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-expressions</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-queryparser</artifactId>
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesDocumentMapper.java
 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesDocumentMapper.java
index 871e44f9..cd6177ac 100644
--- 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesDocumentMapper.java
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesDocumentMapper.java
@@ -29,19 +29,21 @@ public class GeonamesDocumentMapper implements 
Function<GeonamesRecord, Document
     document.add(new TextField("name", record.getName(), Field.Store.YES));
     document.add(new TextField("country", 
IsoCountriesUtils.getCountry(record.getCountryCode()),
         Field.Store.YES));
+    // countryCode is not analyzed and thus must be queried using uppercase
     document.add(new StringField("countryCode", record.getCountryCode(), 
Field.Store.YES));
     document.add(new LatLonPoint("point", record.getLatitude(), 
record.getLongitude()));
     document.add(new StoredField("longitude", record.getLongitude()));
     document.add(new StoredField("latitude", record.getLatitude()));
-    document.add(new StoredField("asciiname", record.getAsciiname()));
+    document.add(new TextField("asciiname", record.getAsciiname(), 
Field.Store.YES));
     document.add(new StoredField("alternatenames", 
record.getAlternatenames()));
-    document.add(new StoredField("featureClass", record.getFeatureClass()));
+    document.add(new StringField("featureClass", record.getFeatureClass(), 
Field.Store.YES));
     document.add(new StoredField("featureCode", record.getFeatureCode()));
     document.add(new StoredField("cc2", record.getCc2()));
     document.add(new StoredField("admin1Code", record.getAdmin1Code()));
     document.add(new StoredField("admin2Code", record.getAdmin2Code()));
     document.add(new StoredField("admin3Code", record.getAdmin3Code()));
     document.add(new StoredField("admin4Code", record.getAdmin4Code()));
+    document.add(new NumericDocValuesField("population", 
record.getPopulation()));
     document.add(new StoredField("population", record.getPopulation()));
     if (record.getElevation() != null) {
       document.add(new StoredField("elevation", record.getElevation()));
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesQueryBuilder.java
 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesQueryBuilder.java
index 57838f80..561cb321 100644
--- 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesQueryBuilder.java
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesQueryBuilder.java
@@ -14,13 +14,19 @@ package org.apache.baremaps.geocoder;
 
 
 
+import java.text.ParseException;
 import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.expressions.Expression;
+import org.apache.lucene.expressions.SimpleBindings;
+import org.apache.lucene.expressions.js.JavascriptCompiler;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.function.FunctionScoreQuery;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.queryparser.simple.SimpleQueryParser;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DoubleValuesSource;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 
@@ -35,6 +41,11 @@ public class GeonamesQueryBuilder {
 
   private String countryCode = "";
 
+  private boolean scoringByPopulation;
+
+  private boolean andOperator;
+
+
   public GeonamesQueryBuilder() {
     this(GeocoderConstants.ANALYZER);
   }
@@ -53,15 +64,37 @@ public class GeonamesQueryBuilder {
     return this;
   }
 
-  public Query build() {
+  /**
+   * The scoring will take into account the population
+   */
+  public GeonamesQueryBuilder withScoringByPopulation() {
+    this.scoringByPopulation = true;
+    return this;
+  }
+
+  /**
+   * The queryText will be parsed with AND operator between terms instead of 
OR.
+   */
+  public GeonamesQueryBuilder withAndOperator() {
+    this.andOperator = true;
+    return this;
+  }
+
+  public Query build() throws ParseException {
     var builder = new BooleanQuery.Builder();
 
     if (queryText != null) {
       var queryTextEsc = QueryParser.escape(queryText);
       if (!queryTextEsc.isBlank()) {
-        var fieldWeights = Map.of("name", 1f, "country", 1f);
-        var termsQuery = new SimpleQueryParser(analyzer, 
fieldWeights).parse(queryTextEsc);
-        builder.add(termsQuery, BooleanClause.Occur.SHOULD);
+        var fieldWeights = Map.of("name", 1f, "asciiname", 1f, "country", 1f, 
"countryCode", 1f);
+        var parser = new SimpleQueryParser(analyzer, fieldWeights);
+        if (andOperator) {
+          // AND operator between query terms parsed instead of default OR
+          parser.setDefaultOperator(BooleanClause.Occur.MUST);
+        }
+        var termsQuery = parser.parse(queryTextEsc);
+        // at least one term of the queryText must be present
+        builder.add(termsQuery, BooleanClause.Occur.MUST);
       }
     }
 
@@ -73,6 +106,19 @@ public class GeonamesQueryBuilder {
       }
     }
 
+    if (scoringByPopulation) {
+      var query = builder.build();
+      // ln(1+population) to tolerate entries with population=0
+      Expression expr = JavascriptCompiler.compile("_score + 
ln(1+population)");
+
+      var bindings = new SimpleBindings();
+      bindings.add("_score", DoubleValuesSource.SCORES);
+      bindings.add("population", 
DoubleValuesSource.fromIntField("population"));
+
+      return new FunctionScoreQuery(
+          query,
+          expr.getDoubleValuesSource(bindings));
+    }
     return builder.build();
   }
 }
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLocMapper.java 
b/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLocMapper.java
index 78caa1da..0215afa2 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLocMapper.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLocMapper.java
@@ -15,6 +15,7 @@ package org.apache.baremaps.iploc;
 
 import com.google.common.net.InetAddresses;
 import java.io.IOException;
+import java.text.ParseException;
 import java.util.Optional;
 import java.util.function.Function;
 import java.util.regex.Pattern;
@@ -182,7 +183,7 @@ public class IpLocMapper implements Function<NicObject, 
Optional<IpLocObject>> {
    * @throws IOException if an I/O error occurs
    */
   private Optional<Coordinate> findLocation(String searchTerms, String 
countryCode)
-      throws IOException {
+      throws IOException, ParseException {
     var indexSearcher = searcherManager.acquire();
     var geonamesQuery =
         new 
GeonamesQueryBuilder().queryText(searchTerms).countryCode(countryCode).build();
diff --git 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeonamesIndexTest.java
 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeonamesIndexTest.java
new file mode 100644
index 00000000..60f2126a
--- /dev/null
+++ 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeonamesIndexTest.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not 
use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express
+ * or implied. See the License for the specific language governing permissions 
and limitations under
+ * the License.
+ */
+
+package org.apache.baremaps.geocoder;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import org.apache.baremaps.testing.TestFiles;
+import org.apache.baremaps.utils.FileUtils;
+import org.apache.baremaps.workflow.WorkflowContext;
+import org.apache.baremaps.workflow.tasks.CreateGeonamesIndex;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.store.MMapDirectory;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+
+public class GeonamesIndexTest {
+
+  private static Path directory;
+  private static IndexSearcher searcher;
+
+  @BeforeAll
+  public static void beforeAll() throws Exception {
+    // Init the geocoder service
+    directory = Files.createTempDirectory(Paths.get("."), "geocoder_");
+
+    // Create the geonames index
+    var data = TestFiles.resolve("geonames/LI.txt");
+    var task = new CreateGeonamesIndex(data, directory);
+    task.execute(new WorkflowContext());
+    var dir = MMapDirectory.open(directory);
+    var searcherManager = new SearcherManager(dir, new SearcherFactory());
+    searcher = searcherManager.acquire();
+  }
+
+  @AfterAll
+  public static void afterAll() throws IOException {
+    FileUtils.deleteRecursively(directory);
+  }
+
+  @Test
+  void testCreateIndex() throws Exception {
+    var geonamesQuery =
+        new 
GeonamesQueryBuilder().queryText("vaduz").countryCode("LI").build();
+    var topDocs = searcher.search(geonamesQuery, 1);
+    var doc = 
searcher.doc(Arrays.stream(topDocs.scoreDocs).findFirst().get().doc);
+    assertEquals("Vaduz", doc.getField("name").stringValue());
+  }
+
+  @Test
+  void testOrQuery() throws Exception {
+    var geonamesQuery =
+        new GeonamesQueryBuilder().queryText("vaduz 
berlin").countryCode("LI").build();
+    var topDocs = searcher.search(geonamesQuery, 1);
+    var doc = 
searcher.doc(Arrays.stream(topDocs.scoreDocs).findFirst().get().doc);
+    assertEquals("Vaduz", doc.getField("name").stringValue());
+  }
+
+  @Test
+  void testAndQueryNoHits() throws Exception {
+    var geonamesQuery =
+        new GeonamesQueryBuilder().queryText("vaduz 
berlin").withAndOperator().countryCode("LI")
+            .build();
+    var topDocs = searcher.search(geonamesQuery, 1);
+    assertEquals(0, topDocs.totalHits.value);
+  }
+
+  @Test
+  void testAndQuery() throws Exception {
+    var geonamesQuery =
+        new GeonamesQueryBuilder().queryText("vaduz 
liechtenstein").withAndOperator()
+            .countryCode("LI").build();
+    var topDocs = searcher.search(geonamesQuery, 1);
+    var doc = 
searcher.doc(Arrays.stream(topDocs.scoreDocs).findFirst().get().doc);
+    assertEquals("Vaduz", doc.getField("name").stringValue());
+  }
+}
diff --git 
a/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResource.java
 
b/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResource.java
index f359a90d..9b3543d8 100644
--- 
a/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResource.java
+++ 
b/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResource.java
@@ -17,13 +17,17 @@ import static 
com.google.common.net.HttpHeaders.CONTENT_TYPE;
 import static javax.ws.rs.core.MediaType.APPLICATION_JSON;
 
 import java.io.IOException;
+import java.text.ParseException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import javax.inject.Inject;
 import javax.inject.Singleton;
-import javax.ws.rs.*;
+import javax.ws.rs.DefaultValue;
+import javax.ws.rs.GET;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.WebApplicationException;
 import javax.ws.rs.core.Response;
 import org.apache.baremaps.geocoder.GeonamesQueryBuilder;
 import org.apache.lucene.search.IndexSearcher;
@@ -54,28 +58,43 @@ public class GeocoderResource {
 
   @GET
   @javax.ws.rs.Path("/api/geocoder")
-  public Response getIpToLocation(
+  public Response searchLocations(
       @QueryParam("queryText") String queryText,
       @QueryParam("countryCode") @DefaultValue("") String countryCode,
-      @QueryParam("limit") @DefaultValue("10") int limit) throws IOException {
+      @QueryParam("limit") @DefaultValue("10") int limit) {
     if (queryText == null) {
       throw new 
WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
           .entity("The queryText parameter is mandatory").build());
     }
-    var query = new 
GeonamesQueryBuilder().queryText(queryText).countryCode(countryCode).build();
-    var searcher = searcherManager.acquire();
     try {
-      var result = searcher.search(query, limit);
-      var results =
-          Arrays.stream(result.scoreDocs).map(scoreDoc -> asResult(searcher, 
scoreDoc)).toList();
-      return Response.status(200).header(ACCESS_CONTROL_ALLOW_ORIGIN, "*")
-          .header(CONTENT_TYPE, APPLICATION_JSON).entity(new 
GeocoderResponse(results)).build();
-    } catch (IllegalArgumentException e) {
-      return Response.status(400).entity(e.getMessage()).build();
+      IndexSearcher searcher = searcherManager.acquire();
+      try {
+        // Querying to search locations uses the AND operator between terms such that 
every term "adds up"
+        // Examples of queryText:
+        // - "paris", returns paris in france in first results (i.e because of 
scoring with
+        // population)
+        // - "paris brazil", returns paris in brazil and not paris in france.
+        var query = new GeonamesQueryBuilder()
+            
.queryText(queryText).countryCode(countryCode).withScoringByPopulation()
+            .withAndOperator()
+            .build();
+
+        var result = searcher.search(query, limit);
+        var results =
+            Arrays.stream(result.scoreDocs).map(scoreDoc -> asResult(searcher, 
scoreDoc)).toList();
+        return 
Response.status(Response.Status.OK).header(ACCESS_CONTROL_ALLOW_ORIGIN, "*")
+            .header(CONTENT_TYPE, APPLICATION_JSON).entity(new 
GeocoderResponse(results)).build();
+      } catch (IllegalArgumentException e) {
+        return 
Response.status(Response.Status.BAD_REQUEST).entity(e.getMessage()).build();
+      } catch (IOException | ParseException e) {
+        return 
Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage())
+            .build();
+      } finally {
+        searcherManager.release(searcher);
+      }
     } catch (IOException e) {
-      return Response.status(500).entity(e.getMessage()).build();
-    } finally {
-      searcherManager.release(searcher);
+      return 
Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage())
+          .build();
     }
   }
 
diff --git a/pom.xml b/pom.xml
index 53844624..73cbab07 100644
--- a/pom.xml
+++ b/pom.xml
@@ -349,6 +349,11 @@
         <artifactId>lucene-core</artifactId>
         <version>${version.lucene}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-expressions</artifactId>
+        <version>${version.lucene}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-queryparser</artifactId>

Reply via email to