This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch overturemaps-lucene
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git


The following commit(s) were added to refs/heads/overturemaps-lucene by this 
push:
     new 7088d3cf6 Add abstractions to create index from data tables
7088d3cf6 is described below

commit 7088d3cf635d9f5282bf1d0a416f8c292374b59d
Author: Bertil Chapuis <[email protected]>
AuthorDate: Tue Nov 5 09:28:16 2024 +0100

    Add abstractions to create index from data tables
---
 .../apache/baremaps/geocoder/DataRowConsumer.java  |   3 +-
 .../apache/baremaps/geocoder/DataRowMapper.java    | 429 +++++++++++----------
 .../apache/baremaps/geocoder/DataTableQuery.java   |  45 ---
 .../baremaps/geocoder/DataTableQueryBuilder.java   | 142 +++++++
 .../geocoder/geonames/GeonamesQueryBuilder.java    |   3 +-
 ...namesIndexTest.java => DataTableIndexTest.java} |  69 ++--
 .../geocoder/geonames/GeonamesIndexTest.java       |   1 -
 .../geocoder/geonames/GeonamesReaderTest.java      |   1 -
 .../openstreetmap/OpenStreetMapIndexTest.java      |   1 -
 .../apache/baremaps/data/storage/DataRowImpl.java  |   2 +-
 .../baremaps/data/storage/DataSchemaImpl.java      |  10 +-
 .../geoparquet/GeoParquetGroupFactory.java         |   9 +
 pom.xml                                            |   1 -
 13 files changed, 407 insertions(+), 309 deletions(-)

diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java
index 175afe89a..02ff1c10d 100644
--- 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java
@@ -17,13 +17,12 @@
 
 package org.apache.baremaps.geocoder;
 
+import java.util.function.Consumer;
 import org.apache.baremaps.data.storage.DataRow;
 import org.apache.lucene.index.IndexWriter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.function.Consumer;
-
 public class DataRowConsumer implements Consumer<DataRow> {
 
   private static final Logger logger = 
LoggerFactory.getLogger(DataRowConsumer.class);
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java
index 3938d42cf..2475a84c5 100644
--- 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java
@@ -1,232 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.baremaps.geocoder;
 
+import java.net.InetAddress;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
 import org.apache.baremaps.data.storage.DataColumn;
 import org.apache.baremaps.data.storage.DataRow;
 import org.apache.baremaps.data.storage.DataSchema;
-import org.locationtech.jts.geom.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.FloatPoint;
-import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LatLonPoint;
 import org.apache.lucene.document.LatLonShape;
-
-import java.net.InetAddress;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.LocalTime;
-import java.util.Arrays;
-import java.util.Map;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.function.Function;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.locationtech.jts.geom.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class DataRowMapper implements Function<DataRow, Document> {
 
-    private static final Logger logger = 
LoggerFactory.getLogger(DataRowMapper.class);
+  private static final Logger logger = 
LoggerFactory.getLogger(DataRowMapper.class);
 
-    @Override
-    public Document apply(DataRow dataRow) {
-        Document doc = new Document();
-        DataSchema schema = dataRow.schema();
-        List<DataColumn> columns = schema.columns();
-        for (int i = 0; i < columns.size(); i++) {
-            DataColumn column = columns.get(i);
-            String columnName = column.name();
-            Object value = dataRow.get(i);
-            if (value == null) continue;
-            DataColumn.Type type = column.type();
+  @Override
+  public Document apply(DataRow dataRow) {
+    Document doc = new Document();
+    DataSchema schema = dataRow.schema();
+    List<DataColumn> columns = schema.columns();
+    for (int i = 0; i < columns.size(); i++) {
+      DataColumn column = columns.get(i);
+      String columnName = column.name();
+      Object value = dataRow.get(i);
+      if (value == null)
+        continue;
+      DataColumn.Type type = column.type();
 
-            try {
-                switch (type) {
-                    case BINARY:
-                        doc.add(new StoredField(columnName, (byte[]) value));
-                        break;
-                    case BYTE:
-                        doc.add(new IntPoint(columnName, ((Byte) 
value).intValue()));
-                        doc.add(new StoredField(columnName, ((Byte) 
value).intValue()));
-                        break;
-                    case BOOLEAN:
-                        doc.add(new StringField(columnName, value.toString(), 
Field.Store.YES));
-                        break;
-                    case SHORT:
-                        doc.add(new IntPoint(columnName, ((Short) 
value).intValue()));
-                        doc.add(new StoredField(columnName, ((Short) 
value).intValue()));
-                        break;
-                    case INTEGER:
-                        doc.add(new IntPoint(columnName, (Integer) value));
-                        doc.add(new StoredField(columnName, (Integer) value));
-                        break;
-                    case LONG:
-                        doc.add(new LongPoint(columnName, (Long) value));
-                        doc.add(new StoredField(columnName, (Long) value));
-                        break;
-                    case FLOAT:
-                        doc.add(new FloatPoint(columnName, (Float) value));
-                        doc.add(new StoredField(columnName, (Float) value));
-                        break;
-                    case DOUBLE:
-                        doc.add(new DoublePoint(columnName, (Double) value));
-                        doc.add(new StoredField(columnName, (Double) value));
-                        break;
-                    case STRING:
-                        doc.add(new TextField(columnName, (String) value, 
Field.Store.YES));
-                        break;
-                    case COORDINATE:
-                        Coordinate coord = (Coordinate) value;
-                        double lat = coord.getY();
-                        double lon = coord.getX();
-                            doc.add(new LatLonPoint(columnName, lat, lon));
-                            doc.add(new StoredField(columnName + "_lat", lat));
-                            doc.add(new StoredField(columnName + "_lon", lon));
-                        break;
-                    case POINT:
-                        Point point = (Point) value;
-                        double pointLat = point.getY();
-                        double pointLon = point.getX();
-                            doc.add(new LatLonPoint(columnName, pointLat, 
pointLon));
-                            doc.add(new StoredField(columnName + "_lat", 
pointLat));
-                            doc.add(new StoredField(columnName + "_lon", 
pointLon));
-                        break;
-                    case LINESTRING:
-                    case POLYGON:
-                    case MULTIPOINT:
-                    case MULTILINESTRING:
-                    case MULTIPOLYGON:
-                    case GEOMETRYCOLLECTION:
-                    case GEOMETRY:
-                        Geometry geometry = (Geometry) value;
-                        if (geometry != null) {
-                            Field[] shapeFields = 
createShapeFields(columnName, geometry);
-                            for (Field field : shapeFields) {
-                                doc.add(field);
-                            }
-                            doc.add(new StoredField(columnName + "_wkt", 
geometry.toText()));
-                        }
-                        break;
-                    case ENVELOPE:
-                        Envelope envelope = (Envelope) value;
-                        String envelopeStr = envelope.toString();
-                        doc.add(new StringField(columnName, envelopeStr, 
Field.Store.YES));
-                        break;
-                    case INET_ADDRESS:
-                    case INET4_ADDRESS:
-                    case INET6_ADDRESS:
-                        InetAddress addr = (InetAddress) value;
-                        doc.add(new StringField(columnName, 
addr.getHostAddress(), Field.Store.YES));
-                        break;
-                    case LOCAL_DATE:
-                        LocalDate date = (LocalDate) value;
-                        doc.add(new StringField(columnName, date.toString(), 
Field.Store.YES));
-                        break;
-                    case LOCAL_TIME:
-                        LocalTime time = (LocalTime) value;
-                        doc.add(new StringField(columnName, time.toString(), 
Field.Store.YES));
-                        break;
-                    case LOCAL_DATE_TIME:
-                        LocalDateTime dateTime = (LocalDateTime) value;
-                        doc.add(new StringField(columnName, 
dateTime.toString(), Field.Store.YES));
-                        break;
-                    case NESTED:
-                        Map<String, Object> map = (Map<String, Object>) value;
-                        for (Map.Entry<String, Object> entry : map.entrySet()) 
{
-                            String nestedKey = columnName + "." + 
entry.getKey();
-                            Object nestedValue = entry.getValue();
-                            if (nestedValue != null) {
-                                doc.add(new TextField(nestedKey, 
nestedValue.toString(), Field.Store.YES));
-                            }
-                        }
-                        break;
-                    default:
-                        doc.add(new StringField(columnName, value.toString(), 
Field.Store.YES));
-                        break;
-                }
-            } catch (Exception e) {
-                logger.error("Error processing column '{}' with value '{}': 
{}", columnName, value, e.getMessage());
+      try {
+        switch (type) {
+          case BINARY:
+            doc.add(new StoredField(columnName, (byte[]) value));
+            break;
+          case BYTE:
+            doc.add(new IntPoint(columnName, ((Byte) value).intValue()));
+            doc.add(new StoredField(columnName, ((Byte) value).intValue()));
+            break;
+          case BOOLEAN:
+            doc.add(new StringField(columnName, value.toString(), 
Field.Store.YES));
+            break;
+          case SHORT:
+            doc.add(new IntPoint(columnName, ((Short) value).intValue()));
+            doc.add(new StoredField(columnName, ((Short) value).intValue()));
+            break;
+          case INTEGER:
+            doc.add(new IntPoint(columnName, (Integer) value));
+            doc.add(new StoredField(columnName, (Integer) value));
+            break;
+          case LONG:
+            doc.add(new LongPoint(columnName, (Long) value));
+            doc.add(new StoredField(columnName, (Long) value));
+            break;
+          case FLOAT:
+            doc.add(new FloatPoint(columnName, (Float) value));
+            doc.add(new StoredField(columnName, (Float) value));
+            break;
+          case DOUBLE:
+            doc.add(new DoublePoint(columnName, (Double) value));
+            doc.add(new StoredField(columnName, (Double) value));
+            break;
+          case STRING:
+            doc.add(new TextField(columnName, (String) value, 
Field.Store.YES));
+            break;
+          case COORDINATE:
+            Coordinate coord = (Coordinate) value;
+            double lat = coord.getY();
+            double lon = coord.getX();
+            doc.add(new LatLonPoint(columnName, lat, lon));
+            doc.add(new StoredField(columnName + "_lat", lat));
+            doc.add(new StoredField(columnName + "_lon", lon));
+            break;
+          case POINT:
+            Point point = (Point) value;
+            double pointLat = point.getY();
+            double pointLon = point.getX();
+            doc.add(new LatLonPoint(columnName, pointLat, pointLon));
+            doc.add(new StoredField(columnName + "_lat", pointLat));
+            doc.add(new StoredField(columnName + "_lon", pointLon));
+            break;
+          case LINESTRING:
+          case POLYGON:
+          case MULTIPOINT:
+          case MULTILINESTRING:
+          case MULTIPOLYGON:
+          case GEOMETRYCOLLECTION:
+          case GEOMETRY:
+            Geometry geometry = (Geometry) value;
+            if (geometry != null) {
+              Field[] shapeFields = createShapeFields(columnName, geometry);
+              for (Field field : shapeFields) {
+                doc.add(field);
+              }
+              doc.add(new StoredField(columnName + "_wkt", geometry.toText()));
             }
-        }
-        return doc;
-    }
-
-    private Field[] createShapeFields(String fieldName, Geometry geometry) {
-        if (geometry instanceof Point point) {
-            double lat = point.getY();
-            double lon = point.getX();
-            return new Field[] { new LatLonPoint(fieldName, lat, lon) };
-        } else if (geometry instanceof LineString lineString) {
-            return LatLonShape.createIndexableFields(fieldName, 
convertToLuceneLine(lineString));
-        } else if (geometry instanceof Polygon polygon) {
-            org.apache.lucene.geo.Polygon lucenePolygon = 
convertToLucenePolygon(polygon);
-            return LatLonShape.createIndexableFields(fieldName, lucenePolygon);
-        } else if (geometry instanceof MultiPolygon multiPolygon) {
-            return createFieldsFromMultiPolygon(fieldName, multiPolygon);
-        } else if (geometry instanceof GeometryCollection collection) {
-            List<Field> fieldList = new ArrayList<>();
-            for (int i = 0; i < collection.getNumGeometries(); i++) {
-                Geometry geom = collection.getGeometryN(i);
-                Field[] fields = createShapeFields(fieldName, geom);
-                fieldList.addAll(Arrays.asList(fields));
+            break;
+          case ENVELOPE:
+            Envelope envelope = (Envelope) value;
+            String envelopeStr = envelope.toString();
+            doc.add(new StringField(columnName, envelopeStr, Field.Store.YES));
+            break;
+          case INET_ADDRESS:
+          case INET4_ADDRESS:
+          case INET6_ADDRESS:
+            InetAddress addr = (InetAddress) value;
+            doc.add(new StringField(columnName, addr.getHostAddress(), 
Field.Store.YES));
+            break;
+          case LOCAL_DATE:
+            LocalDate date = (LocalDate) value;
+            doc.add(new StringField(columnName, date.toString(), 
Field.Store.YES));
+            break;
+          case LOCAL_TIME:
+            LocalTime time = (LocalTime) value;
+            doc.add(new StringField(columnName, time.toString(), 
Field.Store.YES));
+            break;
+          case LOCAL_DATE_TIME:
+            LocalDateTime dateTime = (LocalDateTime) value;
+            doc.add(new StringField(columnName, dateTime.toString(), 
Field.Store.YES));
+            break;
+          case NESTED:
+            Map<String, Object> map = (Map<String, Object>) value;
+            for (Map.Entry<String, Object> entry : map.entrySet()) {
+              String nestedKey = columnName + "." + entry.getKey();
+              Object nestedValue = entry.getValue();
+              if (nestedValue != null) {
+                doc.add(new TextField(nestedKey, nestedValue.toString(), 
Field.Store.YES));
+              }
             }
-            return fieldList.toArray(new Field[0]);
-        } else {
-            logger.warn("Unsupported geometry type '{}' for field '{}'", 
geometry.getGeometryType(), fieldName);
-            return new Field[0];
+            break;
+          default:
+            doc.add(new StringField(columnName, value.toString(), 
Field.Store.YES));
+            break;
         }
+      } catch (Exception e) {
+        logger.error("Error processing column '{}' with value '{}': {}", 
columnName, value,
+            e.getMessage());
+      }
     }
+    return doc;
+  }
 
-    private org.apache.lucene.geo.Line convertToLuceneLine(LineString 
lineString) {
-        Coordinate[] coords = lineString.getCoordinates();
-        double[] lats = new double[coords.length];
-        double[] lons = new double[coords.length];
-        for (int i = 0; i < coords.length; i++) {
-            lats[i] = coords[i].getY();
-            lons[i] = coords[i].getX();
-        }
-        return new org.apache.lucene.geo.Line(lats, lons);
+  private Field[] createShapeFields(String fieldName, Geometry geometry) {
+    if (geometry instanceof Point point) {
+      double lat = point.getY();
+      double lon = point.getX();
+      return new Field[] {new LatLonPoint(fieldName, lat, lon)};
+    } else if (geometry instanceof LineString lineString) {
+      return LatLonShape.createIndexableFields(fieldName, 
convertToLuceneLine(lineString));
+    } else if (geometry instanceof Polygon polygon) {
+      org.apache.lucene.geo.Polygon lucenePolygon = 
convertToLucenePolygon(polygon);
+      return LatLonShape.createIndexableFields(fieldName, lucenePolygon);
+    } else if (geometry instanceof MultiPolygon multiPolygon) {
+      return createFieldsFromMultiPolygon(fieldName, multiPolygon);
+    } else if (geometry instanceof GeometryCollection collection) {
+      List<Field> fieldList = new ArrayList<>();
+      for (int i = 0; i < collection.getNumGeometries(); i++) {
+        Geometry geom = collection.getGeometryN(i);
+        Field[] fields = createShapeFields(fieldName, geom);
+        fieldList.addAll(Arrays.asList(fields));
+      }
+      return fieldList.toArray(new Field[0]);
+    } else {
+      logger.warn("Unsupported geometry type '{}' for field '{}'", 
geometry.getGeometryType(),
+          fieldName);
+      return new Field[0];
     }
+  }
 
-    private org.apache.lucene.geo.Polygon 
convertToLucenePolygon(org.locationtech.jts.geom.Polygon jtsPolygon) {
-        LinearRing shell = jtsPolygon.getExteriorRing();
-        Coordinate[] shellCoords = shell.getCoordinates();
-        double[] lats = new double[shellCoords.length];
-        double[] lons = new double[shellCoords.length];
-        for (int i = 0; i < shellCoords.length; i++) {
-            lats[i] = shellCoords[i].getY();
-            lons[i] = shellCoords[i].getX();
-        }
+  private org.apache.lucene.geo.Line convertToLuceneLine(LineString 
lineString) {
+    Coordinate[] coords = lineString.getCoordinates();
+    double[] lats = new double[coords.length];
+    double[] lons = new double[coords.length];
+    for (int i = 0; i < coords.length; i++) {
+      lats[i] = coords[i].getY();
+      lons[i] = coords[i].getX();
+    }
+    return new org.apache.lucene.geo.Line(lats, lons);
+  }
 
-        int numHoles = jtsPolygon.getNumInteriorRing();
-        org.apache.lucene.geo.Polygon[] holes = new 
org.apache.lucene.geo.Polygon[numHoles];
-        for (int i = 0; i < numHoles; i++) {
-            LinearRing hole = jtsPolygon.getInteriorRingN(i);
-            Coordinate[] holeCoords = hole.getCoordinates();
-            double[] holeLats = new double[holeCoords.length];
-            double[] holeLons = new double[holeCoords.length];
-            for (int j = 0; j < holeCoords.length; j++) {
-                holeLats[j] = holeCoords[j].getY();
-                holeLons[j] = holeCoords[j].getX();
-            }
-            holes[i] = new org.apache.lucene.geo.Polygon(holeLats, holeLons);
-        }
+  private org.apache.lucene.geo.Polygon convertToLucenePolygon(
+      org.locationtech.jts.geom.Polygon jtsPolygon) {
+    LinearRing shell = jtsPolygon.getExteriorRing();
+    Coordinate[] shellCoords = shell.getCoordinates();
+    double[] lats = new double[shellCoords.length];
+    double[] lons = new double[shellCoords.length];
+    for (int i = 0; i < shellCoords.length; i++) {
+      lats[i] = shellCoords[i].getY();
+      lons[i] = shellCoords[i].getX();
+    }
 
-        return new org.apache.lucene.geo.Polygon(lats, lons, holes);
+    int numHoles = jtsPolygon.getNumInteriorRing();
+    org.apache.lucene.geo.Polygon[] holes = new 
org.apache.lucene.geo.Polygon[numHoles];
+    for (int i = 0; i < numHoles; i++) {
+      LinearRing hole = jtsPolygon.getInteriorRingN(i);
+      Coordinate[] holeCoords = hole.getCoordinates();
+      double[] holeLats = new double[holeCoords.length];
+      double[] holeLons = new double[holeCoords.length];
+      for (int j = 0; j < holeCoords.length; j++) {
+        holeLats[j] = holeCoords[j].getY();
+        holeLons[j] = holeCoords[j].getX();
+      }
+      holes[i] = new org.apache.lucene.geo.Polygon(holeLats, holeLons);
     }
 
-    private Field[] createFieldsFromMultiPolygon(String fieldName, 
MultiPolygon multiPolygon) {
-        List<Field> fieldList = new ArrayList<>();
-        for (int i = 0; i < multiPolygon.getNumGeometries(); i++) {
-            org.locationtech.jts.geom.Polygon polygon = 
(org.locationtech.jts.geom.Polygon) multiPolygon.getGeometryN(i);
-            org.apache.lucene.geo.Polygon lucenePolygon = 
convertToLucenePolygon(polygon);
-            Field[] fields = LatLonShape.createIndexableFields(fieldName, 
lucenePolygon);
-            fieldList.addAll(Arrays.asList(fields));
-        }
-        return fieldList.toArray(new Field[0]);
+    return new org.apache.lucene.geo.Polygon(lats, lons, holes);
+  }
+
+  private Field[] createFieldsFromMultiPolygon(String fieldName, MultiPolygon 
multiPolygon) {
+    List<Field> fieldList = new ArrayList<>();
+    for (int i = 0; i < multiPolygon.getNumGeometries(); i++) {
+      org.locationtech.jts.geom.Polygon polygon =
+          (org.locationtech.jts.geom.Polygon) multiPolygon.getGeometryN(i);
+      org.apache.lucene.geo.Polygon lucenePolygon = 
convertToLucenePolygon(polygon);
+      Field[] fields = LatLonShape.createIndexableFields(fieldName, 
lucenePolygon);
+      fieldList.addAll(Arrays.asList(fields));
     }
+    return fieldList.toArray(new Field[0]);
+  }
 }
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java
deleted file mode 100644
index 0d3c6dfc0..000000000
--- 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.baremaps.geocoder;
-
-import org.apache.baremaps.geocoder.openstreetmap.OsmTags;
-import org.apache.lucene.queryparser.classic.QueryParserBase;
-import org.apache.lucene.queryparser.simple.SimpleQueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-
-public class DataTableQuery {
-
-  private final String query;
-
-  public DataTableQuery(String query) {
-    this.query = query;
-  }
-
-  public Query build() {
-    var builder = new BooleanQuery.Builder();
-    var queryTextEsc = QueryParserBase.escape(query);
-
-    var parser = new SimpleQueryParser(GeocoderConstants.ANALYZER, 
OsmTags.NAME.key());
-    var termsQuery = parser.parse(queryTextEsc);
-    // at least one terms of the queryText must be present
-    builder.add(termsQuery, BooleanClause.Occur.MUST);
-    return builder.build();
-  }
-}
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQueryBuilder.java
 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQueryBuilder.java
new file mode 100644
index 000000000..b654f1832
--- /dev/null
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQueryBuilder.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geocoder;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.baremaps.data.storage.DataColumn;
+import org.apache.baremaps.data.storage.DataSchema;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryparser.classic.QueryParserBase;
+import org.apache.lucene.queryparser.simple.SimpleQueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * A builder for creating queries on a data table.
+ */
+public class DataTableQueryBuilder {
+
+  private final Analyzer analyzer;
+
+  private Map<String, Float> fields = new HashMap<>();
+
+  private String query;
+
+  /**
+   * Constructs a query builder with the default analyzer.
+   */
+  public DataTableQueryBuilder() {
+    this(GeocoderConstants.ANALYZER);
+  }
+
+  /**
+   * Constructs a query builder with the specified analyzer.
+   *
+   * @param analyzer the analyzer
+   */
+  public DataTableQueryBuilder(Analyzer analyzer) {
+    this.analyzer = analyzer;
+  }
+
+  /**
+   * Replace all the fields with the columns of the schema and a boost of 1.0.
+   *
+   * @param schema the schema
+   * @return the query builder
+   */
+  public DataTableQueryBuilder schema(DataSchema schema) {
+    this.fields = new HashMap<>(schema.columns().stream()
+        .collect(Collectors.toMap(DataColumn::name, column -> 1.0f)));
+    return this;
+  }
+
+  /**
+   * Replace all the fields with the specified fields and boosts.
+   *
+   * @param fields the fields and boosts
+   * @return the query builder
+   */
+  public DataTableQueryBuilder columns(Map<DataColumn, Float> fields) {
+    this.fields = new HashMap<>(fields.entrySet().stream()
+        .collect(Collectors.toMap(entry -> entry.getKey().name(), 
Map.Entry::getValue)));
+    return this;
+  }
+
+  /**
+   * Add a column with a specified boost.
+   *
+   * @param column the column
+   * @param boost the boost
+   * @return the query builder
+   */
+  public DataTableQueryBuilder column(DataColumn column, float boost) {
+    return column(column.name(), boost);
+  }
+
+  /**
+   * Add a column with a specified boost.
+   *
+   * @param column the column
+   * @param boost the boost
+   * @return the query builder
+   */
+  public DataTableQueryBuilder column(String column, float boost) {
+    fields.put(column, boost);
+    return this;
+  }
+
+  /**
+   * Set the query text.
+   *
+   * @param query the query text
+   * @return the query builder
+   */
+  public DataTableQueryBuilder query(String query) {
+    this.query = query;
+    return this;
+  }
+
+  /**
+   * Build the query.
+   *
+   * @return the query
+   */
+  public Query build() {
+    var builder = new BooleanQuery.Builder();
+
+    var parser = new SimpleQueryParser(analyzer, fields);
+    var escapedQuery = QueryParserBase.escape(query);
+    var termsQuery = parser.parse(escapedQuery);
+
+    // at least one term of the query text must be present
+    builder.add(termsQuery, BooleanClause.Occur.MUST);
+    return builder.build();
+  }
+
+  /**
+   * Get the analyzer.
+   *
+   * @return the analyzer
+   */
+  private static Analyzer getAnalyzer() {
+    return GeocoderConstants.ANALYZER;
+  }
+}
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
index d1cd32db2..d1d702ef1 100644
--- 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
@@ -22,7 +22,6 @@ package org.apache.baremaps.geocoder.geonames;
 import com.google.common.base.Strings;
 import java.text.ParseException;
 import java.util.Map;
-
 import org.apache.baremaps.geocoder.GeocoderConstants;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.expressions.Expression;
@@ -52,8 +51,8 @@ public class GeonamesQueryBuilder {
   private boolean scoringByPopulation;
 
   private boolean andOperator;
-  private String featureCode;
 
+  private String featureCode;
 
   public GeonamesQueryBuilder() {
     this(GeocoderConstants.ANALYZER);
diff --git 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/DataTableIndexTest.java
similarity index 56%
copy from 
baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
copy to 
baremaps-core/src/test/java/org/apache/baremaps/geocoder/DataTableIndexTest.java
index 0effcf9ad..2d5aba56b 100644
--- 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
+++ 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/DataTableIndexTest.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.baremaps.geocoder.geonames;
+package org.apache.baremaps.geocoder;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
@@ -24,11 +24,12 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.Arrays;
-
+import org.apache.baremaps.storage.geoparquet.GeoParquetDataTable;
 import org.apache.baremaps.testing.TestFiles;
 import org.apache.baremaps.utils.FileUtils;
-import org.apache.baremaps.workflow.WorkflowContext;
-import org.apache.baremaps.workflow.tasks.CreateGeonamesIndex;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.SearcherFactory;
 import org.apache.lucene.search.SearcherManager;
@@ -38,7 +39,7 @@ import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 
 
-public class GeonamesIndexTest {
+public class DataTableIndexTest {
 
   private static Path directory;
   private static IndexSearcher searcher;
@@ -49,10 +50,18 @@ public class GeonamesIndexTest {
     directory = Files.createTempDirectory(Paths.get("."), "geocoder_");
 
     // Create the geonames index
-    var data = TestFiles.resolve("baremaps-testing/data/geonames/sample.txt");
-    var task = new CreateGeonamesIndex(data, directory);
-    task.execute(new WorkflowContext());
     var dir = FSDirectory.open(directory);
+    var data = 
TestFiles.resolve("baremaps-testing/data/samples/example.parquet");
+    var config = new IndexWriterConfig(GeocoderConstants.ANALYZER);
+    try (var indexWriter = new IndexWriter(dir, config);
+        var inputStream = Files.newInputStream(data)) {
+      indexWriter.deleteAll();
+      var documents = new GeoParquetDataTable(data.toUri())
+          .stream()
+          .map(new DataRowMapper());
+      indexWriter.addDocuments((Iterable<Document>) documents::iterator);
+    }
+
     var searcherManager = new SearcherManager(dir, new SearcherFactory());
     searcher = searcherManager.acquire();
   }
@@ -63,48 +72,22 @@ public class GeonamesIndexTest {
   }
 
   @Test
-  void testCreateIndex() throws Exception {
-    var geonamesQuery =
-        new 
GeonamesQueryBuilder().queryText("yverdon").countryCode("CH").build();
-    var topDocs = searcher.search(geonamesQuery, 1);
-    var doc = 
searcher.doc(Arrays.stream(topDocs.scoreDocs).findFirst().get().doc);
-    assertEquals("Yverdon-les-bains", doc.getField("name").stringValue());
-  }
-
-  @Test
-  void testOrQuery() throws Exception {
-    var geonamesQuery = new GeonamesQueryBuilder()
-        .queryText("bains cheseaux")
-        .countryCode("CH")
-        .build();
-    var topDocs = searcher.search(geonamesQuery, 2);
-    assertEquals(2, topDocs.totalHits.value);
-    var doc0 = searcher.doc(topDocs.scoreDocs[0].doc);
-    assertEquals("Yverdon-les-bains", doc0.getField("name").stringValue());
-    var doc1 = searcher.doc(topDocs.scoreDocs[1].doc);
-    assertEquals("Route de Cheseaux 1", doc1.getField("name").stringValue());
-  }
-
-  @Test
-  void testAndQueryNoHits() throws Exception {
-    var geonamesQuery = new GeonamesQueryBuilder()
-        .queryText("bains cheseaux")
-        .andOperator()
-        .countryCode("CH")
+  void testQueryNoHits() throws Exception {
+    var geonamesQuery = new DataTableQueryBuilder()
+        .query("test")
         .build();
     var topDocs = searcher.search(geonamesQuery, 1);
     assertEquals(0, topDocs.totalHits.value);
   }
 
   @Test
-  void testAndQuery() throws Exception {
-    var geonamesQuery =
-        new GeonamesQueryBuilder().queryText("yverdon bains")
-            .andOperator()
-            .countryCode("CH")
-            .build();
+  void testQuery() throws Exception {
+    var geonamesQuery = new DataTableQueryBuilder()
+        .column("continent", 1.0f)
+        .query("oceania")
+        .build();
     var topDocs = searcher.search(geonamesQuery, 1);
     var doc = 
searcher.doc(Arrays.stream(topDocs.scoreDocs).findFirst().get().doc);
-    assertEquals("Yverdon-les-bains", doc.getField("name").stringValue());
+    assertEquals("Oceania", doc.getField("continent").stringValue());
   }
 }
diff --git 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
index 0effcf9ad..4bced3332 100644
--- 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
+++ 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
@@ -24,7 +24,6 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.Arrays;
-
 import org.apache.baremaps.testing.TestFiles;
 import org.apache.baremaps.utils.FileUtils;
 import org.apache.baremaps.workflow.WorkflowContext;
diff --git 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java
 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java
index 3f3170f95..c02bf59e1 100644
--- 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java
+++ 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java
@@ -21,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.*;
 
 import java.io.IOException;
 import java.nio.file.Files;
-
 import org.apache.baremaps.testing.TestFiles;
 import org.junit.jupiter.api.Test;
 
diff --git 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java
 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java
index 4d74343e6..605685be3 100644
--- 
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java
+++ 
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java
@@ -27,7 +27,6 @@ import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-
 import org.apache.baremaps.utils.FileUtils;
 import org.apache.baremaps.workflow.WorkflowContext;
 import org.apache.baremaps.workflow.tasks.CreateGeocoderOpenStreetMap;
diff --git 
a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java 
b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java
index 72c7c6dea..eca44b246 100644
--- 
a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java
+++ 
b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java
@@ -17,6 +17,7 @@
 
 package org.apache.baremaps.data.storage;
 
+
 import java.util.List;
 
 /**
@@ -67,5 +68,4 @@ public record DataRowImpl(DataSchema schema, List<Object> 
values) implements Dat
   public void set(int index, Object value) {
     values.set(index, value);
   }
-
 }
diff --git 
a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java
 
b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java
index c9b20c7a6..81dfc4036 100644
--- 
a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java
+++ 
b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java
@@ -23,11 +23,7 @@ import java.util.List;
 /**
  * A {@link DataSchema} defines the structure of a table.
  */
-public class DataSchemaImpl implements DataSchema {
-
-  private final String name;
-
-  private final List<DataColumn> columns;
+public record DataSchemaImpl(String name, List<DataColumn> columns) implements 
DataSchema {
 
   /**
    * Constructs a schema with the specified name and columns.
@@ -35,9 +31,7 @@ public class DataSchemaImpl implements DataSchema {
    * @param name the name of the schema
    * @param columns the columns of the schema
    */
-  public DataSchemaImpl(String name, List<DataColumn> columns) {
-    this.name = name;
-    this.columns = columns;
+  public DataSchemaImpl {
   }
 
   /**
diff --git 
a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java
 
b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java
index a88f3b13e..97cdb3570 100644
--- 
a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java
+++ 
b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java
@@ -20,6 +20,7 @@ package org.apache.baremaps.geoparquet;
 import java.util.List;
 import org.apache.baremaps.geoparquet.GeoParquetSchema.*;
 import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 
@@ -89,6 +90,12 @@ class GeoParquetGroupFactory {
             geoParquetSchema);
       }
 
+      // TODO: Handle logical types
+      else if (field.getLogicalTypeAnnotation() != null
+          && 
field.getLogicalTypeAnnotation().equals(LogicalTypeAnnotation.stringType())) {
+        return new StringField(field.getName(), cardinality);
+      }
+
       // Handle primitive columns
       else {
         PrimitiveType primitiveType = field.asPrimitiveType();
@@ -110,6 +117,8 @@ class GeoParquetGroupFactory {
     return new GeoParquetSchema(schema.getName(), fields);
   }
 
+
+
   /**
    * Creates a new {@link GeoParquetGroup}.
    *
diff --git a/pom.xml b/pom.xml
index ffabeb341..07acbfcdf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -59,7 +59,6 @@ limitations under the License.
     <module>baremaps-pmtiles</module>
     <module>baremaps-server</module>
     <module>baremaps-testing</module>
-      <module>baremaps-geocoder</module>
   </modules>
 
   <scm>


Reply via email to