This is an automated email from the ASF dual-hosted git repository.
bchapuis pushed a commit to branch overturemaps-lucene
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git
The following commit(s) were added to refs/heads/overturemaps-lucene by this
push:
new 7088d3cf6 Add abstractions to create index from data tables
7088d3cf6 is described below
commit 7088d3cf635d9f5282bf1d0a416f8c292374b59d
Author: Bertil Chapuis <[email protected]>
AuthorDate: Tue Nov 5 09:28:16 2024 +0100
Add abstractions to create index from data tables
---
.../apache/baremaps/geocoder/DataRowConsumer.java | 3 +-
.../apache/baremaps/geocoder/DataRowMapper.java | 429 +++++++++++----------
.../apache/baremaps/geocoder/DataTableQuery.java | 45 ---
.../baremaps/geocoder/DataTableQueryBuilder.java | 142 +++++++
.../geocoder/geonames/GeonamesQueryBuilder.java | 3 +-
...namesIndexTest.java => DataTableIndexTest.java} | 69 ++--
.../geocoder/geonames/GeonamesIndexTest.java | 1 -
.../geocoder/geonames/GeonamesReaderTest.java | 1 -
.../openstreetmap/OpenStreetMapIndexTest.java | 1 -
.../apache/baremaps/data/storage/DataRowImpl.java | 2 +-
.../baremaps/data/storage/DataSchemaImpl.java | 10 +-
.../geoparquet/GeoParquetGroupFactory.java | 9 +
pom.xml | 1 -
13 files changed, 407 insertions(+), 309 deletions(-)
diff --git
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java
index 175afe89a..02ff1c10d 100644
---
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java
+++
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java
@@ -17,13 +17,12 @@
package org.apache.baremaps.geocoder;
+import java.util.function.Consumer;
import org.apache.baremaps.data.storage.DataRow;
import org.apache.lucene.index.IndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.function.Consumer;
-
public class DataRowConsumer implements Consumer<DataRow> {
private static final Logger logger =
LoggerFactory.getLogger(DataRowConsumer.class);
diff --git
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java
index 3938d42cf..2475a84c5 100644
---
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java
+++
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java
@@ -1,232 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.baremaps.geocoder;
+import java.net.InetAddress;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
import org.apache.baremaps.data.storage.DataColumn;
import org.apache.baremaps.data.storage.DataRow;
import org.apache.baremaps.data.storage.DataSchema;
-import org.locationtech.jts.geom.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.FloatPoint;
-import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LatLonShape;
-
-import java.net.InetAddress;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.LocalTime;
-import java.util.Arrays;
-import java.util.Map;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.function.Function;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.locationtech.jts.geom.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DataRowMapper implements Function<DataRow, Document> {
- private static final Logger logger =
LoggerFactory.getLogger(DataRowMapper.class);
+ private static final Logger logger =
LoggerFactory.getLogger(DataRowMapper.class);
- @Override
- public Document apply(DataRow dataRow) {
- Document doc = new Document();
- DataSchema schema = dataRow.schema();
- List<DataColumn> columns = schema.columns();
- for (int i = 0; i < columns.size(); i++) {
- DataColumn column = columns.get(i);
- String columnName = column.name();
- Object value = dataRow.get(i);
- if (value == null) continue;
- DataColumn.Type type = column.type();
+ @Override
+ public Document apply(DataRow dataRow) {
+ Document doc = new Document();
+ DataSchema schema = dataRow.schema();
+ List<DataColumn> columns = schema.columns();
+ for (int i = 0; i < columns.size(); i++) {
+ DataColumn column = columns.get(i);
+ String columnName = column.name();
+ Object value = dataRow.get(i);
+ if (value == null)
+ continue;
+ DataColumn.Type type = column.type();
- try {
- switch (type) {
- case BINARY:
- doc.add(new StoredField(columnName, (byte[]) value));
- break;
- case BYTE:
- doc.add(new IntPoint(columnName, ((Byte)
value).intValue()));
- doc.add(new StoredField(columnName, ((Byte)
value).intValue()));
- break;
- case BOOLEAN:
- doc.add(new StringField(columnName, value.toString(),
Field.Store.YES));
- break;
- case SHORT:
- doc.add(new IntPoint(columnName, ((Short)
value).intValue()));
- doc.add(new StoredField(columnName, ((Short)
value).intValue()));
- break;
- case INTEGER:
- doc.add(new IntPoint(columnName, (Integer) value));
- doc.add(new StoredField(columnName, (Integer) value));
- break;
- case LONG:
- doc.add(new LongPoint(columnName, (Long) value));
- doc.add(new StoredField(columnName, (Long) value));
- break;
- case FLOAT:
- doc.add(new FloatPoint(columnName, (Float) value));
- doc.add(new StoredField(columnName, (Float) value));
- break;
- case DOUBLE:
- doc.add(new DoublePoint(columnName, (Double) value));
- doc.add(new StoredField(columnName, (Double) value));
- break;
- case STRING:
- doc.add(new TextField(columnName, (String) value,
Field.Store.YES));
- break;
- case COORDINATE:
- Coordinate coord = (Coordinate) value;
- double lat = coord.getY();
- double lon = coord.getX();
- doc.add(new LatLonPoint(columnName, lat, lon));
- doc.add(new StoredField(columnName + "_lat", lat));
- doc.add(new StoredField(columnName + "_lon", lon));
- break;
- case POINT:
- Point point = (Point) value;
- double pointLat = point.getY();
- double pointLon = point.getX();
- doc.add(new LatLonPoint(columnName, pointLat,
pointLon));
- doc.add(new StoredField(columnName + "_lat",
pointLat));
- doc.add(new StoredField(columnName + "_lon",
pointLon));
- break;
- case LINESTRING:
- case POLYGON:
- case MULTIPOINT:
- case MULTILINESTRING:
- case MULTIPOLYGON:
- case GEOMETRYCOLLECTION:
- case GEOMETRY:
- Geometry geometry = (Geometry) value;
- if (geometry != null) {
- Field[] shapeFields =
createShapeFields(columnName, geometry);
- for (Field field : shapeFields) {
- doc.add(field);
- }
- doc.add(new StoredField(columnName + "_wkt",
geometry.toText()));
- }
- break;
- case ENVELOPE:
- Envelope envelope = (Envelope) value;
- String envelopeStr = envelope.toString();
- doc.add(new StringField(columnName, envelopeStr,
Field.Store.YES));
- break;
- case INET_ADDRESS:
- case INET4_ADDRESS:
- case INET6_ADDRESS:
- InetAddress addr = (InetAddress) value;
- doc.add(new StringField(columnName,
addr.getHostAddress(), Field.Store.YES));
- break;
- case LOCAL_DATE:
- LocalDate date = (LocalDate) value;
- doc.add(new StringField(columnName, date.toString(),
Field.Store.YES));
- break;
- case LOCAL_TIME:
- LocalTime time = (LocalTime) value;
- doc.add(new StringField(columnName, time.toString(),
Field.Store.YES));
- break;
- case LOCAL_DATE_TIME:
- LocalDateTime dateTime = (LocalDateTime) value;
- doc.add(new StringField(columnName,
dateTime.toString(), Field.Store.YES));
- break;
- case NESTED:
- Map<String, Object> map = (Map<String, Object>) value;
- for (Map.Entry<String, Object> entry : map.entrySet())
{
- String nestedKey = columnName + "." +
entry.getKey();
- Object nestedValue = entry.getValue();
- if (nestedValue != null) {
- doc.add(new TextField(nestedKey,
nestedValue.toString(), Field.Store.YES));
- }
- }
- break;
- default:
- doc.add(new StringField(columnName, value.toString(),
Field.Store.YES));
- break;
- }
- } catch (Exception e) {
- logger.error("Error processing column '{}' with value '{}':
{}", columnName, value, e.getMessage());
+ try {
+ switch (type) {
+ case BINARY:
+ doc.add(new StoredField(columnName, (byte[]) value));
+ break;
+ case BYTE:
+ doc.add(new IntPoint(columnName, ((Byte) value).intValue()));
+ doc.add(new StoredField(columnName, ((Byte) value).intValue()));
+ break;
+ case BOOLEAN:
+ doc.add(new StringField(columnName, value.toString(),
Field.Store.YES));
+ break;
+ case SHORT:
+ doc.add(new IntPoint(columnName, ((Short) value).intValue()));
+ doc.add(new StoredField(columnName, ((Short) value).intValue()));
+ break;
+ case INTEGER:
+ doc.add(new IntPoint(columnName, (Integer) value));
+ doc.add(new StoredField(columnName, (Integer) value));
+ break;
+ case LONG:
+ doc.add(new LongPoint(columnName, (Long) value));
+ doc.add(new StoredField(columnName, (Long) value));
+ break;
+ case FLOAT:
+ doc.add(new FloatPoint(columnName, (Float) value));
+ doc.add(new StoredField(columnName, (Float) value));
+ break;
+ case DOUBLE:
+ doc.add(new DoublePoint(columnName, (Double) value));
+ doc.add(new StoredField(columnName, (Double) value));
+ break;
+ case STRING:
+ doc.add(new TextField(columnName, (String) value,
Field.Store.YES));
+ break;
+ case COORDINATE:
+ Coordinate coord = (Coordinate) value;
+ double lat = coord.getY();
+ double lon = coord.getX();
+ doc.add(new LatLonPoint(columnName, lat, lon));
+ doc.add(new StoredField(columnName + "_lat", lat));
+ doc.add(new StoredField(columnName + "_lon", lon));
+ break;
+ case POINT:
+ Point point = (Point) value;
+ double pointLat = point.getY();
+ double pointLon = point.getX();
+ doc.add(new LatLonPoint(columnName, pointLat, pointLon));
+ doc.add(new StoredField(columnName + "_lat", pointLat));
+ doc.add(new StoredField(columnName + "_lon", pointLon));
+ break;
+ case LINESTRING:
+ case POLYGON:
+ case MULTIPOINT:
+ case MULTILINESTRING:
+ case MULTIPOLYGON:
+ case GEOMETRYCOLLECTION:
+ case GEOMETRY:
+ Geometry geometry = (Geometry) value;
+ if (geometry != null) {
+ Field[] shapeFields = createShapeFields(columnName, geometry);
+ for (Field field : shapeFields) {
+ doc.add(field);
+ }
+ doc.add(new StoredField(columnName + "_wkt", geometry.toText()));
}
- }
- return doc;
- }
-
- private Field[] createShapeFields(String fieldName, Geometry geometry) {
- if (geometry instanceof Point point) {
- double lat = point.getY();
- double lon = point.getX();
- return new Field[] { new LatLonPoint(fieldName, lat, lon) };
- } else if (geometry instanceof LineString lineString) {
- return LatLonShape.createIndexableFields(fieldName,
convertToLuceneLine(lineString));
- } else if (geometry instanceof Polygon polygon) {
- org.apache.lucene.geo.Polygon lucenePolygon =
convertToLucenePolygon(polygon);
- return LatLonShape.createIndexableFields(fieldName, lucenePolygon);
- } else if (geometry instanceof MultiPolygon multiPolygon) {
- return createFieldsFromMultiPolygon(fieldName, multiPolygon);
- } else if (geometry instanceof GeometryCollection collection) {
- List<Field> fieldList = new ArrayList<>();
- for (int i = 0; i < collection.getNumGeometries(); i++) {
- Geometry geom = collection.getGeometryN(i);
- Field[] fields = createShapeFields(fieldName, geom);
- fieldList.addAll(Arrays.asList(fields));
+ break;
+ case ENVELOPE:
+ Envelope envelope = (Envelope) value;
+ String envelopeStr = envelope.toString();
+ doc.add(new StringField(columnName, envelopeStr, Field.Store.YES));
+ break;
+ case INET_ADDRESS:
+ case INET4_ADDRESS:
+ case INET6_ADDRESS:
+ InetAddress addr = (InetAddress) value;
+ doc.add(new StringField(columnName, addr.getHostAddress(),
Field.Store.YES));
+ break;
+ case LOCAL_DATE:
+ LocalDate date = (LocalDate) value;
+ doc.add(new StringField(columnName, date.toString(),
Field.Store.YES));
+ break;
+ case LOCAL_TIME:
+ LocalTime time = (LocalTime) value;
+ doc.add(new StringField(columnName, time.toString(),
Field.Store.YES));
+ break;
+ case LOCAL_DATE_TIME:
+ LocalDateTime dateTime = (LocalDateTime) value;
+ doc.add(new StringField(columnName, dateTime.toString(),
Field.Store.YES));
+ break;
+ case NESTED:
+ Map<String, Object> map = (Map<String, Object>) value;
+ for (Map.Entry<String, Object> entry : map.entrySet()) {
+ String nestedKey = columnName + "." + entry.getKey();
+ Object nestedValue = entry.getValue();
+ if (nestedValue != null) {
+ doc.add(new TextField(nestedKey, nestedValue.toString(),
Field.Store.YES));
+ }
}
- return fieldList.toArray(new Field[0]);
- } else {
- logger.warn("Unsupported geometry type '{}' for field '{}'",
geometry.getGeometryType(), fieldName);
- return new Field[0];
+ break;
+ default:
+ doc.add(new StringField(columnName, value.toString(),
Field.Store.YES));
+ break;
}
+ } catch (Exception e) {
+ logger.error("Error processing column '{}' with value '{}': {}",
columnName, value,
+ e.getMessage());
+ }
}
+ return doc;
+ }
- private org.apache.lucene.geo.Line convertToLuceneLine(LineString
lineString) {
- Coordinate[] coords = lineString.getCoordinates();
- double[] lats = new double[coords.length];
- double[] lons = new double[coords.length];
- for (int i = 0; i < coords.length; i++) {
- lats[i] = coords[i].getY();
- lons[i] = coords[i].getX();
- }
- return new org.apache.lucene.geo.Line(lats, lons);
+ private Field[] createShapeFields(String fieldName, Geometry geometry) {
+ if (geometry instanceof Point point) {
+ double lat = point.getY();
+ double lon = point.getX();
+ return new Field[] {new LatLonPoint(fieldName, lat, lon)};
+ } else if (geometry instanceof LineString lineString) {
+ return LatLonShape.createIndexableFields(fieldName,
convertToLuceneLine(lineString));
+ } else if (geometry instanceof Polygon polygon) {
+ org.apache.lucene.geo.Polygon lucenePolygon =
convertToLucenePolygon(polygon);
+ return LatLonShape.createIndexableFields(fieldName, lucenePolygon);
+ } else if (geometry instanceof MultiPolygon multiPolygon) {
+ return createFieldsFromMultiPolygon(fieldName, multiPolygon);
+ } else if (geometry instanceof GeometryCollection collection) {
+ List<Field> fieldList = new ArrayList<>();
+ for (int i = 0; i < collection.getNumGeometries(); i++) {
+ Geometry geom = collection.getGeometryN(i);
+ Field[] fields = createShapeFields(fieldName, geom);
+ fieldList.addAll(Arrays.asList(fields));
+ }
+ return fieldList.toArray(new Field[0]);
+ } else {
+ logger.warn("Unsupported geometry type '{}' for field '{}'",
geometry.getGeometryType(),
+ fieldName);
+ return new Field[0];
}
+ }
- private org.apache.lucene.geo.Polygon
convertToLucenePolygon(org.locationtech.jts.geom.Polygon jtsPolygon) {
- LinearRing shell = jtsPolygon.getExteriorRing();
- Coordinate[] shellCoords = shell.getCoordinates();
- double[] lats = new double[shellCoords.length];
- double[] lons = new double[shellCoords.length];
- for (int i = 0; i < shellCoords.length; i++) {
- lats[i] = shellCoords[i].getY();
- lons[i] = shellCoords[i].getX();
- }
+ private org.apache.lucene.geo.Line convertToLuceneLine(LineString
lineString) {
+ Coordinate[] coords = lineString.getCoordinates();
+ double[] lats = new double[coords.length];
+ double[] lons = new double[coords.length];
+ for (int i = 0; i < coords.length; i++) {
+ lats[i] = coords[i].getY();
+ lons[i] = coords[i].getX();
+ }
+ return new org.apache.lucene.geo.Line(lats, lons);
+ }
- int numHoles = jtsPolygon.getNumInteriorRing();
- org.apache.lucene.geo.Polygon[] holes = new
org.apache.lucene.geo.Polygon[numHoles];
- for (int i = 0; i < numHoles; i++) {
- LinearRing hole = jtsPolygon.getInteriorRingN(i);
- Coordinate[] holeCoords = hole.getCoordinates();
- double[] holeLats = new double[holeCoords.length];
- double[] holeLons = new double[holeCoords.length];
- for (int j = 0; j < holeCoords.length; j++) {
- holeLats[j] = holeCoords[j].getY();
- holeLons[j] = holeCoords[j].getX();
- }
- holes[i] = new org.apache.lucene.geo.Polygon(holeLats, holeLons);
- }
+ private org.apache.lucene.geo.Polygon convertToLucenePolygon(
+ org.locationtech.jts.geom.Polygon jtsPolygon) {
+ LinearRing shell = jtsPolygon.getExteriorRing();
+ Coordinate[] shellCoords = shell.getCoordinates();
+ double[] lats = new double[shellCoords.length];
+ double[] lons = new double[shellCoords.length];
+ for (int i = 0; i < shellCoords.length; i++) {
+ lats[i] = shellCoords[i].getY();
+ lons[i] = shellCoords[i].getX();
+ }
- return new org.apache.lucene.geo.Polygon(lats, lons, holes);
+ int numHoles = jtsPolygon.getNumInteriorRing();
+ org.apache.lucene.geo.Polygon[] holes = new
org.apache.lucene.geo.Polygon[numHoles];
+ for (int i = 0; i < numHoles; i++) {
+ LinearRing hole = jtsPolygon.getInteriorRingN(i);
+ Coordinate[] holeCoords = hole.getCoordinates();
+ double[] holeLats = new double[holeCoords.length];
+ double[] holeLons = new double[holeCoords.length];
+ for (int j = 0; j < holeCoords.length; j++) {
+ holeLats[j] = holeCoords[j].getY();
+ holeLons[j] = holeCoords[j].getX();
+ }
+ holes[i] = new org.apache.lucene.geo.Polygon(holeLats, holeLons);
}
- private Field[] createFieldsFromMultiPolygon(String fieldName,
MultiPolygon multiPolygon) {
- List<Field> fieldList = new ArrayList<>();
- for (int i = 0; i < multiPolygon.getNumGeometries(); i++) {
- org.locationtech.jts.geom.Polygon polygon =
(org.locationtech.jts.geom.Polygon) multiPolygon.getGeometryN(i);
- org.apache.lucene.geo.Polygon lucenePolygon =
convertToLucenePolygon(polygon);
- Field[] fields = LatLonShape.createIndexableFields(fieldName,
lucenePolygon);
- fieldList.addAll(Arrays.asList(fields));
- }
- return fieldList.toArray(new Field[0]);
+ return new org.apache.lucene.geo.Polygon(lats, lons, holes);
+ }
+
+ private Field[] createFieldsFromMultiPolygon(String fieldName, MultiPolygon
multiPolygon) {
+ List<Field> fieldList = new ArrayList<>();
+ for (int i = 0; i < multiPolygon.getNumGeometries(); i++) {
+ org.locationtech.jts.geom.Polygon polygon =
+ (org.locationtech.jts.geom.Polygon) multiPolygon.getGeometryN(i);
+ org.apache.lucene.geo.Polygon lucenePolygon =
convertToLucenePolygon(polygon);
+ Field[] fields = LatLonShape.createIndexableFields(fieldName,
lucenePolygon);
+ fieldList.addAll(Arrays.asList(fields));
}
+ return fieldList.toArray(new Field[0]);
+ }
}
diff --git
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java
deleted file mode 100644
index 0d3c6dfc0..000000000
---
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.baremaps.geocoder;
-
-import org.apache.baremaps.geocoder.openstreetmap.OsmTags;
-import org.apache.lucene.queryparser.classic.QueryParserBase;
-import org.apache.lucene.queryparser.simple.SimpleQueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-
-public class DataTableQuery {
-
- private final String query;
-
- public DataTableQuery(String query) {
- this.query = query;
- }
-
- public Query build() {
- var builder = new BooleanQuery.Builder();
- var queryTextEsc = QueryParserBase.escape(query);
-
- var parser = new SimpleQueryParser(GeocoderConstants.ANALYZER,
OsmTags.NAME.key());
- var termsQuery = parser.parse(queryTextEsc);
- // at least one terms of the queryText must be present
- builder.add(termsQuery, BooleanClause.Occur.MUST);
- return builder.build();
- }
-}
diff --git
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQueryBuilder.java
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQueryBuilder.java
new file mode 100644
index 000000000..b654f1832
--- /dev/null
+++
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQueryBuilder.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geocoder;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.baremaps.data.storage.DataColumn;
+import org.apache.baremaps.data.storage.DataSchema;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryparser.classic.QueryParserBase;
+import org.apache.lucene.queryparser.simple.SimpleQueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * A builder for creating queries on a data table.
+ */
+public class DataTableQueryBuilder {
+
+ private final Analyzer analyzer;
+
+ private Map<String, Float> fields = new HashMap<>();
+
+ private String query;
+
+ /**
+ * Constructs a query builder with the default analyzer.
+ */
+ public DataTableQueryBuilder() {
+ this(GeocoderConstants.ANALYZER);
+ }
+
+ /**
+ * Constructs a query builder with the specified analyzer.
+ *
+ * @param analyzer the analyzer
+ */
+ public DataTableQueryBuilder(Analyzer analyzer) {
+ this.analyzer = analyzer;
+ }
+
+ /**
+ * Replace all the fields with the columns of the schema and a boost of 1.0.
+ *
+ * @param schema the schema
+ * @return the query builder
+ */
+ public DataTableQueryBuilder schema(DataSchema schema) {
+ this.fields = new HashMap<>(schema.columns().stream()
+ .collect(Collectors.toMap(DataColumn::name, column -> 1.0f)));
+ return this;
+ }
+
+ /**
+ * Replace all the fields with the specified fields and boosts.
+ *
+ * @param fields the fields and boosts
+ * @return the query builder
+ */
+ public DataTableQueryBuilder columns(Map<DataColumn, Float> fields) {
+ this.fields = new HashMap<>(fields.entrySet().stream()
+ .collect(Collectors.toMap(entry -> entry.getKey().name(),
Map.Entry::getValue)));
+ return this;
+ }
+
+ /**
+ * Add a column with a specified boost.
+ *
+ * @param column the column
+ * @param boost the boost
+ * @return the query builder
+ */
+ public DataTableQueryBuilder column(DataColumn column, float boost) {
+ return column(column.name(), boost);
+ }
+
+ /**
+ * Add a column with a specified boost.
+ *
+ * @param column the column
+ * @param boost the boost
+ * @return the query builder
+ */
+ public DataTableQueryBuilder column(String column, float boost) {
+ fields.put(column, boost);
+ return this;
+ }
+
+ /**
+ * Set the query text.
+ *
+ * @param query the query text
+ * @return the query builder
+ */
+ public DataTableQueryBuilder query(String query) {
+ this.query = query;
+ return this;
+ }
+
+ /**
+ * Build the query.
+ *
+ * @return the query
+ */
+ public Query build() {
+ var builder = new BooleanQuery.Builder();
+
+ var parser = new SimpleQueryParser(analyzer, fields);
+ var escapedQuery = QueryParserBase.escape(query);
+ var termsQuery = parser.parse(escapedQuery);
+
+ // at least one term of the query text must be present
+ builder.add(termsQuery, BooleanClause.Occur.MUST);
+ return builder.build();
+ }
+
+ /**
+ * Get the analyzer.
+ *
+ * @return the analyzer
+ */
+ private static Analyzer getAnalyzer() {
+ return GeocoderConstants.ANALYZER;
+ }
+}
diff --git
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
index d1cd32db2..d1d702ef1 100644
---
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
+++
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesQueryBuilder.java
@@ -22,7 +22,6 @@ package org.apache.baremaps.geocoder.geonames;
import com.google.common.base.Strings;
import java.text.ParseException;
import java.util.Map;
-
import org.apache.baremaps.geocoder.GeocoderConstants;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.expressions.Expression;
@@ -52,8 +51,8 @@ public class GeonamesQueryBuilder {
private boolean scoringByPopulation;
private boolean andOperator;
- private String featureCode;
+ private String featureCode;
public GeonamesQueryBuilder() {
this(GeocoderConstants.ANALYZER);
diff --git
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/DataTableIndexTest.java
similarity index 56%
copy from
baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
copy to
baremaps-core/src/test/java/org/apache/baremaps/geocoder/DataTableIndexTest.java
index 0effcf9ad..2d5aba56b 100644
---
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
+++
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/DataTableIndexTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.baremaps.geocoder.geonames;
+package org.apache.baremaps.geocoder;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -24,11 +24,12 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
-
+import org.apache.baremaps.storage.geoparquet.GeoParquetDataTable;
import org.apache.baremaps.testing.TestFiles;
import org.apache.baremaps.utils.FileUtils;
-import org.apache.baremaps.workflow.WorkflowContext;
-import org.apache.baremaps.workflow.tasks.CreateGeonamesIndex;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
@@ -38,7 +39,7 @@ import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
-public class GeonamesIndexTest {
+public class DataTableIndexTest {
private static Path directory;
private static IndexSearcher searcher;
@@ -49,10 +50,18 @@ public class GeonamesIndexTest {
directory = Files.createTempDirectory(Paths.get("."), "geocoder_");
// Create the geonames index
- var data = TestFiles.resolve("baremaps-testing/data/geonames/sample.txt");
- var task = new CreateGeonamesIndex(data, directory);
- task.execute(new WorkflowContext());
var dir = FSDirectory.open(directory);
+ var data =
TestFiles.resolve("baremaps-testing/data/samples/example.parquet");
+ var config = new IndexWriterConfig(GeocoderConstants.ANALYZER);
+ try (var indexWriter = new IndexWriter(dir, config);
+ var inputStream = Files.newInputStream(data)) {
+ indexWriter.deleteAll();
+ var documents = new GeoParquetDataTable(data.toUri())
+ .stream()
+ .map(new DataRowMapper());
+ indexWriter.addDocuments((Iterable<Document>) documents::iterator);
+ }
+
var searcherManager = new SearcherManager(dir, new SearcherFactory());
searcher = searcherManager.acquire();
}
@@ -63,48 +72,22 @@ public class GeonamesIndexTest {
}
@Test
- void testCreateIndex() throws Exception {
- var geonamesQuery =
- new
GeonamesQueryBuilder().queryText("yverdon").countryCode("CH").build();
- var topDocs = searcher.search(geonamesQuery, 1);
- var doc =
searcher.doc(Arrays.stream(topDocs.scoreDocs).findFirst().get().doc);
- assertEquals("Yverdon-les-bains", doc.getField("name").stringValue());
- }
-
- @Test
- void testOrQuery() throws Exception {
- var geonamesQuery = new GeonamesQueryBuilder()
- .queryText("bains cheseaux")
- .countryCode("CH")
- .build();
- var topDocs = searcher.search(geonamesQuery, 2);
- assertEquals(2, topDocs.totalHits.value);
- var doc0 = searcher.doc(topDocs.scoreDocs[0].doc);
- assertEquals("Yverdon-les-bains", doc0.getField("name").stringValue());
- var doc1 = searcher.doc(topDocs.scoreDocs[1].doc);
- assertEquals("Route de Cheseaux 1", doc1.getField("name").stringValue());
- }
-
- @Test
- void testAndQueryNoHits() throws Exception {
- var geonamesQuery = new GeonamesQueryBuilder()
- .queryText("bains cheseaux")
- .andOperator()
- .countryCode("CH")
+ void testQueryNoHits() throws Exception {
+ var geonamesQuery = new DataTableQueryBuilder()
+ .query("test")
.build();
var topDocs = searcher.search(geonamesQuery, 1);
assertEquals(0, topDocs.totalHits.value);
}
@Test
- void testAndQuery() throws Exception {
- var geonamesQuery =
- new GeonamesQueryBuilder().queryText("yverdon bains")
- .andOperator()
- .countryCode("CH")
- .build();
+ void testQuery() throws Exception {
+ var geonamesQuery = new DataTableQueryBuilder()
+ .column("continent", 1.0f)
+ .query("oceania")
+ .build();
var topDocs = searcher.search(geonamesQuery, 1);
var doc =
searcher.doc(Arrays.stream(topDocs.scoreDocs).findFirst().get().doc);
- assertEquals("Yverdon-les-bains", doc.getField("name").stringValue());
+ assertEquals("Oceania", doc.getField("continent").stringValue());
}
}
diff --git
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
index 0effcf9ad..4bced3332 100644
---
a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
+++
b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesIndexTest.java
@@ -24,7 +24,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
-
import org.apache.baremaps.testing.TestFiles;
import org.apache.baremaps.utils.FileUtils;
import org.apache.baremaps.workflow.WorkflowContext;
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java
index 3f3170f95..c02bf59e1 100644
--- a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java
+++ b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesReaderTest.java
@@ -21,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.nio.file.Files;
-
import org.apache.baremaps.testing.TestFiles;
import org.junit.jupiter.api.Test;
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java
index 4d74343e6..605685be3 100644
--- a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java
+++ b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/openstreetmap/OpenStreetMapIndexTest.java
@@ -27,7 +27,6 @@ import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-
import org.apache.baremaps.utils.FileUtils;
import org.apache.baremaps.workflow.WorkflowContext;
import org.apache.baremaps.workflow.tasks.CreateGeocoderOpenStreetMap;
diff --git a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java
index 72c7c6dea..eca44b246 100644
--- a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java
+++ b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataRowImpl.java
@@ -17,6 +17,7 @@
package org.apache.baremaps.data.storage;
+
import java.util.List;
/**
@@ -67,5 +68,4 @@ public record DataRowImpl(DataSchema schema, List<Object> values) implements Dat
public void set(int index, Object value) {
values.set(index, value);
}
-
}
diff --git a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java
index c9b20c7a6..81dfc4036 100644
--- a/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java
+++ b/baremaps-data/src/main/java/org/apache/baremaps/data/storage/DataSchemaImpl.java
@@ -23,11 +23,7 @@ import java.util.List;
/**
* A {@link DataSchema} defines the structure of a table.
*/
-public class DataSchemaImpl implements DataSchema {
-
- private final String name;
-
- private final List<DataColumn> columns;
+public record DataSchemaImpl(String name, List<DataColumn> columns) implements DataSchema {
/**
* Constructs a schema with the specified name and columns.
@@ -35,9 +31,7 @@ public class DataSchemaImpl implements DataSchema {
* @param name the name of the schema
* @param columns the columns of the schema
*/
- public DataSchemaImpl(String name, List<DataColumn> columns) {
- this.name = name;
- this.columns = columns;
+ public DataSchemaImpl {
}
/**
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java
index a88f3b13e..97cdb3570 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroupFactory.java
@@ -20,6 +20,7 @@ package org.apache.baremaps.geoparquet;
import java.util.List;
import org.apache.baremaps.geoparquet.GeoParquetSchema.*;
import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -89,6 +90,12 @@ class GeoParquetGroupFactory {
geoParquetSchema);
}
+ // TODO: Handle logical types
+ else if (field.getLogicalTypeAnnotation() != null
+ && field.getLogicalTypeAnnotation().equals(LogicalTypeAnnotation.stringType())) {
+ return new StringField(field.getName(), cardinality);
+ }
+
// Handle primitive columns
else {
PrimitiveType primitiveType = field.asPrimitiveType();
@@ -110,6 +117,8 @@ class GeoParquetGroupFactory {
return new GeoParquetSchema(schema.getName(), fields);
}
+
+
/**
* Creates a new {@link GeoParquetGroup}.
*
diff --git a/pom.xml b/pom.xml
index ffabeb341..07acbfcdf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -59,7 +59,6 @@ limitations under the License.
<module>baremaps-pmtiles</module>
<module>baremaps-server</module>
<module>baremaps-testing</module>
- <module>baremaps-geocoder</module>
</modules>
<scm>