This is an automated email from the ASF dual-hosted git repository. bchapuis pushed a commit to branch overturemaps-lucene in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git
commit 7058cad6fda4b931dca5ea97864dfc50ea49a243 Author: Bertil Chapuis <[email protected]> AuthorDate: Thu Oct 31 14:43:23 2024 +0100 Add consumer, mapper and query for datatable abstraction --- .../apache/baremaps/geocoder/DataRowConsumer.java | 48 +++++ .../apache/baremaps/geocoder/DataRowMapper.java | 232 +++++++++++++++++++++ .../apache/baremaps/geocoder/DataTableQuery.java | 45 ++++ baremaps-geocoder/pom.xml | 30 --- 4 files changed, 325 insertions(+), 30 deletions(-) diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java new file mode 100644 index 000000000..175afe89a --- /dev/null +++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowConsumer.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.baremaps.geocoder; + +import org.apache.baremaps.data.storage.DataRow; +import org.apache.lucene.index.IndexWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.function.Consumer; + +public class DataRowConsumer implements Consumer<DataRow> { + + private static final Logger logger = LoggerFactory.getLogger(DataRowConsumer.class); + + private final IndexWriter indexWriter; + + private final DataRowMapper dataRowMapper = new DataRowMapper(); + + public DataRowConsumer(IndexWriter indexWriter) { + this.indexWriter = indexWriter; + } + + @Override + public void accept(DataRow row) { + try { + var document = dataRowMapper.apply(row); + indexWriter.addDocument(document); + } catch (Exception e) { + logger.warn("The following row ({}) is not processed due to {}", row, e); + } + } +} diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java new file mode 100644 index 000000000..3938d42cf --- /dev/null +++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataRowMapper.java @@ -0,0 +1,232 @@ +package org.apache.baremaps.geocoder; + +import org.apache.baremaps.data.storage.DataColumn; +import org.apache.baremaps.data.storage.DataRow; +import org.apache.baremaps.data.storage.DataSchema; +import org.locationtech.jts.geom.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LatLonPoint; +import org.apache.lucene.document.LatLonShape; + +import 
java.net.InetAddress; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.Arrays; +import java.util.Map; +import java.util.List; +import java.util.ArrayList; +import java.util.function.Function; + +public class DataRowMapper implements Function<DataRow, Document> { + + private static final Logger logger = LoggerFactory.getLogger(DataRowMapper.class); + + @Override + public Document apply(DataRow dataRow) { + Document doc = new Document(); + DataSchema schema = dataRow.schema(); + List<DataColumn> columns = schema.columns(); + for (int i = 0; i < columns.size(); i++) { + DataColumn column = columns.get(i); + String columnName = column.name(); + Object value = dataRow.get(i); + if (value == null) continue; + DataColumn.Type type = column.type(); + + try { + switch (type) { + case BINARY: + doc.add(new StoredField(columnName, (byte[]) value)); + break; + case BYTE: + doc.add(new IntPoint(columnName, ((Byte) value).intValue())); + doc.add(new StoredField(columnName, ((Byte) value).intValue())); + break; + case BOOLEAN: + doc.add(new StringField(columnName, value.toString(), Field.Store.YES)); + break; + case SHORT: + doc.add(new IntPoint(columnName, ((Short) value).intValue())); + doc.add(new StoredField(columnName, ((Short) value).intValue())); + break; + case INTEGER: + doc.add(new IntPoint(columnName, (Integer) value)); + doc.add(new StoredField(columnName, (Integer) value)); + break; + case LONG: + doc.add(new LongPoint(columnName, (Long) value)); + doc.add(new StoredField(columnName, (Long) value)); + break; + case FLOAT: + doc.add(new FloatPoint(columnName, (Float) value)); + doc.add(new StoredField(columnName, (Float) value)); + break; + case DOUBLE: + doc.add(new DoublePoint(columnName, (Double) value)); + doc.add(new StoredField(columnName, (Double) value)); + break; + case STRING: + doc.add(new TextField(columnName, (String) value, Field.Store.YES)); + break; + case COORDINATE: + Coordinate coord = 
(Coordinate) value; + double lat = coord.getY(); + double lon = coord.getX(); + doc.add(new LatLonPoint(columnName, lat, lon)); + doc.add(new StoredField(columnName + "_lat", lat)); + doc.add(new StoredField(columnName + "_lon", lon)); + break; + case POINT: + Point point = (Point) value; + double pointLat = point.getY(); + double pointLon = point.getX(); + doc.add(new LatLonPoint(columnName, pointLat, pointLon)); + doc.add(new StoredField(columnName + "_lat", pointLat)); + doc.add(new StoredField(columnName + "_lon", pointLon)); + break; + case LINESTRING: + case POLYGON: + case MULTIPOINT: + case MULTILINESTRING: + case MULTIPOLYGON: + case GEOMETRYCOLLECTION: + case GEOMETRY: + Geometry geometry = (Geometry) value; + if (geometry != null) { + Field[] shapeFields = createShapeFields(columnName, geometry); + for (Field field : shapeFields) { + doc.add(field); + } + doc.add(new StoredField(columnName + "_wkt", geometry.toText())); + } + break; + case ENVELOPE: + Envelope envelope = (Envelope) value; + String envelopeStr = envelope.toString(); + doc.add(new StringField(columnName, envelopeStr, Field.Store.YES)); + break; + case INET_ADDRESS: + case INET4_ADDRESS: + case INET6_ADDRESS: + InetAddress addr = (InetAddress) value; + doc.add(new StringField(columnName, addr.getHostAddress(), Field.Store.YES)); + break; + case LOCAL_DATE: + LocalDate date = (LocalDate) value; + doc.add(new StringField(columnName, date.toString(), Field.Store.YES)); + break; + case LOCAL_TIME: + LocalTime time = (LocalTime) value; + doc.add(new StringField(columnName, time.toString(), Field.Store.YES)); + break; + case LOCAL_DATE_TIME: + LocalDateTime dateTime = (LocalDateTime) value; + doc.add(new StringField(columnName, dateTime.toString(), Field.Store.YES)); + break; + case NESTED: + Map<String, Object> map = (Map<String, Object>) value; + for (Map.Entry<String, Object> entry : map.entrySet()) { + String nestedKey = columnName + "." 
+ entry.getKey(); + Object nestedValue = entry.getValue(); + if (nestedValue != null) { + doc.add(new TextField(nestedKey, nestedValue.toString(), Field.Store.YES)); + } + } + break; + default: + doc.add(new StringField(columnName, value.toString(), Field.Store.YES)); + break; + } + } catch (Exception e) { + logger.error("Error processing column '{}' with value '{}': {}", columnName, value, e.getMessage()); + } + } + return doc; + } + + private Field[] createShapeFields(String fieldName, Geometry geometry) { + if (geometry instanceof Point point) { + double lat = point.getY(); + double lon = point.getX(); + return new Field[] { new LatLonPoint(fieldName, lat, lon) }; + } else if (geometry instanceof LineString lineString) { + return LatLonShape.createIndexableFields(fieldName, convertToLuceneLine(lineString)); + } else if (geometry instanceof Polygon polygon) { + org.apache.lucene.geo.Polygon lucenePolygon = convertToLucenePolygon(polygon); + return LatLonShape.createIndexableFields(fieldName, lucenePolygon); + } else if (geometry instanceof MultiPolygon multiPolygon) { + return createFieldsFromMultiPolygon(fieldName, multiPolygon); + } else if (geometry instanceof GeometryCollection collection) { + List<Field> fieldList = new ArrayList<>(); + for (int i = 0; i < collection.getNumGeometries(); i++) { + Geometry geom = collection.getGeometryN(i); + Field[] fields = createShapeFields(fieldName, geom); + fieldList.addAll(Arrays.asList(fields)); + } + return fieldList.toArray(new Field[0]); + } else { + logger.warn("Unsupported geometry type '{}' for field '{}'", geometry.getGeometryType(), fieldName); + return new Field[0]; + } + } + + private org.apache.lucene.geo.Line convertToLuceneLine(LineString lineString) { + Coordinate[] coords = lineString.getCoordinates(); + double[] lats = new double[coords.length]; + double[] lons = new double[coords.length]; + for (int i = 0; i < coords.length; i++) { + lats[i] = coords[i].getY(); + lons[i] = coords[i].getX(); + } + 
return new org.apache.lucene.geo.Line(lats, lons); + } + + private org.apache.lucene.geo.Polygon convertToLucenePolygon(org.locationtech.jts.geom.Polygon jtsPolygon) { + LinearRing shell = jtsPolygon.getExteriorRing(); + Coordinate[] shellCoords = shell.getCoordinates(); + double[] lats = new double[shellCoords.length]; + double[] lons = new double[shellCoords.length]; + for (int i = 0; i < shellCoords.length; i++) { + lats[i] = shellCoords[i].getY(); + lons[i] = shellCoords[i].getX(); + } + + int numHoles = jtsPolygon.getNumInteriorRing(); + org.apache.lucene.geo.Polygon[] holes = new org.apache.lucene.geo.Polygon[numHoles]; + for (int i = 0; i < numHoles; i++) { + LinearRing hole = jtsPolygon.getInteriorRingN(i); + Coordinate[] holeCoords = hole.getCoordinates(); + double[] holeLats = new double[holeCoords.length]; + double[] holeLons = new double[holeCoords.length]; + for (int j = 0; j < holeCoords.length; j++) { + holeLats[j] = holeCoords[j].getY(); + holeLons[j] = holeCoords[j].getX(); + } + holes[i] = new org.apache.lucene.geo.Polygon(holeLats, holeLons); + } + + return new org.apache.lucene.geo.Polygon(lats, lons, holes); + } + + private Field[] createFieldsFromMultiPolygon(String fieldName, MultiPolygon multiPolygon) { + List<Field> fieldList = new ArrayList<>(); + for (int i = 0; i < multiPolygon.getNumGeometries(); i++) { + org.locationtech.jts.geom.Polygon polygon = (org.locationtech.jts.geom.Polygon) multiPolygon.getGeometryN(i); + org.apache.lucene.geo.Polygon lucenePolygon = convertToLucenePolygon(polygon); + Field[] fields = LatLonShape.createIndexableFields(fieldName, lucenePolygon); + fieldList.addAll(Arrays.asList(fields)); + } + return fieldList.toArray(new Field[0]); + } +} diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java new file mode 100644 index 000000000..0d3c6dfc0 --- /dev/null +++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/DataTableQuery.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.baremaps.geocoder; + +import org.apache.baremaps.geocoder.openstreetmap.OsmTags; +import org.apache.lucene.queryparser.classic.QueryParserBase; +import org.apache.lucene.queryparser.simple.SimpleQueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; + +public class DataTableQuery { + + private final String query; + + public DataTableQuery(String query) { + this.query = query; + } + + public Query build() { + var builder = new BooleanQuery.Builder(); + var queryTextEsc = QueryParserBase.escape(query); + + var parser = new SimpleQueryParser(GeocoderConstants.ANALYZER, OsmTags.NAME.key()); + var termsQuery = parser.parse(queryTextEsc); + // at least one terms of the queryText must be present + builder.add(termsQuery, BooleanClause.Occur.MUST); + return builder.build(); + } +} diff --git a/baremaps-geocoder/pom.xml b/baremaps-geocoder/pom.xml deleted file mode 100644 index 8214f6b70..000000000 --- a/baremaps-geocoder/pom.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml 
version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>org.apache.baremaps</groupId> - <artifactId>baremaps</artifactId> - <version>0.7.4-SNAPSHOT</version> - </parent> - <artifactId>baremaps-geocoder</artifactId> - <dependencies> - <dependency> - <groupId>org.apache.lucene</groupId> - <artifactId>lucene-core</artifactId> - </dependency> - <dependency> - <groupId>org.apache.lucene</groupId> - <artifactId>lucene-expressions</artifactId> - </dependency> - <dependency> - <groupId>org.apache.lucene</groupId> - <artifactId>lucene-queryparser</artifactId> - </dependency> - <dependency> - <groupId>org.apache.lucene</groupId> - <artifactId>lucene-spatial-extras</artifactId> - </dependency> - </dependencies> -</project> \ No newline at end of file
