Perdjesk commented on code in PR #780: URL: https://github.com/apache/incubator-baremaps/pull/780#discussion_r1323098023
########## baremaps-core/src/main/java/org/apache/baremaps/geocoderosm/GeocoderOSMDocumentMapper.java: ########## @@ -0,0 +1,98 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.baremaps.geocoderosm; + + + +import java.util.function.Function; +import org.apache.baremaps.openstreetmap.model.Element; +import org.apache.baremaps.openstreetmap.model.Node; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LatLonShape; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.geo.Polygon; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.io.geojson.GeoJsonWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class GeocoderOSMDocumentMapper implements Function<Element, Document> { + private static final Logger logger = LoggerFactory.getLogger(GeocoderOSMDocumentMapper.class); + + @Override + public Document apply(Element element) { + var document = new Document(); + document.add(new StoredField("osm_id", element.id())); + document.add(new StoredField("osm_type", element.getClass().getSimpleName())); + + if (element.getTags().containsKey(OSMTags.NAME.key())) { + document.add( + new TextField(OSMTags.NAME.key(), element.getTags().get(OSMTags.NAME.key()).toString(), + Field.Store.YES)); + } + + if (element 
instanceof Node node) { + document.add(LatLonShape.createIndexableFields("polygon", node.getLat(), node.getLon())[0]); + document.add(new StoredField("latitude", node.getLat())); + document.add(new StoredField("longitude", node.getLon())); + } + if (element.getGeometry() != null + && element.getGeometry().getGeometryType().equals(Geometry.TYPENAME_LINESTRING)) { + logger.debug("Geometry linestring ignored as not supported by Lucene Polygon.fromGeoJson: {}", + element); + } + if (element.getGeometry() != null + && !element.getGeometry().getGeometryType().equals(Geometry.TYPENAME_POINT) + && !element.getGeometry().getGeometryType().equals(Geometry.TYPENAME_LINESTRING)) { + // JTS to GeoJSON + var geojsonWriter = new GeoJsonWriter(); + // Remove crs field in GeoJSON as Lucene parsing is very strict. + // Avoid "crs must be CRS84 from OGC, but saw: EPSG:4326" + // See: + // https://github.com/apache/lucene/blob/ef42af65f27f7f078b1ab426de9f2b2fa214ad86/lucene/core/src/java/org/apache/lucene/geo/SimpleGeoJSONPolygonParser.java#L180 + geojsonWriter.setEncodeCRS(false); Review Comment: The linked Lucene code expects a very specific format for this field, which is not compatible with the format written by the JTS GeoJsonWriter. I previously had code that supported any source projection and transformed it to CRS84. However, supporting other source projections is not necessary for the moment; it is enough to set the expected projection while reading the OSM data. References: https://github.com/apache/lucene/blob/ef42af65f27f7f078b1ab426de9f2b2fa214ad86/lucene/core/src/java/org/apache/lucene/geo/SimpleGeoJSONPolygonParser.java#L180 https://github.com/locationtech/jts/blob/ee59b591f15b5150516393d3ba0b49e46a113fc9/modules/io/common/src/main/java/org/locationtech/jts/io/geojson/GeoJsonWriter.java#L226 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
