This is an automated email from the ASF dual-hosted git repository.

imbruced pushed a commit to branch fix-reading-geoparquet-when-filtering-and-bbox-is-empty
in repository https://gitbox.apache.org/repos/asf/sedona.git

commit 8cf69352780c7255308c2139d733792913def652 Author: pawelkocinski <[email protected]> AuthorDate: Thu Nov 14 13:01:29 2024 +0100 Fix issue when loading geoparquet file. --- .../datasources/parquet/GeoParquetSpatialFilter.scala | 4 ++++ .../test/resources/geoparquet/overture/bbox.geoparquet | Bin 0 -> 24496 bytes .../scala/org/apache/sedona/sql/geoparquetIOTests.scala | 14 +++++++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala index 5aa782e5bd..ca932c6b34 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala @@ -69,6 +69,10 @@ object GeoParquetSpatialFilter { def evaluate(columns: Map[String, GeometryFieldMetaData]): Boolean = { columns.get(columnName).forall { column => val bbox = column.bbox + if (bbox.isEmpty) { + return true + } + val columnEnvelope = queryWindow.getFactory.toGeometry(new Envelope(bbox(0), bbox(2), bbox(1), bbox(3))) predicateType match { diff --git a/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet b/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet new file mode 100644 index 0000000000..b5393d3309 Binary files /dev/null and b/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet differ diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala index a6e74730a0..c1d49586d8 100644 --- a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala +++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala @@ -57,7 +57,7 @@ class 
geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll { val legacyparquetdatalocation: String = resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet" val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/" - + val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet" override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation)) describe("GeoParquet IO tests") { @@ -761,6 +761,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll { } } + describe("loading one file geoparquet and filtering") { + it("filtering one file geoparquet") { + val numberOfRecords = sparkSession.read + .format("geoparquet") + .load(overtureBBOX) + .where("ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))") + .count() + + assert(numberOfRecords == 9) + } + } + def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = { val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet")) parquetFiles.foreach { filePath =>
