This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 00749a45f8 [SEDONA-700] Fix ST_KNN fails on null and empty geometries (#1763)
00749a45f8 is described below
commit 00749a45f842e67808f9669d6cfc0142c332381c
Author: Feng Zhang <[email protected]>
AuthorDate: Thu Jan 16 21:45:12 2025 -0800
[SEDONA-700] Fix ST_KNN fails on null and empty geometries (#1763)
* [SEDONA-700] Fix ST_KNN fails on null and empty geometries
* fix formatting issue
---
.../spatialPartitioning/quadtree/ExtendedQuadTree.java | 5 ++++-
.../apache/sedona/sql/utils/GeometrySerializer.scala | 5 ++++-
.../test/scala/org/apache/sedona/sql/KnnJoinSuite.scala | 17 +++++++++++++++++
3 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java
index 73169363ee..9925e93f8a 100644
--- a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java
+++ b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java
@@ -146,8 +146,11 @@ public class ExtendedQuadTree<T> extends PartitioningUtils implements Serializab
final Set<Tuple2<Integer, Geometry>> result = new HashSet<>();
for (QuadRectangle rectangle : matchedPartitions) {
+ // Ignore null or empty point
+ if (point == null || point.isEmpty()) break;
+
// For points, make sure to return only one partition
- if (point != null && !(new HalfOpenRectangle(rectangle.getEnvelope())).contains(point)) {
+ if (!(new HalfOpenRectangle(rectangle.getEnvelope())).contains(point)) {
continue;
}
diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala b/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala
index a13c181aa2..a75a88f7ba 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala
@@ -19,7 +19,7 @@
package org.apache.sedona.sql.utils
import org.apache.sedona.common.geometrySerde
-import org.locationtech.jts.geom.Geometry
+import org.locationtech.jts.geom.{Geometry, GeometryFactory}
/**
* SerDe using the WKB reader and writer objects
@@ -47,6 +47,9 @@ object GeometrySerializer {
* JTS geometry
*/
def deserialize(value: Array[Byte]): Geometry = {
+ if (value == null) {
+ return new GeometryFactory().createGeometryCollection()
+ }
geometrySerde.GeometrySerializer.deserialize(value)
}
}
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala b/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala
index f3b07c2501..ab2c64898a 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala
@@ -441,6 +441,23 @@ class KnnJoinSuite extends TestBaseScala with TableDrivenPropertyChecks {
resultAll.mkString should be("[0,6][0,7]")
}
}
+
+ it("KNN Join with exact algorithms should not fail with null geometries") {
+ val df1 = sparkSession.sql(
+ "SELECT ST_GeomFromText(col1) as geom1 from values ('POINT (0.0 0.0)'), (null)")
+ val df2 = sparkSession.sql("SELECT ST_Point(0.0, 0.0) as geom2")
+ df1.cache()
+ df2.cache()
+ df1.join(df2, expr("ST_KNN(geom1, geom2, 1)")).count() should be(1)
+ }
+
+ it("KNN Join with exact algorithms should not fail with empty geometries") {
+ val df1 = sparkSession.sql("SELECT ST_GeomFromText('POINT EMPTY') as geom1")
+ val df2 = sparkSession.sql("SELECT ST_Point(0.0, 0.0) as geom2")
+ df1.cache()
+ df2.cache()
+ df1.join(df2, expr("ST_KNN(geom1, geom2, 1)")).count() should be(0)
+ }
}
private def withOptimizationMode(mode: String)(body: => Unit): Unit = {