This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c94300ccc9 HIVE-29352: Add function ST_DistanceSphere for points 
(#6223)
4c94300ccc9 is described below

commit 4c94300ccc9f4419967aeacfba22975440810cc3
Author: csringhofer <[email protected]>
AuthorDate: Mon Dec 8 07:50:05 2025 +0100

    HIVE-29352: Add function ST_DistanceSphere for points (#6223)
---
 .../hadoop/hive/ql/exec/FunctionRegistry.java      |  2 +
 .../apache/hadoop/hive/ql/udf/esri/Haversine.java  | 66 ++++++++++++++++
 .../hadoop/hive/ql/udf/esri/ST_DistanceSphere.java | 73 +++++++++++++++++
 .../hive/ql/udf/esri/TestStDistanceSphere.java     | 92 ++++++++++++++++++++++
 .../test/queries/clientpositive/geospatial_udfs.q  |  5 +-
 .../clientpositive/llap/geospatial_udfs.q.out      |  9 +++
 .../clientpositive/llap/show_functions.q.out       |  3 +
 7 files changed, 249 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 5784233cc2f..eda1360293f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -53,6 +53,7 @@
 import org.apache.hadoop.hive.ql.udf.esri.ST_Dimension;
 import org.apache.hadoop.hive.ql.udf.esri.ST_Disjoint;
 import org.apache.hadoop.hive.ql.udf.esri.ST_Distance;
+import org.apache.hadoop.hive.ql.udf.esri.ST_DistanceSphere;
 import org.apache.hadoop.hive.ql.udf.esri.ST_EndPoint;
 import org.apache.hadoop.hive.ql.udf.esri.ST_EnvIntersects;
 import org.apache.hadoop.hive.ql.udf.esri.ST_Envelope;
@@ -723,6 +724,7 @@ public final class FunctionRegistry {
     system.registerFunction("ST_Dimension", ST_Dimension.class);
     system.registerFunction("ST_Disjoint", ST_Disjoint.class);
     system.registerFunction("ST_Distance", ST_Distance.class);
+    system.registerFunction("ST_DistanceSphere", ST_DistanceSphere.class);
     system.registerFunction("ST_EndPoint", ST_EndPoint.class);
     system.registerFunction("ST_Envelope", ST_Envelope.class);
     system.registerFunction("ST_EnvIntersects", ST_EnvIntersects.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/Haversine.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/Haversine.java
new file mode 100644
index 00000000000..18fb5db4c96
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/Haversine.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.esri;
+
+import static java.lang.Math.atan2;
+import static java.lang.Math.cos;
+import static java.lang.Math.sin;
+import static java.lang.Math.sqrt;
+import static java.lang.Math.toRadians;
+
+import com.esri.core.geometry.ogc.OGCPoint;
+
+// Class is based on Apache Sedona code:
+// 
https://github.com/apache/sedona/blob/eee44b509624d9e4022a6dd40d9b07d72a369a20/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java#L43
+public final class Haversine {
+  public static final double AVG_EARTH_RADIUS_METERS = 6371008.0;
+
+  private Haversine() {}
+
+  /**
+   * Calculate the distance between two points on the earth using the 
"haversine" formula. This is
+   * also known as the great-circle distance This will produce almost 
identical result to PostGIS
+   * ST_DistanceSphere and ST_Distance(useSpheroid=false)
+   */
+  public static double distance(double lon1, double lat1,
+                                double lon2, double lat2, double radius) {
+    double latDistance = toRadians(lat2 - lat1);
+    double lngDistance = toRadians(lon2 - lon1);
+    double a =
+        sin(latDistance / 2) * sin(latDistance / 2)
+            + cos(toRadians(lat1))
+                * cos(toRadians(lat2))
+                * sin(lngDistance / 2)
+                * sin(lngDistance / 2);
+    double c = 2 * atan2(sqrt(a), sqrt(1 - a));
+    return radius * c;
+  }
+
+  public static double distanceMeters(double lon1, double lat1,
+                                       double lon2, double lat2) {
+    return distance(lon1, lat1, lon2, lat2, AVG_EARTH_RADIUS_METERS);
+  }
+
+  public static double distanceMeters(OGCPoint point1, OGCPoint point2) {
+    double lon1 = point1.X();
+    double lat1 = point1.Y();
+    double lon2 = point2.X();
+    double lat2 = point2.Y();
+    return distanceMeters(lon1, lat1, lon2, lat2);
+  }
+}
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_DistanceSphere.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_DistanceSphere.java
new file mode 100755
index 00000000000..8ea5825952b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_DistanceSphere.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.esri;
+
+import com.esri.core.geometry.ogc.OGCPoint;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.udf.esri.GeometryUtils.OGCType;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Description(name = "ST_DistanceSphere",
+    value = "_FUNC_(ST_Geometry1, ST_Geometry2) - returns the great circle 
distance between 2 points in meters",
+    extended = "Example:\n" +
+       "  SELECT _FUNC_(ST_Point(113.914603,22.308919), 
ST_Point(151.177222,-33.946111)) FROM src LIMIT 1;\n" +
+       "  --  ~7393893")
+public class ST_DistanceSphere extends ST_GeometryAccessor {
+  final DoubleWritable resultDouble = new DoubleWritable();
+  static final Logger LOG = 
LoggerFactory.getLogger(ST_DistanceSphere.class.getName());
+
+  public DoubleWritable evaluate(BytesWritable geometryref1, BytesWritable 
geometryref2) {
+    if (geometryref1 == null || geometryref2 == null || 
geometryref1.getLength() == 0
+        || geometryref2.getLength() == 0) {
+      LogUtils.Log_ArgumentsNull(LOG);
+      return null;
+    }
+    if (!GeometryUtils.compareSpatialReferences(geometryref1, geometryref2)) {
+      LogUtils.Log_SRIDMismatch(LOG, geometryref1, geometryref2);
+      return null;
+    }
+    // Only distance of points is supported.
+    if (GeometryUtils.getType(geometryref1) != OGCType.ST_POINT) {
+      LogUtils.Log_InvalidType(LOG, OGCType.ST_POINT, 
GeometryUtils.getType(geometryref1));
+      return null;
+    }
+    if (GeometryUtils.getType(geometryref2) != OGCType.ST_POINT) {
+      LogUtils.Log_InvalidType(LOG, OGCType.ST_POINT, 
GeometryUtils.getType(geometryref2));
+      return null;
+    }
+
+    OGCPoint point1 = (OGCPoint) 
GeometryUtils.geometryFromEsriShape(geometryref1);
+    OGCPoint point2 = (OGCPoint) 
GeometryUtils.geometryFromEsriShape(geometryref2);
+    if (point1 == null || point2 == null) {
+      LogUtils.Log_ArgumentsNull(LOG);
+      return null;
+    }
+
+    try {
+      resultDouble.set(Haversine.distanceMeters(point1, point2));
+      return resultDouble;
+    } catch (Exception e) {
+      LogUtils.Log_InternalError(LOG, "ST_DistanceSphere: " + e);
+      return null;
+    }
+  }
+}
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/udf/esri/TestStDistanceSphere.java 
b/ql/src/test/org/apache/hadoop/hive/ql/udf/esri/TestStDistanceSphere.java
new file mode 100644
index 00000000000..3106bb007b3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/esri/TestStDistanceSphere.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.esri;
+
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestStDistanceSphere {
+
+  private BytesWritable point(double lon, double lat) {
+    ST_Point stPt = new ST_Point();
+    return stPt.evaluate(new DoubleWritable(lon), new DoubleWritable(lat));
+  }
+
+  @Test
+  public void testStDistanceSphere() {
+    // Comparing distances to results from st_distancesphere() in PostGis 
3.2.1.
+    // The results are not exact matches, but very close.
+    ST_DistanceSphere ds = new ST_DistanceSphere();
+    BytesWritable pParlaimentDome = point(19.04572641312447, 
47.50745742600276);
+    BytesWritable pParlaimentFlag = point(19.04773610387992, 
47.507062805452804);
+    BytesWritable pBudaCastle = point(19.04125284394088, 47.49660323441422);
+    BytesWritable pSzeged = point(20.145, 46.255);
+    BytesWritable pBangalore = point(77.59167, 12.97889);
+    DoubleWritable res;
+    res = ds.evaluate(pParlaimentFlag, pParlaimentDome);
+    assertEquals(157.20008441, res.get(), 0.01);
+
+    res = ds.evaluate(pBudaCastle, pParlaimentDome);
+    assertEquals(1252.84371679, res.get(), 0.05);
+
+    res = ds.evaluate(pBudaCastle, pSzeged);
+    assertEquals(161548.64536448, res.get(), 0.1);
+
+    res = ds.evaluate(pBudaCastle, pBangalore);
+    assertEquals(6604682.18138413, res.get(), 1.0);
+  }
+
+  @Test
+  public void testHaversine() {
+    // based on Apache Sedona's tests:
+    // 
https://github.com/apache/sedona/blob/0523fbd/common/src/test/java/org/apache/sedona/common/FunctionsTest.java#L1489
+    // Basic check
+    assertEquals(1.00075559643809E7, Haversine.distanceMeters(90, 0, 0, 0), 
0.1);
+    assertEquals(543796.9506134904, Haversine.distanceMeters(-0.56, 51.3168, 
-3.1883, 55.9533), 0.1);
+    assertEquals(299073.03416817175, Haversine.distanceMeters(11.786111, 
48.353889, 8.570556, 50.033333), 0.1);
+    assertEquals(479569.4558072244, Haversine.distanceMeters(11.786111, 
48.353889, 13.287778, 52.559722), 0.1);
+
+    // HK to Sydney
+    assertEquals(7393893.072901942, Haversine.distanceMeters(113.914603, 
22.308919, 151.177222, -33.946111), 0.1);
+
+    // HK to Toronto
+    assertEquals(1.2548548944238186E7, Haversine.distanceMeters(113.914603, 
22.308919, -79.630556, 43.677223), 0.1);
+
+    // Crossing the anti-meridian
+    assertTrue(Haversine.distanceMeters(179.999, 0, -179.999, 0) < 300);
+    assertTrue(Haversine.distanceMeters(-179.999, 0, 179.999, 0) < 300);
+    assertTrue(Haversine.distanceMeters(179.999, 60, -179.999, 60) < 300);
+    assertTrue(Haversine.distanceMeters(-179.999, 60, 179.999, 60) < 300);
+    assertTrue(Haversine.distanceMeters(179.999, -60, -179.999, -60) < 300);
+    assertTrue(Haversine.distanceMeters(-179.999, -60, 179.999, -60) < 300);
+
+    // Crossing the North Pole
+    assertTrue(Haversine.distanceMeters(-60, 89.999, 120, 89.999) < 300);
+    assertTrue(Haversine.distanceMeters(120, 89.999, -60, 89.999) < 300);
+
+    // Crossing the South Pole
+    assertTrue(Haversine.distanceMeters(-60, -89.999, 120, -89.999) < 300);
+    assertTrue(Haversine.distanceMeters(120, -89.999, -60, -89.999) < 300);
+  }
+
+}
+
diff --git a/ql/src/test/queries/clientpositive/geospatial_udfs.q 
b/ql/src/test/queries/clientpositive/geospatial_udfs.q
index 7eee2a72505..29df7aa9a6a 100644
--- a/ql/src/test/queries/clientpositive/geospatial_udfs.q
+++ b/ql/src/test/queries/clientpositive/geospatial_udfs.q
@@ -162,4 +162,7 @@ SELECT ST_AsText(
     ST_GeomFromText('polygon ((0 0, 5 0, 5 5, 0 5, 0 0))'),
     ST_GeomFromText('point (2 2)')
   )
-);
\ No newline at end of file
+);
+
+-- Budapest - Honolulu distance (12356871.78163278 in PostGis 3.2.1)
+SELECT ST_DistanceSphere(ST_Point(19.04125, 47.496603), ST_Point(-157.924656, 
21.321329));
diff --git a/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out 
b/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out
index 7e44c58cba0..2a6c1e3a34f 100644
--- a/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out
+++ b/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out
@@ -516,3 +516,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 #### A masked pattern was here ####
 POLYGON ((0 0, 5 0, 5 5, 0 5, 0 0))
+PREHOOK: query: SELECT ST_DistanceSphere(ST_Point(19.04125, 47.496603), 
ST_Point(-157.924656, 21.321329))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ST_DistanceSphere(ST_Point(19.04125, 47.496603), 
ST_Point(-157.924656, 21.321329))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1.2356870285436917E7
diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out 
b/ql/src/test/results/clientpositive/llap/show_functions.q.out
index d28610a55f5..605e953d634 100644
--- a/ql/src/test/results/clientpositive/llap/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out
@@ -383,6 +383,7 @@ st_difference
 st_dimension
 st_disjoint
 st_distance
+st_distancesphere
 st_endpoint
 st_envelope
 st_envintersects
@@ -624,6 +625,7 @@ st_asshape
 st_binenvelope
 st_difference
 st_distance
+st_distancesphere
 st_envelope
 st_geometrytype
 st_geomfromshape
@@ -1023,6 +1025,7 @@ st_difference
 st_dimension
 st_disjoint
 st_distance
+st_distancesphere
 st_endpoint
 st_envelope
 st_envintersects

Reply via email to