This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4c94300ccc9 HIVE-29352: Add function ST_DistanceSphere for points
(#6223)
4c94300ccc9 is described below
commit 4c94300ccc9f4419967aeacfba22975440810cc3
Author: csringhofer <[email protected]>
AuthorDate: Mon Dec 8 07:50:05 2025 +0100
HIVE-29352: Add function ST_DistanceSphere for points (#6223)
---
.../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +
.../apache/hadoop/hive/ql/udf/esri/Haversine.java | 66 ++++++++++++++++
.../hadoop/hive/ql/udf/esri/ST_DistanceSphere.java | 73 +++++++++++++++++
.../hive/ql/udf/esri/TestStDistanceSphere.java | 92 ++++++++++++++++++++++
.../test/queries/clientpositive/geospatial_udfs.q | 5 +-
.../clientpositive/llap/geospatial_udfs.q.out | 9 +++
.../clientpositive/llap/show_functions.q.out | 3 +
7 files changed, 249 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 5784233cc2f..eda1360293f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -53,6 +53,7 @@
import org.apache.hadoop.hive.ql.udf.esri.ST_Dimension;
import org.apache.hadoop.hive.ql.udf.esri.ST_Disjoint;
import org.apache.hadoop.hive.ql.udf.esri.ST_Distance;
+import org.apache.hadoop.hive.ql.udf.esri.ST_DistanceSphere;
import org.apache.hadoop.hive.ql.udf.esri.ST_EndPoint;
import org.apache.hadoop.hive.ql.udf.esri.ST_EnvIntersects;
import org.apache.hadoop.hive.ql.udf.esri.ST_Envelope;
@@ -723,6 +724,7 @@ public final class FunctionRegistry {
system.registerFunction("ST_Dimension", ST_Dimension.class);
system.registerFunction("ST_Disjoint", ST_Disjoint.class);
system.registerFunction("ST_Distance", ST_Distance.class);
+ system.registerFunction("ST_DistanceSphere", ST_DistanceSphere.class);
system.registerFunction("ST_EndPoint", ST_EndPoint.class);
system.registerFunction("ST_Envelope", ST_Envelope.class);
system.registerFunction("ST_EnvIntersects", ST_EnvIntersects.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/Haversine.java
b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/Haversine.java
new file mode 100644
index 00000000000..18fb5db4c96
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/Haversine.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.esri;
+
+import static java.lang.Math.atan2;
+import static java.lang.Math.cos;
+import static java.lang.Math.sin;
+import static java.lang.Math.sqrt;
+import static java.lang.Math.toRadians;
+
+import com.esri.core.geometry.ogc.OGCPoint;
+
+// Class is based on Apache Sedona code:
+//
https://github.com/apache/sedona/blob/eee44b509624d9e4022a6dd40d9b07d72a369a20/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java#L43
+public final class Haversine {
+ public static final double AVG_EARTH_RADIUS_METERS = 6371008.0;
+
+ private Haversine() {}
+
+ /**
+ * Calculate the distance between two points on the earth using the
"haversine" formula. This is
+ * also known as the great-circle distance This will produce almost
identical result to PostGIS
+ * ST_DistanceSphere and ST_Distance(useSpheroid=false)
+ */
+ public static double distance(double lon1, double lat1,
+ double lon2, double lat2, double radius) {
+ double latDistance = toRadians(lat2 - lat1);
+ double lngDistance = toRadians(lon2 - lon1);
+ double a =
+ sin(latDistance / 2) * sin(latDistance / 2)
+ + cos(toRadians(lat1))
+ * cos(toRadians(lat2))
+ * sin(lngDistance / 2)
+ * sin(lngDistance / 2);
+ double c = 2 * atan2(sqrt(a), sqrt(1 - a));
+ return radius * c;
+ }
+
+ public static double distanceMeters(double lon1, double lat1,
+ double lon2, double lat2) {
+ return distance(lon1, lat1, lon2, lat2, AVG_EARTH_RADIUS_METERS);
+ }
+
+ public static double distanceMeters(OGCPoint point1, OGCPoint point2) {
+ double lon1 = point1.X();
+ double lat1 = point1.Y();
+ double lon2 = point2.X();
+ double lat2 = point2.Y();
+ return distanceMeters(lon1, lat1, lon2, lat2);
+ }
+}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_DistanceSphere.java
b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_DistanceSphere.java
new file mode 100755
index 00000000000..8ea5825952b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_DistanceSphere.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.esri;
+
+import com.esri.core.geometry.ogc.OGCPoint;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.udf.esri.GeometryUtils.OGCType;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Description(name = "ST_DistanceSphere",
+ value = "_FUNC_(ST_Geometry1, ST_Geometry2) - returns the great circle
distance between 2 points in meters",
+ extended = "Example:\n" +
+ " SELECT _FUNC_(ST_Point(113.914603,22.308919),
ST_Point(151.177222,-33.946111)) FROM src LIMIT 1;\n" +
+ " -- ~7393893")
+public class ST_DistanceSphere extends ST_GeometryAccessor {
+ final DoubleWritable resultDouble = new DoubleWritable();
+ static final Logger LOG =
LoggerFactory.getLogger(ST_DistanceSphere.class.getName());
+
+ public DoubleWritable evaluate(BytesWritable geometryref1, BytesWritable
geometryref2) {
+ if (geometryref1 == null || geometryref2 == null ||
geometryref1.getLength() == 0
+ || geometryref2.getLength() == 0) {
+ LogUtils.Log_ArgumentsNull(LOG);
+ return null;
+ }
+ if (!GeometryUtils.compareSpatialReferences(geometryref1, geometryref2)) {
+ LogUtils.Log_SRIDMismatch(LOG, geometryref1, geometryref2);
+ return null;
+ }
+ // Only distance of points is supported.
+ if (GeometryUtils.getType(geometryref1) != OGCType.ST_POINT) {
+ LogUtils.Log_InvalidType(LOG, OGCType.ST_POINT,
GeometryUtils.getType(geometryref1));
+ return null;
+ }
+ if (GeometryUtils.getType(geometryref2) != OGCType.ST_POINT) {
+ LogUtils.Log_InvalidType(LOG, OGCType.ST_POINT,
GeometryUtils.getType(geometryref2));
+ return null;
+ }
+
+ OGCPoint point1 = (OGCPoint)
GeometryUtils.geometryFromEsriShape(geometryref1);
+ OGCPoint point2 = (OGCPoint)
GeometryUtils.geometryFromEsriShape(geometryref2);
+ if (point1 == null || point2 == null) {
+ LogUtils.Log_ArgumentsNull(LOG);
+ return null;
+ }
+
+ try {
+ resultDouble.set(Haversine.distanceMeters(point1, point2));
+ return resultDouble;
+ } catch (Exception e) {
+ LogUtils.Log_InternalError(LOG, "ST_DistanceSphere: " + e);
+ return null;
+ }
+ }
+}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/udf/esri/TestStDistanceSphere.java
b/ql/src/test/org/apache/hadoop/hive/ql/udf/esri/TestStDistanceSphere.java
new file mode 100644
index 00000000000..3106bb007b3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/esri/TestStDistanceSphere.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.esri;
+
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestStDistanceSphere {
+
+ private BytesWritable point(double lon, double lat) {
+ ST_Point stPt = new ST_Point();
+ return stPt.evaluate(new DoubleWritable(lon), new DoubleWritable(lat));
+ }
+
+ @Test
+ public void testStDistanceSphere() {
+ // Comparing distances to results from st_distancesphere() in PostGis
3.2.1.
+ // The results are not exact matches, but very close.
+ ST_DistanceSphere ds = new ST_DistanceSphere();
+ BytesWritable pParlaimentDome = point(19.04572641312447,
47.50745742600276);
+ BytesWritable pParlaimentFlag = point(19.04773610387992,
47.507062805452804);
+ BytesWritable pBudaCastle = point(19.04125284394088, 47.49660323441422);
+ BytesWritable pSzeged = point(20.145, 46.255);
+ BytesWritable pBangalore = point(77.59167, 12.97889);
+ DoubleWritable res;
+ res = ds.evaluate(pParlaimentFlag, pParlaimentDome);
+ assertEquals(157.20008441, res.get(), 0.01);
+
+ res = ds.evaluate(pBudaCastle, pParlaimentDome);
+ assertEquals(1252.84371679, res.get(), 0.05);
+
+ res = ds.evaluate(pBudaCastle, pSzeged);
+ assertEquals(161548.64536448, res.get(), 0.1);
+
+ res = ds.evaluate(pBudaCastle, pBangalore);
+ assertEquals(6604682.18138413, res.get(), 1.0);
+ }
+
+ @Test
+ public void testHaversine() {
+ // based on Apache Sedona's tests:
+ //
https://github.com/apache/sedona/blob/0523fbd/common/src/test/java/org/apache/sedona/common/FunctionsTest.java#L1489
+ // Basic check
+ assertEquals(1.00075559643809E7, Haversine.distanceMeters(90, 0, 0, 0),
0.1);
+ assertEquals(543796.9506134904, Haversine.distanceMeters(-0.56, 51.3168,
-3.1883, 55.9533), 0.1);
+ assertEquals(299073.03416817175, Haversine.distanceMeters(11.786111,
48.353889, 8.570556, 50.033333), 0.1);
+ assertEquals(479569.4558072244, Haversine.distanceMeters(11.786111,
48.353889, 13.287778, 52.559722), 0.1);
+
+ // HK to Sydney
+ assertEquals(7393893.072901942, Haversine.distanceMeters(113.914603,
22.308919, 151.177222, -33.946111), 0.1);
+
+ // HK to Toronto
+ assertEquals(1.2548548944238186E7, Haversine.distanceMeters(113.914603,
22.308919, -79.630556, 43.677223), 0.1);
+
+ // Crossing the anti-meridian
+ assertTrue(Haversine.distanceMeters(179.999, 0, -179.999, 0) < 300);
+ assertTrue(Haversine.distanceMeters(-179.999, 0, 179.999, 0) < 300);
+ assertTrue(Haversine.distanceMeters(179.999, 60, -179.999, 60) < 300);
+ assertTrue(Haversine.distanceMeters(-179.999, 60, 179.999, 60) < 300);
+ assertTrue(Haversine.distanceMeters(179.999, -60, -179.999, -60) < 300);
+ assertTrue(Haversine.distanceMeters(-179.999, -60, 179.999, -60) < 300);
+
+ // Crossing the North Pole
+ assertTrue(Haversine.distanceMeters(-60, 89.999, 120, 89.999) < 300);
+ assertTrue(Haversine.distanceMeters(120, 89.999, -60, 89.999) < 300);
+
+ // Crossing the South Pole
+ assertTrue(Haversine.distanceMeters(-60, -89.999, 120, -89.999) < 300);
+ assertTrue(Haversine.distanceMeters(120, -89.999, -60, -89.999) < 300);
+ }
+
+}
+
diff --git a/ql/src/test/queries/clientpositive/geospatial_udfs.q
b/ql/src/test/queries/clientpositive/geospatial_udfs.q
index 7eee2a72505..29df7aa9a6a 100644
--- a/ql/src/test/queries/clientpositive/geospatial_udfs.q
+++ b/ql/src/test/queries/clientpositive/geospatial_udfs.q
@@ -162,4 +162,7 @@ SELECT ST_AsText(
ST_GeomFromText('polygon ((0 0, 5 0, 5 5, 0 5, 0 0))'),
ST_GeomFromText('point (2 2)')
)
-);
\ No newline at end of file
+);
+
+-- Budapest - Honolulu distance (12356871.78163278 in PostGis 3.2.1)
+SELECT ST_DistanceSphere(ST_Point(19.04125, 47.496603), ST_Point(-157.924656,
21.321329));
diff --git a/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out
b/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out
index 7e44c58cba0..2a6c1e3a34f 100644
--- a/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out
+++ b/ql/src/test/results/clientpositive/llap/geospatial_udfs.q.out
@@ -516,3 +516,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POLYGON ((0 0, 5 0, 5 5, 0 5, 0 0))
+PREHOOK: query: SELECT ST_DistanceSphere(ST_Point(19.04125, 47.496603),
ST_Point(-157.924656, 21.321329))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ST_DistanceSphere(ST_Point(19.04125, 47.496603),
ST_Point(-157.924656, 21.321329))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1.2356870285436917E7
diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out
b/ql/src/test/results/clientpositive/llap/show_functions.q.out
index d28610a55f5..605e953d634 100644
--- a/ql/src/test/results/clientpositive/llap/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out
@@ -383,6 +383,7 @@ st_difference
st_dimension
st_disjoint
st_distance
+st_distancesphere
st_endpoint
st_envelope
st_envintersects
@@ -624,6 +625,7 @@ st_asshape
st_binenvelope
st_difference
st_distance
+st_distancesphere
st_envelope
st_geometrytype
st_geomfromshape
@@ -1023,6 +1025,7 @@ st_difference
st_dimension
st_disjoint
st_distance
+st_distancesphere
st_endpoint
st_envelope
st_envintersects