This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new df3bfd68 [SEDONA-281] Change ST_DistanceSphere default radius to
6371008 (#846)
df3bfd68 is described below
commit df3bfd68b910bcd7c0c7ebef87229f7887f653f6
Author: Jia Yu <[email protected]>
AuthorDate: Mon May 29 20:49:36 2023 -0700
[SEDONA-281] Change ST_DistanceSphere default radius to 6371008 (#846)
---
.../java/org/apache/sedona/common/sphere/Haversine.java | 2 +-
.../test/java/org/apache/sedona/common/FunctionsTest.java | 14 +++++++-------
docs/api/flink/Function.md | 4 ++--
docs/api/sql/Function.md | 8 ++++----
.../test/java/org/apache/sedona/flink/FunctionTest.java | 2 +-
python/sedona/sql/st_functions.py | 4 ++--
.../src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala | 2 +-
.../spark/sql/sedona_sql/expressions/st_functions.scala | 4 ++--
.../org/apache/sedona/sql/dataFrameAPITestScala.scala | 3 ++-
.../scala/org/apache/sedona/sql/functionTestScala.scala | 4 ++--
10 files changed, 24 insertions(+), 23 deletions(-)
diff --git
a/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java
b/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java
index ab46aae7..7c8c15ac 100644
--- a/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java
+++ b/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java
@@ -60,6 +60,6 @@ public class Haversine
// The radius of the earth is 6371.0 km
public static double distance(Geometry geom1, Geometry geom2)
{
- return distance(geom1, geom2, 6378137.0);
+ return distance(geom1, geom2, 6371008.0);
}
}
diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
index b81e85a6..029c888a 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
@@ -449,33 +449,33 @@ public class FunctionsTest {
// Basic check
Point p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(0, 90));
Point p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(0, 0));
- assertEquals(1.0018754171394622E7, Haversine.distance(p1, p2), 0.1);
+ assertEquals(1.00075559643809E7, Haversine.distance(p1, p2), 0.1);
p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(51.3168, -0.56));
p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(55.9533, -3.1883));
- assertEquals(544405.4459192449, Haversine.distance(p1, p2), 0.1);
+ assertEquals(543796.9506134904, Haversine.distance(p1, p2), 0.1);
p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(48.353889,
11.786111));
p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(50.033333, 8.570556));
- assertEquals(299407.6894786948, Haversine.distance(p1, p2), 0.1);
+ assertEquals(299073.03416817175, Haversine.distance(p1, p2), 0.1);
p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(48.353889,
11.786111));
p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(52.559722,
13.287778));
- assertEquals(480106.0821386384, Haversine.distance(p1, p2), 0.1);
+ assertEquals(479569.4558072244, Haversine.distance(p1, p2), 0.1);
LineString l1 = GEOMETRY_FACTORY.createLineString(coordArray(0, 0, 0,
90));
LineString l2 = GEOMETRY_FACTORY.createLineString(coordArray(0, 1, 0,
0));
- assertEquals(4953717.340300673, Haversine.distance(l1, l2), 0.1);
+ assertEquals(4948180.449055, Haversine.distance(l1, l2), 0.1);
// HK to Sydney
p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(22.308919,
113.914603));
p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(-33.946111,
151.177222));
- assertEquals(7402166.655938837, Haversine.distance(p1, p2), 0.1);
+ assertEquals(7393893.072901942, Haversine.distance(p1, p2), 0.1);
// HK to Toronto
p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(22.308919,
113.914603));
p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(43.677223,
-79.630556));
- assertEquals(1.2562590459399283E7, Haversine.distance(p1, p2), 0.1);
+ assertEquals(1.2548548944238186E7, Haversine.distance(p1, p2), 0.1);
}
@Test
diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md
index 813ed1c1..e2f1d142 100644
--- a/docs/api/flink/Function.md
+++ b/docs/api/flink/Function.md
@@ -278,7 +278,7 @@ FROM polygondf
## ST_DistanceSphere
-Introduction: Return the haversine / great-circle distance of A using a given
earth radius (default radius: 6378137.0). Unit is meter. Works better for large
geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is
equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and
`ST_DistanceSphere` function and produces nearly identical results. It provides
faster but less accurate result compared to `ST_DistanceSpheroid`.
+Introduction: Return the haversine / great-circle distance between A and B
using a given earth radius (default radius: 6371008.0). Unit is meter. Compared
to `ST_Distance` + `ST_Transform`, it works better for datasets that cover large
regions such as continents or the entire planet. It is equivalent to the PostGIS
`ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` functions
and produces nearly identical results. It provides a faster but less accurate
result compared to `ST_DistanceSpheroid`.
Geometry must be in EPSG:4326 (WGS84) projection and must be in lat/lon order.
You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point data, we
first take the centroids of both geometries and then compute the distance.
@@ -292,7 +292,7 @@ Example 1:
SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'),
ST_GeomFromWKT('POINT (55.9533 -3.1883)'))
```
-Output: `544405.4459192449`
+Output: `543796.9506134904`
Example 2:
diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index b8643268..3eba9a03 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -51,7 +51,7 @@ FROM polygondf
## ST_AreaSpheroid
-Introduction: Return the geodesic area of A using WGS84 spheroid. Unit is
meter. Works better for large geometries (country level) compared to `ST_Area`
+ `ST_Transform`. It is equivalent to PostGIS `ST_Area(geography,
use_spheroid=true)` function and produces nearly identical results.
+Introduction: Return the geodesic area of A using WGS84 spheroid. Unit is
square meter. Works better for large geometries (country level) compared to
`ST_Area` + `ST_Transform`. It is equivalent to PostGIS `ST_Area(geography,
use_spheroid=true)` function and produces nearly identical results.
Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon==
order. You can use ==ST_FlipCoordinates== to swap lat and lon.
@@ -416,7 +416,7 @@ FROM polygondf
## ST_DistanceSphere
-Introduction: Return the haversine / great-circle distance of A using a given
earth radius (default radius: 6378137.0). Unit is meter. Works better for large
geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is
equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and
`ST_DistanceSphere` function and produces nearly identical results. It provides
faster but less accurate result compared to `ST_DistanceSpheroid`.
+Introduction: Return the haversine / great-circle distance between A and B
using a given earth radius (default radius: 6371008.0). Unit is meter. Compared
to `ST_Distance` + `ST_Transform`, it works better for datasets that cover large
regions such as continents or the entire planet. It is equivalent to the PostGIS
`ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` functions
and produces nearly identical results. It provides a faster but less accurate
result compared to `ST_DistanceSpheroid`.
Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon==
order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point
data, we first take the centroids of both geometries and then compute the
distance.
@@ -429,7 +429,7 @@ Spark SQL example 1:
SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'),
ST_GeomFromWKT('POINT (55.9533 -3.1883)'))
```
-Output: `544405.4459192449`
+Output: `543796.9506134904`
Spark SQL example 2:
```sql
@@ -441,7 +441,7 @@ Output: `544405.4459192449`
## ST_DistanceSpheroid
-Introduction: Return the geodesic distance of A using WGS84 spheroid. Unit is
meter. Works better for large geometries (country level) compared to
`ST_Distance` + `ST_Transform`. It is equivalent to PostGIS
`ST_Distance(geography, use_spheroid=true)` and `ST_DistanceSpheroid` function
and produces nearly identical results. It provides slower but more accurate
result compared to `ST_DistanceSphere`.
+Introduction: Return the geodesic distance between A and B using the WGS84
spheroid. Unit is meter. Compared to `ST_Distance` + `ST_Transform`, it works
better for datasets that cover large regions such as continents or the entire
planet. It is equivalent to the PostGIS `ST_Distance(geography,
use_spheroid=true)` and `ST_DistanceSpheroid` functions and produces nearly
identical results. It provides a slower but more accurate result compared to
`ST_DistanceSphere`.
Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon==
order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point
data, we first take the centroids of both geometries and then compute the
distance.
diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
index f04328e0..933e216f 100644
--- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
+++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
@@ -175,7 +175,7 @@ public class FunctionTest extends TestBase{
public void testDistanceSphere() {
Table tbl = tableEnv.sqlQuery(
"SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168
-0.56)'), ST_GeomFromWKT('POINT (55.9533 -3.1883)'))");
- Double expected = 544405.4459192449;
+ Double expected = 543796.9506134904;
Double actual = (Double) first(tbl).getField(0);
assertEquals(expected, actual, 0.1);
}
diff --git a/python/sedona/sql/st_functions.py
b/python/sedona/sql/st_functions.py
index 2c5196a3..f3cea50b 100644
--- a/python/sedona/sql/st_functions.py
+++ b/python/sedona/sql/st_functions.py
@@ -424,14 +424,14 @@ def ST_DistanceSpheroid(a: ColumnOrName, b: ColumnOrName)
-> Column:
return _call_st_function("ST_DistanceSpheroid", (a, b))
@validate_argument_types
-def ST_DistanceSphere(a: ColumnOrName, b: ColumnOrName, radius:
Optional[Union[ColumnOrName, float]] = 6378137.0) -> Column:
+def ST_DistanceSphere(a: ColumnOrName, b: ColumnOrName, radius:
Optional[Union[ColumnOrName, float]] = 6371008.0) -> Column:
"""Calculate the haversine/great-circle distance between two geometry
columns using a given radius.
:param a: Geometry column to use in the calculation.
:type a: ColumnOrName
:param b: Other geometry column to use in the calculation.
:type b: ColumnOrName
- :param radius: Radius of the sphere, defaults to 6378137.0
+ :param radius: Radius of the sphere, defaults to 6371008.0
:type radius: Optional[Union[ColumnOrName, float]], optional
:return: Two-dimensional haversine/great-circle distance between a and b
as a double column. Unit is meter.
:rtype: Column
diff --git a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 0842f467..d50af2ab 100644
--- a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -143,7 +143,7 @@ object Catalog {
function[ST_Split](),
function[ST_S2CellIDs](),
function[ST_GeometricMedian](1e-6, 1000, false),
- function[ST_DistanceSphere](6378137.0),
+ function[ST_DistanceSphere](6371008.0),
function[ST_DistanceSpheroid](),
function[ST_AreaSpheroid](),
function[ST_LengthSpheroid](),
diff --git
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
index f7a56bf8..ad29b854 100644
---
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
+++
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
@@ -285,8 +285,8 @@ object st_functions extends DataFrameAPI {
def ST_GeometricMedian(geometry: Column, tolerance: Column, maxIter: Column,
failIfNotConverged: Column): Column =
wrapExpression[ST_GeometricMedian](geometry, tolerance, maxIter,
failIfNotConverged)
def ST_GeometricMedian(geometry: String, tolerance: Double, maxIter: Int,
failIfNotConverged: Boolean): Column =
wrapExpression[ST_GeometricMedian](geometry, tolerance, maxIter,
failIfNotConverged)
- def ST_DistanceSphere(a: Column, b: Column): Column =
wrapExpression[ST_DistanceSphere](a, b, 6378137.0)
- def ST_DistanceSphere(a: String, b: String): Column =
wrapExpression[ST_DistanceSphere](a, b, 6378137.0)
+ def ST_DistanceSphere(a: Column, b: Column): Column =
wrapExpression[ST_DistanceSphere](a, b, 6371008.0)
+ def ST_DistanceSphere(a: String, b: String): Column =
wrapExpression[ST_DistanceSphere](a, b, 6371008.0)
def ST_DistanceSphere(a: Column, b: Column, c: Column): Column =
wrapExpression[ST_DistanceSphere](a, b, c)
def ST_DistanceSphere(a: String, b: String, c: Double): Column =
wrapExpression[ST_DistanceSphere](a, b, c)
diff --git
a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
index 24a3d2b8..787a7300 100644
---
a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
+++
b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
@@ -917,11 +917,12 @@ class dataFrameAPITestScala extends TestBaseScala {
val baseDf = sparkSession.sql("SELECT ST_GeomFromWKT('POINT (0 0)') AS
geom1, ST_GeomFromWKT('POINT (0 90)') AS geom2")
var df = baseDf.select(ST_DistanceSphere("geom1", "geom2"))
var actualResult = df.take(1)(0).getDouble(0)
- val expectedResult = 10018754.171394622
+ var expectedResult = 1.00075559643809E7
assert(actualResult == expectedResult)
df = baseDf.select(ST_DistanceSphere("geom1", "geom2", 6378137.0))
actualResult = df.take(1)(0).getDouble(0)
+ expectedResult = 1.0018754171394622E7
assertEquals(expectedResult, actualResult, 0.1)
}
diff --git
a/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
b/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index a3336ca1..0e598e6b 100644
--- a/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -1842,8 +1842,8 @@ class functionTestScala extends TestBaseScala with
Matchers with GeometrySample
it("Should pass ST_DistanceSphere") {
val geomTestCases = Map(
- ("'POINT (51.3168 -0.56)'", "'POINT (55.9533 -3.1883)'") ->
"544405.4459192449",
- ("'LineString (0 0, 0 90)'", "'LineString (0 1, 0 0)'") ->
"4953717.340300673"
+ ("'POINT (51.3168 -0.56)'", "'POINT (55.9533 -3.1883)'") ->
"543796.9506134904",
+ ("'LineString (0 0, 0 90)'", "'LineString (0 1, 0 0)'") ->
"4948180.449055"
)
for (((geom1, geom2), expectedResult) <- geomTestCases) {
val df = sparkSession.sql(s"SELECT
ST_DistanceSphere(ST_GeomFromWKT($geom1), ST_GeomFromWKT($geom2)), " +