This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch change-distancesphere-value in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 2585035e44a050119b8622f1c7976217cd655357 Author: Jia Yu <[email protected]> AuthorDate: Mon May 29 11:47:43 2023 -0700 Change ST_DistanceSphere default radius to 6371008 --- .../java/org/apache/sedona/common/sphere/Haversine.java | 2 +- .../test/java/org/apache/sedona/common/FunctionsTest.java | 14 +++++++------- docs/api/flink/Function.md | 4 ++-- docs/api/sql/Function.md | 8 ++++---- .../test/java/org/apache/sedona/flink/FunctionTest.java | 2 +- python/sedona/sql/st_functions.py | 4 ++-- .../src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala | 2 +- .../spark/sql/sedona_sql/expressions/st_functions.scala | 4 ++-- .../org/apache/sedona/sql/dataFrameAPITestScala.scala | 3 ++- .../scala/org/apache/sedona/sql/functionTestScala.scala | 4 ++-- 10 files changed, 24 insertions(+), 23 deletions(-) diff --git a/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java b/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java index ab46aae7..7c8c15ac 100644 --- a/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java +++ b/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java @@ -60,6 +60,6 @@ public class Haversine // The radius of the earth is 6371.0 km public static double distance(Geometry geom1, Geometry geom2) { - return distance(geom1, geom2, 6378137.0); + return distance(geom1, geom2, 6371008.0); } } diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java index b81e85a6..029c888a 100644 --- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java +++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java @@ -449,33 +449,33 @@ public class FunctionsTest { // Basic check Point p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(0, 90)); Point p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(0, 0)); - assertEquals(1.0018754171394622E7, Haversine.distance(p1, p2), 0.1); + assertEquals(1.00075559643809E7, 
Haversine.distance(p1, p2), 0.1); p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(51.3168, -0.56)); p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(55.9533, -3.1883)); - assertEquals(544405.4459192449, Haversine.distance(p1, p2), 0.1); + assertEquals(543796.9506134904, Haversine.distance(p1, p2), 0.1); p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(48.353889, 11.786111)); p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(50.033333, 8.570556)); - assertEquals(299407.6894786948, Haversine.distance(p1, p2), 0.1); + assertEquals(299073.03416817175, Haversine.distance(p1, p2), 0.1); p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(48.353889, 11.786111)); p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(52.559722, 13.287778)); - assertEquals(480106.0821386384, Haversine.distance(p1, p2), 0.1); + assertEquals(479569.4558072244, Haversine.distance(p1, p2), 0.1); LineString l1 = GEOMETRY_FACTORY.createLineString(coordArray(0, 0, 0, 90)); LineString l2 = GEOMETRY_FACTORY.createLineString(coordArray(0, 1, 0, 0)); - assertEquals(4953717.340300673, Haversine.distance(l1, l2), 0.1); + assertEquals(4948180.449055, Haversine.distance(l1, l2), 0.1); // HK to Sydney p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(22.308919, 113.914603)); p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(-33.946111, 151.177222)); - assertEquals(7402166.655938837, Haversine.distance(p1, p2), 0.1); + assertEquals(7393893.072901942, Haversine.distance(p1, p2), 0.1); // HK to Toronto p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(22.308919, 113.914603)); p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(43.677223, -79.630556)); - assertEquals(1.2562590459399283E7, Haversine.distance(p1, p2), 0.1); + assertEquals(1.2548548944238186E7, Haversine.distance(p1, p2), 0.1); } @Test diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md index 813ed1c1..e2f1d142 100644 --- a/docs/api/flink/Function.md +++ b/docs/api/flink/Function.md @@ -278,7 +278,7 @@ FROM polygondf ## ST_DistanceSphere 
-Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6378137.0). Unit is meter. Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`. +Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6371008.0). Unit is meter. Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`. Geometry must be in EPSG:4326 (WGS84) projection and must be in lat/lon order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point data, we first take the centroids of both geometries and then compute the distance. @@ -292,7 +292,7 @@ Example 1: SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'), ST_GeomFromWKT('POINT (55.9533 -3.1883)')) ``` -Output: `544405.4459192449` +Output: `543796.9506134904` Example 2: diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md index b8643268..3eba9a03 100644 --- a/docs/api/sql/Function.md +++ b/docs/api/sql/Function.md @@ -51,7 +51,7 @@ FROM polygondf ## ST_AreaSpheroid -Introduction: Return the geodesic area of A using WGS84 spheroid. Unit is meter. Works better for large geometries (country level) compared to `ST_Area` + `ST_Transform`. It is equivalent to PostGIS `ST_Area(geography, use_spheroid=true)` function and produces nearly identical results. +Introduction: Return the geodesic area of A using WGS84 spheroid. Unit is square meter. 
Works better for large geometries (country level) compared to `ST_Area` + `ST_Transform`. It is equivalent to PostGIS `ST_Area(geography, use_spheroid=true)` function and produces nearly identical results. Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon== order. You can use ==ST_FlipCoordinates== to swap lat and lon. @@ -416,7 +416,7 @@ FROM polygondf ## ST_DistanceSphere -Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6378137.0). Unit is meter. Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`. +Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6371008.0). Unit is meter. Compared to `ST_Distance` + `ST_Transform`, it works better for datasets that cover large regions such as continents or the entire planet. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`. Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon== order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point data, we first take the centroids of both geometries and then compute the distance. @@ -429,7 +429,7 @@ Spark SQL example 1: SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'), ST_GeomFromWKT('POINT (55.9533 -3.1883)')) ``` -Output: `544405.4459192449` +Output: `543796.9506134904` Spark SQL example 2: ```sql @@ -441,7 +441,7 @@ Output: `544405.4459192449` ## ST_DistanceSpheroid -Introduction: Return the geodesic distance of A using WGS84 spheroid. Unit is meter. 
Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=true)` and `ST_DistanceSpheroid` function and produces nearly identical results. It provides slower but more accurate result compared to `ST_DistanceSphere`. +Introduction: Return the geodesic distance of A using WGS84 spheroid. Unit is meter. Compared to `ST_Distance` + `ST_Transform`, it works better for datasets that cover large regions such as continents or the entire planet. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=true)` and `ST_DistanceSpheroid` function and produces nearly identical results. It provides slower but more accurate result compared to `ST_DistanceSphere`. Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon== order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point data, we first take the centroids of both geometries and then compute the distance. 
diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java index f04328e0..933e216f 100644 --- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java @@ -175,7 +175,7 @@ public class FunctionTest extends TestBase{ public void testDistanceSphere() { Table tbl = tableEnv.sqlQuery( "SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'), ST_GeomFromWKT('POINT (55.9533 -3.1883)'))"); - Double expected = 544405.4459192449; + Double expected = 543796.9506134904; Double actual = (Double) first(tbl).getField(0); assertEquals(expected, actual, 0.1); } diff --git a/python/sedona/sql/st_functions.py b/python/sedona/sql/st_functions.py index 2c5196a3..f3cea50b 100644 --- a/python/sedona/sql/st_functions.py +++ b/python/sedona/sql/st_functions.py @@ -424,14 +424,14 @@ def ST_DistanceSpheroid(a: ColumnOrName, b: ColumnOrName) -> Column: return _call_st_function("ST_DistanceSpheroid", (a, b)) @validate_argument_types -def ST_DistanceSphere(a: ColumnOrName, b: ColumnOrName, radius: Optional[Union[ColumnOrName, float]] = 6378137.0) -> Column: +def ST_DistanceSphere(a: ColumnOrName, b: ColumnOrName, radius: Optional[Union[ColumnOrName, float]] = 6371008.0) -> Column: """Calculate the haversine/great-circle distance between two geometry columns using a given radius. :param a: Geometry column to use in the calculation. :type a: ColumnOrName :param b: Other geometry column to use in the calculation. :type b: ColumnOrName - :param radius: Radius of the sphere, defaults to 6378137.0 + :param radius: Radius of the sphere, defaults to 6371008.0 :type radius: Optional[Union[ColumnOrName, float]], optional :return: Two-dimensional haversine/great-circle distance between a and b as a double column. Unit is meter. 
:rtype: Column diff --git a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 0842f467..d50af2ab 100644 --- a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -143,7 +143,7 @@ object Catalog { function[ST_Split](), function[ST_S2CellIDs](), function[ST_GeometricMedian](1e-6, 1000, false), - function[ST_DistanceSphere](6378137.0), + function[ST_DistanceSphere](6371008.0), function[ST_DistanceSpheroid](), function[ST_AreaSpheroid](), function[ST_LengthSpheroid](), diff --git a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala index f7a56bf8..ad29b854 100644 --- a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala +++ b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala @@ -285,8 +285,8 @@ object st_functions extends DataFrameAPI { def ST_GeometricMedian(geometry: Column, tolerance: Column, maxIter: Column, failIfNotConverged: Column): Column = wrapExpression[ST_GeometricMedian](geometry, tolerance, maxIter, failIfNotConverged) def ST_GeometricMedian(geometry: String, tolerance: Double, maxIter: Int, failIfNotConverged: Boolean): Column = wrapExpression[ST_GeometricMedian](geometry, tolerance, maxIter, failIfNotConverged) - def ST_DistanceSphere(a: Column, b: Column): Column = wrapExpression[ST_DistanceSphere](a, b, 6378137.0) - def ST_DistanceSphere(a: String, b: String): Column = wrapExpression[ST_DistanceSphere](a, b, 6378137.0) + def ST_DistanceSphere(a: Column, b: Column): Column = wrapExpression[ST_DistanceSphere](a, b, 6371008.0) + def ST_DistanceSphere(a: String, b: String): Column = wrapExpression[ST_DistanceSphere](a, b, 6371008.0) def ST_DistanceSphere(a: Column, b: Column, c: 
Column): Column = wrapExpression[ST_DistanceSphere](a, b, c) def ST_DistanceSphere(a: String, b: String, c: Double): Column = wrapExpression[ST_DistanceSphere](a, b, c) diff --git a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 24a3d2b8..787a7300 100644 --- a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -917,11 +917,12 @@ class dataFrameAPITestScala extends TestBaseScala { val baseDf = sparkSession.sql("SELECT ST_GeomFromWKT('POINT (0 0)') AS geom1, ST_GeomFromWKT('POINT (0 90)') AS geom2") var df = baseDf.select(ST_DistanceSphere("geom1", "geom2")) var actualResult = df.take(1)(0).getDouble(0) - val expectedResult = 10018754.171394622 + var expectedResult = 1.00075559643809E7 assert(actualResult == expectedResult) df = baseDf.select(ST_DistanceSphere("geom1", "geom2", 6378137.0)) actualResult = df.take(1)(0).getDouble(0) + expectedResult = 1.0018754171394622E7 assertEquals(expectedResult, actualResult, 0.1) } diff --git a/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala index a3336ca1..0e598e6b 100644 --- a/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala +++ b/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala @@ -1842,8 +1842,8 @@ class functionTestScala extends TestBaseScala with Matchers with GeometrySample it("Should pass ST_DistanceSphere") { val geomTestCases = Map( - ("'POINT (51.3168 -0.56)'", "'POINT (55.9533 -3.1883)'") -> "544405.4459192449", - ("'LineString (0 0, 0 90)'", "'LineString (0 1, 0 0)'") -> "4953717.340300673" + ("'POINT (51.3168 -0.56)'", "'POINT (55.9533 -3.1883)'") -> "543796.9506134904", + ("'LineString (0 0, 0 90)'", "'LineString (0 1, 0 0)'") -> "4948180.449055" ) for 
(((geom1, geom2), expectedResult) <- geomTestCases) { val df = sparkSession.sql(s"SELECT ST_DistanceSphere(ST_GeomFromWKT($geom1), ST_GeomFromWKT($geom2)), " +
