This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch SEDONA-582 in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 297e63c1ac72c75fd11fc167f7ca1714c7c54de4 Author: Furqaan Khan <[email protected]> AuthorDate: Wed Apr 10 02:13:30 2024 -0400 [TASK-51] Add ST_PointFromGeoHash (#162) * feat: Add ST_PointFromGeoHash * docs: add docs * fix: snowflake tests * fix: typo --- .../org/apache/sedona/common/Constructors.java | 4 +++ .../org/apache/sedona/common/ConstructorsTest.java | 12 +++++++ docs/api/flink/Constructor.md | 20 ++++++++++++ docs/api/snowflake/vector-data/Constructor.md | 37 ++++++++++++++++++++++ docs/api/sql/Constructor.md | 20 ++++++++++++ .../main/java/org/apache/sedona/flink/Catalog.java | 1 + .../sedona/flink/expressions/Constructors.java | 14 ++++++++ .../org/apache/sedona/flink/ConstructorTest.java | 13 ++++++++ python/sedona/sql/st_constructors.py | 15 +++++++++ python/tests/sql/test_constructor_test.py | 9 ++++++ python/tests/sql/test_dataframe_api.py | 4 +++ .../sedona/snowflake/snowsql/TestConstructors.java | 15 +++++++++ .../org/apache/sedona/snowflake/snowsql/UDFs.java | 14 ++++++++ .../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 + .../sql/sedona_sql/expressions/Constructors.scala | 7 ++++ .../sedona_sql/expressions/st_constructors.scala | 7 ++++ .../apache/sedona/sql/constructorTestScala.scala | 10 ++++++ .../apache/sedona/sql/dataFrameAPITestScala.scala | 7 ++++ 18 files changed, 210 insertions(+) diff --git a/common/src/main/java/org/apache/sedona/common/Constructors.java b/common/src/main/java/org/apache/sedona/common/Constructors.java index e33789ab6..93dd8b00d 100644 --- a/common/src/main/java/org/apache/sedona/common/Constructors.java +++ b/common/src/main/java/org/apache/sedona/common/Constructors.java @@ -246,6 +246,10 @@ public class Constructors { } } + public static Geometry pointFromGeoHash(String geoHash, Integer precision) { + return geomFromGeoHash(geoHash, precision).getCentroid(); + } + public static Geometry geomFromGML(String gml) throws IOException, ParserConfigurationException, SAXException { return new GMLReader().read(gml, GEOMETRY_FACTORY); } diff --git a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java index 7abbcb306..49336f9f4 100644 --- a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java +++ b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java @@ -107,6 +107,18 @@ public class ConstructorsTest { assertEquals("POINT (1 2)", point.toText()); } + @Test + public void pointFromGeoHash() { + String point = Functions.asWKT(Constructors.pointFromGeoHash("9qqj7nmxncgyy4d0dbxqz0", 4)); + assertEquals("POINT (-115.13671875 36.123046875)", point); + + point = Functions.asWKT(Constructors.pointFromGeoHash("9qqj7nmxncgyy4d0dbxqz0", null)); + assertEquals("POINT (-115.17281600000001 36.11464599999999)", point); + + point = Functions.asWKT(Constructors.pointFromGeoHash("9qqj7nmxncgyy4d0dbxqz0", 1)); + assertEquals("POINT (-112.5 22.5)", point); + } + @Test public void point2d() { Geometry point = Constructors.makePoint(1.0d, 2.0d, null, null); diff --git a/docs/api/flink/Constructor.md b/docs/api/flink/Constructor.md index a28542f26..279d53dcf 100644 --- a/docs/api/flink/Constructor.md +++ b/docs/api/flink/Constructor.md @@ -490,6 +490,26 @@ Output: POINT (1.2345 2.3456) ``` +## ST_PointFromGeoHash + +Introduction: Generates a Point geometry representing the center of the GeoHash cell defined by the input string. If `precision` is not specified, the full GeoHash precision is used. Providing a `precision` value limits the GeoHash characters used to determine the Point coordinates. + +Format: `ST_PointFromGeoHash(geoHash: String, precision: Integer)` + +Since: `vTBD` + +SQL Example + +```sql +SELECT ST_PointFromGeoHash('s00twy01mt', 4) +``` + +Output: + +``` +POINT (0.87890625 0.966796875) +``` + ## ST_PointFromText Introduction: Construct a Point from Text, delimited by Delimiter diff --git a/docs/api/snowflake/vector-data/Constructor.md b/docs/api/snowflake/vector-data/Constructor.md index a87046def..f35972a30 100644 --- a/docs/api/snowflake/vector-data/Constructor.md +++ b/docs/api/snowflake/vector-data/Constructor.md @@ -523,6 +523,43 @@ Output: POINT (10 15) ``` +## ST_PointZ + +Introduction: Construct a Point from X, Y and Z and an optional srid. If srid is not set, it defaults to 0 (unknown). +Must use ST_AsEWKT function to print the Z coordinate. + +Format: `ST_PointZ (X:decimal, Y:decimal, Z:decimal)` + +Format: `ST_PointZ (X:decimal, Y:decimal, Z:decimal, srid:integer)` + +```sql +SELECT ST_AsEWKT(ST_PointZ(1.2345, 2.3456, 3.4567)) +``` + +Output: + +``` +POINT Z(1.2345 2.3456 3.4567) +``` + +## ST_PointFromGeoHash + +Introduction: Generates a Point geometry representing the center of the GeoHash cell defined by the input string. If `precision` is not specified, the full GeoHash precision is used. Providing a `precision` value limits the GeoHash characters used to determine the Point coordinates. + +Format: `ST_PointFromGeoHash(geoHash: String, precision: Integer)` + +SQL Example + +```sql +SELECT ST_PointFromGeoHash('s00twy01mt', 4) +``` + +Output: + +``` +POINT (0.87890625 0.966796875) +``` + ## ST_PolygonFromEnvelope Introduction: Construct a Polygon from MinX, MinY, MaxX, MaxY. diff --git a/docs/api/sql/Constructor.md b/docs/api/sql/Constructor.md index 6a5db0706..a38f71e57 100644 --- a/docs/api/sql/Constructor.md +++ b/docs/api/sql/Constructor.md @@ -552,6 +552,26 @@ Output: POINT (1.2345 2.3456) ``` +## ST_PointFromGeoHash + +Introduction: Generates a Point geometry representing the center of the GeoHash cell defined by the input string. If `precision` is not specified, the full GeoHash precision is used. Providing a `precision` value limits the GeoHash characters used to determine the Point coordinates. + +Format: `ST_PointFromGeoHash(geoHash: String, precision: Integer)` + +Since: `vTBD` + +SQL Example + +```sql +SELECT ST_PointFromGeoHash('s00twy01mt', 4) +``` + +Output: + +``` +POINT (0.87890625 0.966796875) +``` + ## ST_PointFromText Introduction: Construct a Point from Text, delimited by Delimiter diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index 443fbe932..b774a30d1 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -26,6 +26,7 @@ public class Catalog { new Constructors.ST_PointZ(), new Constructors.ST_PointM(), new Constructors.ST_PointZM(), + new Constructors.ST_PointFromGeoHash(), new Constructors.ST_PointFromText(), new Constructors.ST_PointFromWKB(), new Constructors.ST_LineFromWKB(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java index cd53ac09d..2f60623b0 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java @@ -317,6 +317,20 @@ public class Constructors { } } + public static class ST_PointFromGeoHash extends ScalarFunction { + @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) + public Geometry eval(@DataTypeHint("String") String value, + @DataTypeHint("Int") Integer precision) { + // The default precision is the geohash length. Otherwise, use the precision given by the user + return org.apache.sedona.common.Constructors.pointFromGeoHash(value, precision); + } + + @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) + public Geometry eval(@DataTypeHint("String") String value) { + return eval(value, null); + } + } + public static class ST_GeomFromGML extends ScalarFunction { @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) public Geometry eval(@DataTypeHint("String") String gml) throws ParseException { diff --git a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java index b92d92100..693aadac4 100644 --- a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java @@ -463,6 +463,19 @@ public class ConstructorTest extends TestBase{ assertEquals(expectedGeom, result); } + @Test + public void testPointFromGeoHash() { + String actual = first(tableEnv.sqlQuery("SELECT 's00twy01mt' as geohash") + .select(call(Constructors.ST_PointFromGeoHash.class.getSimpleName(), + $("geohash"), 4))).getField(0).toString(); + assertEquals("POINT (0.87890625 0.966796875)", actual); + + actual = first(tableEnv.sqlQuery("SELECT 's00twy01mt' as geohash") + .select(call(Constructors.ST_PointFromGeoHash.class.getSimpleName(), + $("geohash")))).getField(0).toString(); + assertEquals("POINT (0.9999972581863403 0.9999999403953552)", actual); + } + @Test public void testGeomFromGeoHashNullPrecision() { List<Row> data = new ArrayList<>(); diff --git a/python/sedona/sql/st_constructors.py b/python/sedona/sql/st_constructors.py index c0fb0fd26..50ab1899f 100644 --- a/python/sedona/sql/st_constructors.py +++ b/python/sedona/sql/st_constructors.py @@ -46,6 +46,21 @@ def ST_GeomFromGeoHash(geohash: ColumnOrName, precision: Union[ColumnOrName, int """ return _call_constructor_function("ST_GeomFromGeoHash", (geohash, precision)) +@validate_argument_types +def ST_PointFromGeoHash(geohash: ColumnOrName, precision: Optional[Union[ColumnOrName, int]] = None) -> Column: + """Generate a point column from a geohash column at a specified precision. + + :param geohash: Geohash string column to generate from. + :type geohash: ColumnOrName + :param precision: Geohash precision to use, either an integer or an integer column. + :type precision: Union[ColumnOrName, int] + :return: Point column representing the supplied geohash and precision level. + :rtype: Column + """ + args = (geohash) if precision is None else (geohash, precision) + + return _call_constructor_function("ST_PointFromGeoHash", args) + @validate_argument_types def ST_GeomFromGeoJSON(geojson_string: ColumnOrName) -> Column: diff --git a/python/tests/sql/test_constructor_test.py b/python/tests/sql/test_constructor_test.py index c90c5db7a..cd9a30b93 100644 --- a/python/tests/sql/test_constructor_test.py +++ b/python/tests/sql/test_constructor_test.py @@ -115,6 +115,15 @@ class TestConstructors(TestBase): polygon_df.show(10) assert polygon_df.count() == 100 + def test_st_point_from_geohash(self): + actual = self.spark.sql("select ST_AsText(ST_PointFromGeohash('9qqj7nmxncgyy4d0dbxqz0', 4))").take(1)[0][0] + expected = "POINT (-115.13671875 36.123046875)" + assert actual == expected + + actual = self.spark.sql("select ST_AsText(ST_PointFromGeohash('9qqj7nmxncgyy4d0dbxqz0'))").take(1)[0][0] + expected = "POINT (-115.17281600000001 36.11464599999999)" + assert actual == expected + def test_st_geometry_from_text(self): polygon_wkt_df = self.spark.read.format("csv").\ option("delimiter", "\t").\ diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index bd75bf16a..6383c2b71 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -62,6 +62,8 @@ test_configurations = [ (stc.ST_MLineFromText, ("mline", 4326), "constructor", "" , "MULTILINESTRING ((1 2, 3 4), (4 5, 6 7))"), (stc.ST_PointM, ("x", "y", "m", "srid"), "x_y_z_m_srid", "ST_AsEWKT(geom)", "SRID=4326;POINT ZM(1 2 0 100.9)"), (stc.ST_PointZM, ("x", "y", "z", "m", "srid"), "x_y_z_m_srid", "", "POINT Z (1 2 3)"), + (stc.ST_PointFromGeoHash, ("geohash", 4), "constructor", "ST_ReducePrecision(geom, 2)", "POINT (0.88 0.97)"), + (stc.ST_PointFromGeoHash, ("geohash",), "constructor", "ST_ReducePrecision(geom, 2)", "POINT (1 1)"), (stc.ST_PointFromText, ("single_point", lambda: f.lit(',')), "constructor", "", "POINT (0 1)"), (stc.ST_PointFromWKB, ("wkbPoint",), "constructor", "", "POINT (10 15)"), (stc.ST_MakePoint, ("x", "y", "z"), "constructor", "", "POINT Z (0 1 2)"), @@ -248,6 +250,8 @@ wrong_type_configurations = [ (stc.ST_LineStringFromText, ("", None)), (stc.ST_Point, (None, "")), (stc.ST_Point, ("", None)), + (stc.ST_PointFromGeoHash, (None, 4)), + (stc.ST_PointFromGeoHash, (None,)), (stc.ST_PointFromText, (None, "")), (stc.ST_PointFromText, ("", None)), (stc.ST_PointFromWKB, (None,)), diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java index 941b31fc6..c22d36814 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java @@ -44,6 +44,21 @@ public class TestConstructors extends TestBase{ "POLYGON ((0.703125 0.87890625, 0.703125 1.0546875, 1.0546875 1.0546875, 1.0546875 0.87890625, 0.703125 0.87890625))" ); } + + @Test + public void test_ST_PointFromGeoHash() { + registerUDF("ST_PointFromGeoHash", String.class, Integer.class); + verifySqlSingleRes( + "select sedona.ST_AsText(sedona.ST_PointFromGeoHash('s00twy01mt', 4))", + "POINT (0.87890625 0.966796875)" + ); + + registerUDF("ST_PointFromGeoHash", String.class); + verifySqlSingleRes( + "select sedona.ST_AsText(sedona.ST_PointFromGeoHash('s00twy01mt'))", + "POINT (0.9999972581863403 0.9999999403953552)" + ); + } @Test public void test_ST_GeomFromGeoJSON() { registerUDF("ST_GeomFromGeoJSON", String.class); diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java index 7dc314e9f..982386fbd 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java @@ -952,6 +952,20 @@ public class UDFs { ); } + @UDFAnnotations.ParamMeta(argNames = {"geoHash", "precision"}) + public static byte[] ST_PointFromGeoHash(String geoHash, Integer precision) { + return GeometrySerde.serialize( + Constructors.pointFromGeoHash(geoHash, precision) + ); + } + + @UDFAnnotations.ParamMeta(argNames = {"geoHash"}) + public static byte[] ST_PointFromGeoHash(String geoHash) { + return GeometrySerde.serialize( + Constructors.pointFromGeoHash(geoHash, null) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"geometry", "n"}) public static byte[] ST_PointN(byte[] geometry, int n) { return GeometrySerde.serialize( diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 92c9ac807..3c382176b 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -156,6 +156,7 @@ object Catalog { function[ST_MakePolygon](null), function[ST_GeoHash](), function[ST_GeomFromGeoHash](null), + function[ST_PointFromGeoHash](null), function[ST_Collect](), function[ST_Multi](), function[ST_PointOnSurface](), diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala index a7a7811e4..cdbe0a5c9 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala @@ -409,6 +409,13 @@ case class ST_GeomFromGeoHash(inputExpressions: Seq[Expression]) } } +case class ST_PointFromGeoHash(inputExpressions: Seq[Expression]) + extends InferredExpression(InferrableFunction.allowRightNull(Constructors.pointFromGeoHash)) { + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + case class ST_GeomFromGML(inputExpressions: Seq[Expression]) extends InferredExpression(Constructors.geomFromGML _) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala index eabf1429d..5b52df90b 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala @@ -29,6 +29,13 @@ object st_constructors extends DataFrameAPI { def ST_GeomFromGeoHash(geohash: String): Column = wrapExpression[ST_GeomFromGeoHash](geohash, null) + def ST_PointFromGeoHash(geohash: Column, precision: Column): Column = wrapExpression[ST_PointFromGeoHash](geohash, precision) + def ST_PointFromGeoHash(geohash: String, precision: Int): Column = wrapExpression[ST_PointFromGeoHash](geohash, precision) + + def ST_PointFromGeoHash(geohash: Column): Column = wrapExpression[ST_PointFromGeoHash](geohash, null) + + def ST_PointFromGeoHash(geohash: String): Column = wrapExpression[ST_PointFromGeoHash](geohash, null) + def ST_GeomFromGeoJSON(geojsonString: Column): Column = wrapExpression[ST_GeomFromGeoJSON](geojsonString) def ST_GeomFromGeoJSON(geojsonString: String): Column = wrapExpression[ST_GeomFromGeoJSON](geojsonString) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala index 564e9e6d6..d989eb611 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala @@ -221,6 +221,16 @@ class constructorTestScala extends TestBaseScala { assert(polygonDf.count() == 8) } + it("Passed ST_PointFromGeoHash") { + var actual = sparkSession.sql("SELECT ST_AsText(ST_PointFromGeoHash('9qqj7nmxncgyy4d0dbxqz0', 4))").first().get(0) + var expected = "POINT (-115.13671875 36.123046875)" + assert(expected.equals(actual)) + + actual = sparkSession.sql("SELECT ST_AsText(ST_PointFromGeoHash('9qqj7nmxncgyy4d0dbxqz0'))").first().get(0) + expected = "POINT (-115.17281600000001 36.11464599999999)" + assert(expected.equals(actual)) + } + it("Passed ST_GeomFromText") { var polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation) polygonWktDf.createOrReplaceTempView("polygontable") diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 2548b1d3b..61a2e90ae 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -253,6 +253,13 @@ class dataFrameAPITestScala extends TestBaseScala { assert(actualResult == expectedResult) } + it("Passed ST_PointFromGeoHash") { + val df = sparkSession.sql("SELECT 's00twy01mt' AS geohash").select(ST_PointFromGeoHash("geohash", 4)) + val actual = df.take(1)(0).get(0).asInstanceOf[Geometry].toText + val expected = "POINT (0.87890625 0.966796875)" + assert(expected.equals(actual)) + } + it("passed st_geomfromgml") { val gmlString = "<gml:LineString srsName=\"EPSG:4269\"><gml:coordinates>-71.16028,42.258729 -71.160837,42.259112 -71.161143,42.25932</gml:coordinates></gml:LineString>" val df = sparkSession.sql(s"SELECT '$gmlString' AS gml").select(ST_GeomFromGML("gml"))
