This is an automated email from the ASF dual-hosted git repository. imbruced pushed a commit to branch add-sedona-deserializer in repository https://gitbox.apache.org/repos/asf/sedona-db.git
commit 9100359e29c2266b01217e47f22ec7d02345d512 Author: pawelkocinski <[email protected]> AuthorDate: Mon Dec 15 23:24:37 2025 +0100 add sedona deserialization code --- Cargo.lock | 288 +++++++++------ Cargo.toml | 1 + rust/sedona-functions/Cargo.toml | 1 + .../sedona-functions/src/fixtures/crs_point.sedona | 11 + .../src/fixtures/empty_geometry_collection.sedona | 5 + .../src/fixtures/empty_linestring.sedona | 5 + .../src/fixtures/empty_multilinestring.sedona | 5 + .../src/fixtures/empty_multipolygon.sedona | 5 + .../src/fixtures/empty_point.sedona | 5 + .../src/fixtures/empty_polygon.sedona | 5 + .../src/fixtures/geometrycollection.sedona | 42 +++ .../src/fixtures/geometrycollectioncomplex.sedona | 52 +++ .../src/fixtures/linestring.sedona | 13 + .../src/fixtures/multilinestring.sedona | 25 ++ .../src/fixtures/multipoint.sedona | 13 + .../src/fixtures/multipoint_empty.sedona | 5 + .../src/fixtures/multipolygon.sedona | 65 ++++ .../src/fixtures/nested_geometry_collection.sedona | 46 +++ rust/sedona-functions/src/fixtures/point.sedona | 11 + .../src/fixtures/point_float_coords.sedona | 42 +++ rust/sedona-functions/src/fixtures/polygon.sedona | 41 +++ rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 1 + rust/sedona-functions/src/st_from_sedona_spark.rs | 387 +++++++++++++++++++++ rust/{sedona-functions => sedona-serde}/Cargo.toml | 25 +- rust/sedona-serde/src/deserialize.rs | 130 +++++++ rust/sedona-serde/src/lib.rs | 22 ++ rust/sedona-serde/src/linestring.rs | 98 ++++++ rust/sedona-serde/src/point.rs | 96 +++++ rust/sedona-serde/src/polygon.rs | 104 ++++++ rust/sedona-serde/src/wkb.rs | 24 ++ 31 files changed, 1439 insertions(+), 135 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23630f62..a1b060dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,8 +56,8 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a38cdcc3e43dc645038c2b6339dd98610c48ae593cc67839452e6670fa09f27" 
dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-schema 57.1.0", ] [[package]] @@ -67,8 +67,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d36274376fdc4849cf47a78f3baeef4ae1654ef703dc3148d91adde3336c11" dependencies = [ "adbc_core", - "arrow-array", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-schema 57.1.0", ] [[package]] @@ -267,16 +267,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd798aea3553913a5986813e9c6ad31a2d2b04e931fe8ea4a37155eb541cebb5" dependencies = [ "arrow-arith", - "arrow-array", - "arrow-buffer", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", "arrow-cast", "arrow-csv", - "arrow-data", + "arrow-data 56.2.0", "arrow-ipc", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema", + "arrow-schema 56.2.0", "arrow-select", "arrow-string", ] @@ -287,52 +287,82 @@ version = "56.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "508dafb53e5804a238cab7fd97a59ddcbfab20cc4d9814b1ab5465b9fa147f2e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "chrono", "num", ] [[package]] name = "arrow-array" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2730bc045d62bb2e53ef8395b7d4242f5c8102f41ceac15e8395b9ac3d08461" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "chrono", "chrono-tz", "half", - "hashbrown 0.15.5", + "hashbrown 0.16.1", "num", ] +[[package]] +name = "arrow-array" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" 
+dependencies = [ + "ahash", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", + "chrono", + "half", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", +] + [[package]] name = "arrow-buffer" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54295b93beb702ee9a6f6fbced08ad7f4d76ec1c297952d4b83cf68755421d1d" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" dependencies = [ "bytes", "half", "num", ] +[[package]] +name = "arrow-buffer" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + [[package]] name = "arrow-cast" version = "56.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67e8bcb7dc971d779a7280593a1bf0c2743533b8028909073e804552e85e75b5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "arrow-select", "atoi", "base64", @@ -350,9 +380,9 @@ version = "56.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673fd2b5fb57a1754fdbfac425efd7cf54c947ac9950c1cce86b14e248f1c458" dependencies = [ - "arrow-array", + "arrow-array 56.2.0", "arrow-cast", - "arrow-schema", + "arrow-schema 56.2.0", "chrono", "csv", "csv-core", @@ -361,26 +391,40 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97c22fe3da840039c69e9f61f81e78092ea36d57037b4900151f063615a2f6b4" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 56.2.0", + "arrow-schema 56.2.0", "half", 
"num", ] +[[package]] +name = "arrow-data" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +dependencies = [ + "arrow-buffer 57.1.0", + "arrow-schema 57.1.0", + "half", + "num-integer", + "num-traits", +] + [[package]] name = "arrow-ipc" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778de14c5a69aedb27359e3dd06dd5f9c481d5f6ee9fbae912dba332fd64636b" +checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select", "flatbuffers", "lz4_flex", "zstd", @@ -392,11 +436,11 @@ version = "56.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3860db334fe7b19fcf81f6b56f8d9d95053f3839ffe443d56b5436f7a29a1794" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "chrono", "half", "indexmap", @@ -414,10 +458,10 @@ version = "56.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "425fa0b42a39d3ff55160832e7c25553e7f012c3f187def3d70313e7a29ba5d9" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "arrow-select", ] @@ -427,35 +471,44 @@ version = "56.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df9c9423c9e71abd1b08a7f788fcd203ba2698ac8e72a1f236f1faa1a06a7414" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + 
"arrow-schema 56.2.0", "half", ] [[package]] name = "arrow-schema" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85fa1babc4a45fdc64a92175ef51ff00eba5ebbc0007962fecf8022ac1c6ce28" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" dependencies = [ "bitflags", "serde", "serde_json", ] +[[package]] +name = "arrow-schema" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +dependencies = [ + "bitflags", +] + [[package]] name = "arrow-select" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8854d15f1cf5005b4b358abeb60adea17091ff5bdd094dca5d3f73787d81170" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "num", ] @@ -465,10 +518,10 @@ version = "56.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c477e8b89e1213d5927a2a84a72c384a9bf4dd0dbf15f9fd66d821aafd9e95e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "arrow-select", "memchr", "num", @@ -1579,7 +1632,7 @@ checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" dependencies = [ "arrow", "arrow-ipc", - "arrow-schema", + "arrow-schema 56.2.0", "async-trait", "bytes", "bzip2 0.6.1", @@ -1927,7 +1980,7 @@ checksum = "25ddb7c4e645df080c27dad13a198d191da328dd1c98e198664a7a0f64b335cc" dependencies = [ "abi_stable", "arrow", - "arrow-schema", + "arrow-schema 56.2.0", "async-ffi", "async-trait", "datafusion", @@ -1948,7 +2001,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" dependencies = [ "arrow", - "arrow-buffer", + "arrow-buffer 56.2.0", "base64", "blake2", "blake3", @@ -2182,7 +2235,7 @@ dependencies = [ "ahash", "arrow", "arrow-ord", - "arrow-schema", + "arrow-schema 56.2.0", "async-trait", "chrono", "datafusion-common", @@ -2238,7 +2291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" dependencies = [ "arrow", - "arrow-schema", + "arrow-schema 56.2.0", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -3863,12 +3916,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7288a07ed5d25939a90f9cb1ca5afa6855faa08ec7700613511ae64bdb0620c" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", "arrow-cast", - "arrow-data", + "arrow-data 56.2.0", "arrow-ipc", - "arrow-schema", + "arrow-schema 56.2.0", "arrow-select", "base64", "brotli", @@ -4762,8 +4815,8 @@ name = "sedona" version = "0.3.0" dependencies = [ "abi_stable", - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "async-trait", "aws-config", "aws-credential-types", @@ -4807,8 +4860,8 @@ version = "0.3.0" dependencies = [ "adbc_core", "adbc_ffi", - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "datafusion", "futures", "sedona", @@ -4847,8 +4900,8 @@ dependencies = [ name = "sedona-datasource" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "async-trait", "datafusion", "datafusion-catalog", @@ -4871,7 +4924,7 @@ dependencies = [ name = "sedona-expr" version = "0.3.0" dependencies = [ - "arrow-schema", + "arrow-schema 56.2.0", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", @@ -4889,8 
+4942,8 @@ dependencies = [ name = "sedona-extension" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "datafusion-common", "datafusion-expr", "libc", @@ -4904,9 +4957,9 @@ dependencies = [ name = "sedona-functions" version = "0.3.0" dependencies = [ - "arrow-array", + "arrow-array 56.2.0", "arrow-json", - "arrow-schema", + "arrow-schema 56.2.0", "criterion", "datafusion", "datafusion-common", @@ -4917,6 +4970,7 @@ dependencies = [ "sedona-expr", "sedona-geometry", "sedona-schema", + "sedona-serde", "sedona-testing", "serde_json", "tokio", @@ -4928,8 +4982,8 @@ dependencies = [ name = "sedona-geo" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "criterion", "datafusion-common", "datafusion-expr", @@ -4988,8 +5042,8 @@ dependencies = [ name = "sedona-geoarrow-c" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "cc", "criterion", "datafusion-common", @@ -5025,8 +5079,8 @@ dependencies = [ name = "sedona-geoparquet" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "async-trait", "bytes", "chrono", @@ -5062,8 +5116,8 @@ dependencies = [ name = "sedona-geos" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "byteorder", "criterion", "datafusion-common", @@ -5086,8 +5140,8 @@ dependencies = [ name = "sedona-libgpuspatial" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "bindgen", "cmake", "log", @@ -5104,8 +5158,8 @@ name = "sedona-proj" version = "0.3.0" dependencies = [ "approx", - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "cc", "criterion", "datafusion-common", @@ -5129,9 +5183,9 @@ name = "sedona-raster" version = 
"0.3.0" dependencies = [ "approx", - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-schema 56.2.0", "sedona-common", "sedona-schema", "sedona-testing", @@ -5141,9 +5195,9 @@ dependencies = [ name = "sedona-raster-functions" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-schema 56.2.0", "criterion", "datafusion-common", "datafusion-expr", @@ -5160,8 +5214,8 @@ dependencies = [ name = "sedona-s2geography" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "cmake", "criterion", "datafusion-common", @@ -5182,20 +5236,30 @@ dependencies = [ name = "sedona-schema" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "datafusion-common", "sedona-common", "serde_json", ] +[[package]] +name = "sedona-serde" +version = "0.3.0" +dependencies = [ + "arrow-array 56.2.0", + "byteorder", + "datafusion-common", + "wkt 0.14.0", +] + [[package]] name = "sedona-spatial-join" version = "0.3.0" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "criterion", "datafusion", "datafusion-common", @@ -5235,9 +5299,9 @@ dependencies = [ name = "sedona-testing" version = "0.3.0" dependencies = [ - "arrow-array", + "arrow-array 56.2.0", "arrow-cast", - "arrow-schema", + "arrow-schema 56.2.0", "criterion", "datafusion-common", "datafusion-expr", @@ -5262,8 +5326,8 @@ dependencies = [ name = "sedona-tg" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "cc", "criterion", "datafusion-common", @@ -5284,8 +5348,8 @@ name = "sedonadb" version = "0.3.0" dependencies = [ "adbc_core", - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "async-trait", 
"datafusion", "datafusion-common", @@ -5312,8 +5376,8 @@ dependencies = [ name = "sedonadbr" version = "0.3.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "datafusion", "datafusion-common", "datafusion-expr", diff --git a/Cargo.toml b/Cargo.toml index 9780fbe1..def576c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ members = [ "rust/sedona-schema", "rust/sedona-spatial-join", "rust/sedona-testing", + "rust/sedona-serde", "rust/sedona", "sedona-cli", ] diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-functions/Cargo.toml index 57afd240..393aaf5a 100644 --- a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-functions/Cargo.toml @@ -48,6 +48,7 @@ sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-geometry = { workspace = true } sedona-schema = { workspace = true } +sedona-serde = { path = "../sedona-serde" } wkb = { workspace = true } wkt = { workspace = true } serde_json = { workspace = true } diff --git a/rust/sedona-functions/src/fixtures/crs_point.sedona b/rust/sedona-functions/src/fixtures/crs_point.sedona new file mode 100644 index 00000000..e534b3a2 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/crs_point.sedona @@ -0,0 +1,11 @@ +// Point XY with CRS EPSG:4326 +19 + +// CRS EPSG:4326 +0 16 230 + +// number of coordinates +1 0 0 0 + +// coordinates (2 doubles per coordinate) +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/empty_geometry_collection.sedona b/rust/sedona-functions/src/fixtures/empty_geometry_collection.sedona new file mode 100644 index 00000000..39dca86b --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_geometry_collection.sedona @@ -0,0 +1,5 @@ +114 + +0 0 0 + +0 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/empty_linestring.sedona b/rust/sedona-functions/src/fixtures/empty_linestring.sedona new file mode 100644 index 
00000000..68e7aed8 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_linestring.sedona @@ -0,0 +1,5 @@ +34 + +0 0 0 + +0 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/empty_multilinestring.sedona b/rust/sedona-functions/src/fixtures/empty_multilinestring.sedona new file mode 100644 index 00000000..085f9d52 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_multilinestring.sedona @@ -0,0 +1,5 @@ +82 + +0 0 0 + +0 0 0 0 0 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/empty_multipolygon.sedona b/rust/sedona-functions/src/fixtures/empty_multipolygon.sedona new file mode 100644 index 00000000..559c5a3d --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_multipolygon.sedona @@ -0,0 +1,5 @@ +98 + +0 0 0 + +0 0 0 0 0 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/empty_point.sedona b/rust/sedona-functions/src/fixtures/empty_point.sedona new file mode 100644 index 00000000..5ab2f596 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_point.sedona @@ -0,0 +1,5 @@ +18 + +0 0 0 + +0 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/empty_polygon.sedona b/rust/sedona-functions/src/fixtures/empty_polygon.sedona new file mode 100644 index 00000000..3acc9bc3 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/empty_polygon.sedona @@ -0,0 +1,5 @@ +50 + +0 0 0 + +0 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/geometrycollection.sedona b/rust/sedona-functions/src/fixtures/geometrycollection.sedona new file mode 100644 index 00000000..3918cbaf --- /dev/null +++ b/rust/sedona-functions/src/fixtures/geometrycollection.sedona @@ -0,0 +1,42 @@ +// metadata GeometryCollection XY NO SRID +114 + +// missing srid information +0 0 0 + +// number of geometries +3 0 0 0 + +// point geometry +18 + +0 0 0 + +1 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +// linestring geometry +34 
+ +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 + +// polygon geometry +50 + +0 0 0 + +4 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +1 0 0 0 +4 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/geometrycollectioncomplex.sedona b/rust/sedona-functions/src/fixtures/geometrycollectioncomplex.sedona new file mode 100644 index 00000000..6dfb04c8 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/geometrycollectioncomplex.sedona @@ -0,0 +1,52 @@ +// // metadata GeometryCollection XY NO SRID +114 + +// missing srid information +0 0 0 + +// number of geometries +4 0 0 0 + +// point geometry +18 + +0 0 0 + +1 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +// linestring geometry +34 + +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 + +// polygon geometry +50 + +0 0 0 + +4 0 0 0 + +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 28 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 24 64 + +1 0 0 0 +4 0 0 0 + +// multipoint geometry +66 + +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 240 63 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 8 64 0 0 0 0 0 0 16 64 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/linestring.sedona b/rust/sedona-functions/src/fixtures/linestring.sedona new file mode 100644 index 00000000..02e1ba01 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/linestring.sedona @@ -0,0 +1,13 @@ +// metadata LINESTRING XY NO SRID +34 + +// missing srid information +0 0 0 + +// number of coordinates +3 0 0 0 + +// coordinates (2 doubles per coordinate) +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/multilinestring.sedona 
b/rust/sedona-functions/src/fixtures/multilinestring.sedona new file mode 100644 index 00000000..22466c88 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multilinestring.sedona @@ -0,0 +1,25 @@ +// metadata MultiLinestring XY NO SRID +82 + +// missing srid information +0 0 0 + +// number of points +4 0 0 0 + +// coordinates linestring 1 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 + +// coordinates linestring 2 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 20 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 28 64 + +// number of linestrings +2 0 0 0 + +// linestring 1 has 2 points +2 0 0 0 + +// linestring 2 has 2 points +2 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/multipoint.sedona b/rust/sedona-functions/src/fixtures/multipoint.sedona new file mode 100644 index 00000000..256068cb --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multipoint.sedona @@ -0,0 +1,13 @@ +// metadata Multipoint XY NO SRID +66 + +// missing srid information +0 0 0 + +// number of points +3 0 0 0 + +// coordinates (2 doubles per point) +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 20 64 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/multipoint_empty.sedona b/rust/sedona-functions/src/fixtures/multipoint_empty.sedona new file mode 100644 index 00000000..f11bc982 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multipoint_empty.sedona @@ -0,0 +1,5 @@ +66 + +0 0 0 + +0 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/multipolygon.sedona b/rust/sedona-functions/src/fixtures/multipolygon.sedona new file mode 100644 index 00000000..562aa70c --- /dev/null +++ b/rust/sedona-functions/src/fixtures/multipolygon.sedona @@ -0,0 +1,65 @@ +// metadata Multipolygon XY NO SRID +98 + +// missing srid information +0 0 0 + +// number of points +30 0 0 0 + +// polygon 1 coordinates +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 
36 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 36 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 + +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 + +// polygon 2 coordinates +0 0 0 0 0 0 40 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 52 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 52 64 0 0 0 0 0 0 34 64 +0 0 0 0 0 0 40 64 0 0 0 0 0 0 34 64 +0 0 0 0 0 0 40 64 0 0 0 0 0 0 240 63 + +0 0 0 0 0 0 42 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 46 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 46 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 42 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 42 64 0 0 0 0 0 0 0 64 + +0 0 0 0 0 0 49 64 0 0 0 0 0 0 20 64 +0 0 0 0 0 0 51 64 0 0 0 0 0 0 20 64 +0 0 0 0 0 0 51 64 0 0 0 0 0 0 28 64 +0 0 0 0 0 0 49 64 0 0 0 0 0 0 28 64 +0 0 0 0 0 0 49 64 0 0 0 0 0 0 20 64 + +// number of polygons +2 0 0 0 + +// number of polygon 1 components +3 0 0 0 + +// number of points in polygon 1 components +5 0 0 0 +5 0 0 0 +5 0 0 0 + +// number of polygon 2 components +3 0 0 0 + +// number of points in polygon 2 components +5 0 0 0 +5 0 0 0 +5 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/nested_geometry_collection.sedona b/rust/sedona-functions/src/fixtures/nested_geometry_collection.sedona new file mode 100644 index 00000000..1bb0f57d --- /dev/null +++ b/rust/sedona-functions/src/fixtures/nested_geometry_collection.sedona @@ -0,0 +1,46 @@ +// metadata GeometryCollection XY NO SRID +114 + +0 0 0 + +2 0 0 0 + +// point geometry +18 + +0 0 0 + +1 0 0 0 + +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +// geometry collection geometry +114 0 0 0 + +2 0 0 0 + +// linestring geometry +34 + +0 0 0 + +2 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 
0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +// polygon geometry +50 + +0 0 0 + +5 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +1 0 0 0 +5 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/point.sedona b/rust/sedona-functions/src/fixtures/point.sedona new file mode 100644 index 00000000..94e872a1 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/point.sedona @@ -0,0 +1,11 @@ +// metadata POINT XY NO SRID +18 + +// srid information +0 0 0 + +// number of coordinates +1 0 0 0 + +// coordinates (2 doubles per coordinate) +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 \ No newline at end of file diff --git a/rust/sedona-functions/src/fixtures/point_float_coords.sedona b/rust/sedona-functions/src/fixtures/point_float_coords.sedona new file mode 100644 index 00000000..a75f94d0 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/point_float_coords.sedona @@ -0,0 +1,42 @@ +50 + +0 0 0 + +11 0 0 0 + +59 111 47 211 252 176 40 64 +243 214 83 60 230 214 70 64 + +164 49 237 31 240 116 55 64 +235 19 96 200 247 248 80 64 + +131 251 161 159 176 72 65 64 +228 105 70 91 254 100 76 64 + +243 214 83 60 230 214 70 64 +131 251 161 159 176 72 65 64 + +81 184 200 34 101 224 61 64 +157 183 151 105 126 88 54 64 + +59 111 47 211 252 176 40 64 +243 214 83 60 230 214 70 64 + +149 70 55 221 154 31 57 64 +59 111 240 116 107 254 70 64 + +118 222 224 233 214 252 62 64 +75 163 155 110 205 15 73 64 + +210 152 246 15 120 186 65 64 +210 152 246 15 120 186 70 64 + +31 197 40 164 12 60 62 64 +59 111 240 116 107 126 68 64 + +149 70 55 221 154 31 57 64 +59 111 240 116 107 254 70 64 + +2 0 0 0 +6 0 0 0 +5 0 0 0 diff --git a/rust/sedona-functions/src/fixtures/polygon.sedona b/rust/sedona-functions/src/fixtures/polygon.sedona new file mode 100644 index 00000000..7a2ab605 --- /dev/null +++ b/rust/sedona-functions/src/fixtures/polygon.sedona @@ -0,0 
+1,41 @@ +// metadata Polygon XY NO SRID +50 + +// no srid +0 0 0 + +// number of points +15 0 0 0 + +// polygon 1 ring 1 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 36 64 0 0 0 0 0 0 240 63 +0 0 0 0 0 0 36 64 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 36 64 +0 0 0 0 0 0 240 63 0 0 0 0 0 0 240 63 + +// hole 1 ring 1 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 0 64 +0 0 0 0 0 0 16 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 16 64 +0 0 0 0 0 0 0 64 0 0 0 0 0 0 0 64 + +// hole 2 ring 1 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 24 64 +0 0 0 0 0 0 32 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 32 64 +0 0 0 0 0 0 24 64 0 0 0 0 0 0 24 64 + +// number of rings +3 0 0 0 + +// number of points internal 1 +5 0 0 0 + +// number of points hole 1 +5 0 0 0 + +// number of points hole 2 +5 0 0 0 \ No newline at end of file diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 44c8ad02..9eec6398 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -67,3 +67,4 @@ pub mod st_union_agg; mod st_xyzm; mod st_xyzm_minmax; mod st_zmflag; +mod st_from_sedona_spark; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index ff439578..e9bc4a14 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -120,6 +120,7 @@ pub fn default_function_set() -> FunctionSet { crate::st_xyzm::st_y_udf, crate::st_xyzm::st_z_udf, crate::st_zmflag::st_zmflag_udf, + crate::st_from_sedona_spark::st_geomfromsedona_udf, ); register_aggregate_udfs!( diff --git a/rust/sedona-functions/src/st_from_sedona_spark.rs b/rust/sedona-functions/src/st_from_sedona_spark.rs new file mode 100644 index 00000000..b96b4bb7 --- /dev/null +++ b/rust/sedona-functions/src/st_from_sedona_spark.rs @@ -0,0 +1,387 @@ +use crate::executor::WkbExecutor; +use arrow_array::builder::BinaryBuilder; +use 
arrow_schema::DataType;
use datafusion_common::cast::as_binary_array;
use datafusion_common::ScalarValue;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
use sedona_schema::crs::{deserialize_crs, Crs};
use sedona_schema::datatypes::{Edges, SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY};
use sedona_schema::matchers::ArgMatcher;
use sedona_serde::deserialize::deserialize;
use std::any::Any;
use std::io::{Read, Seek};
use std::sync::Arc;

/// Normalizes a scalar CRS argument into a CRS string.
///
/// The scalar is cast to UTF-8. A value made up solely of ASCII digits is
/// treated as an EPSG code and prefixed with `EPSG:`; anything else is
/// returned unchanged. Returns `None` when the cast fails or yields a null.
fn to_crs_str(scalar_arg: &ScalarValue) -> Option<String> {
    let ScalarValue::Utf8(Some(crs)) = scalar_arg.cast_to(&DataType::Utf8).ok()? else {
        return None;
    };

    // NOTE(review): `all` is vacuously true for an empty string, so "" becomes
    // "EPSG:". Confirm whether an empty CRS should instead mean "no CRS".
    if crs.chars().all(|c| c.is_ascii_digit()) {
        Some(format!("EPSG:{crs}"))
    } else {
        Some(crs)
    }
}

/// Kernel that converts Sedona's serialized geometry bytes into WKB.
#[derive(Debug)]
struct STGeomFromSedona {
    /// Output type reported by [`SedonaScalarKernel::return_type`].
    out_type: SedonaType,
}

/// Builds the `st_geomfromsedona` scalar UDF.
pub fn st_geomfromsedona_udf() -> SedonaScalarUDF {
    let kernel = Arc::new(STGeomFromSedona {
        out_type: WKB_GEOMETRY,
    });

    SedonaScalarUDF::new(
        "st_geomfromsedona",
        vec![kernel],
        Volatility::Immutable,
        Some(doc("ST_GeomFromSedona", "Geometry")),
    )
}

impl SedonaScalarKernel for STGeomFromSedona {
    /// Matches `(binary, string)` and reports the kernel's fixed output type.
    fn return_type(&self, args: &[SedonaType]) -> datafusion_common::Result<Option<SedonaType>> {
        // NOTE(review): this matcher requires a binary first argument while
        // `return_type_from_args_and_scalars` requires a geometry one. Confirm
        // which of the two is intended.
        let matcher = ArgMatcher::new(
            vec![ArgMatcher::is_binary(), ArgMatcher::is_string()],
            self.out_type.clone(),
        );

        matcher.match_args(args)
    }

    /// Matches `(geometry, string)` and derives the output CRS from the scalar
    /// value of the second argument when one is available.
    ///
    /// Returns `Ok(None)` when the argument types do not match, and a planar
    /// WKB type without CRS when the CRS argument is not a usable scalar.
    fn return_type_from_args_and_scalars(
        &self,
        args: &[SedonaType],
        scalar_args: &[Option<&ScalarValue>],
    ) -> datafusion_common::Result<Option<SedonaType>> {
        let matcher = ArgMatcher::new(
            vec![ArgMatcher::is_geometry(), ArgMatcher::is_string()],
            self.out_type.clone(),
        );

        if !matcher.matches(args) {
            return Ok(None);
        }

        // `get` + `flatten` instead of `unwrap`: a missing slot simply means
        // "no CRS known at planning time" rather than a panic.
        let crs_str = scalar_args.get(1).copied().flatten().and_then(to_crs_str);

        let crs = match crs_str {
            Some(crs_str) => deserialize_crs(&serde_json::Value::String(crs_str))?,
            None => None,
        };

        Ok(Some(SedonaType::Wkb(Edges::Planar, crs)))
    }

    /// Deserializes every row of the first argument into WKB.
    ///
    /// NULL inputs produce NULL outputs so the result always has the same
    /// number of rows as the input.
    fn invoke_batch(
        &self,
        arg_types: &[SedonaType],
        args: &[ColumnarValue],
    ) -> datafusion_common::Result<ColumnarValue> {
        let executor = WkbExecutor::new(arg_types, args);
        let num_rows = executor.num_iterations();
        let arg_array = args[0]
            .cast_to(&DataType::Binary, None)?
            .to_array(num_rows)?;

        let mut builder =
            BinaryBuilder::with_capacity(num_rows, WKB_MIN_PROBABLE_BYTES * num_rows);

        for item in as_binary_array(&arg_array)? {
            match item {
                Some(sedona_bytes) => {
                    // `deserialize` streams bytes into the builder's in-progress
                    // value; appending an empty slice then closes that value.
                    deserialize(&mut builder, sedona_bytes)?;
                    builder.append_value(b"");
                }
                None => builder.append_null(),
            }
        }

        executor.finish(Arc::new(builder.finish()))
    }
}

/// Builds the user-facing documentation for the UDF.
// NOTE(review): the syntax and SQL example only mention the `geom` argument,
// although the kernel also matches a string CRS argument.
fn doc(name: &str, out_type_name: &str) -> Documentation {
    Documentation::builder(
        DOC_SECTION_OTHER,
        format!("Construct a {out_type_name} from Sedona internal geometry representation."),
        format!("{name} (geom: Binary)"),
    )
    .with_argument(
        "geom",
        format!(
            "Binary: Sedona representation of the {}",
            out_type_name.to_lowercase()
        ),
    )
    .with_sql_example(format!("SELECT {name}('SedonaBinary')"))
    .with_related_udf("ST_FromSedona")
    .build()
}

#[cfg(test)]
mod tests {
    use super::*;
    use sedona_testing::testers::ScalarUdfTester;

    fn get_tester() -> ScalarUdfTester {
        ScalarUdfTester::new(
            st_geomfromsedona_udf().into(),
            vec![
                SedonaType::Wkb(Edges::Planar, Crs::None),
                SedonaType::Arrow(DataType::Utf8),
            ],
        )
    }

    /// Parses a fixture file: whitespace-separated decimal bytes, with `//`
    /// comment lines and empty lines ignored.
    fn fixture_to_bytes(wkb: &str) -> Vec<u8> {
        wkb.lines()
            .filter(|line| !line.starts_with("//") && !line.is_empty())
            .flat_map(str::split_whitespace)
            .map(|num| num.parse::<u8>().expect("invalid byte"))
            .collect()
    }

    /// Decodes `fixture` with CRS `4326` and checks the result against `expected_wkt`.
    fn assert_fixture_decodes_to(fixture: &str, expected_wkt: &str) {
        let tester = get_tester();

        let result = tester
            .invoke_scalar_scalar(
                fixture_to_bytes(fixture),
                ScalarValue::Utf8(Some("4326".to_string())),
            )
            .unwrap();

        tester.assert_scalar_result_equals(result, expected_wkt);
    }

    #[test]
    fn test_st_from_sedona_spark_point_1() {
        assert_fixture_decodes_to(include_str!("fixtures/point.sedona"), "POINT (1 1)");
    }

    #[test]
    fn test_st_from_sedona_spark_linestring_1() {
        assert_fixture_decodes_to(
            include_str!("fixtures/linestring.sedona"),
            "LINESTRING (0 0, 1 1, 2 2)",
        );
    }

    #[test]
    fn test_multipoint_geometry() {
        assert_fixture_decodes_to(
            include_str!("fixtures/multipoint.sedona"),
            "MULTIPOINT((1 1), (2 2), (4 5))",
        );
    }

    #[test]
    fn test_multilinestring_geometry() {
        assert_fixture_decodes_to(
            include_str!("fixtures/multilinestring.sedona"),
            "MULTILINESTRING((1 1, 2 2), (4 5, 6 7))",
        );
    }

    #[test]
    fn test_polygon_geometry() {
        assert_fixture_decodes_to(
            include_str!("fixtures/polygon.sedona"),
            "
            POLYGON (
                (1 1, 10 1, 10 10, 1 10, 1 1),
                (2 2, 4 2, 4 4, 2 4, 2 2),
                (6 6, 8 6, 8 8, 6 8, 6 6)
            )",
        );
    }

    #[test]
    fn test_multipolygon_geometry() {
        assert_fixture_decodes_to(
            include_str!("fixtures/multipolygon.sedona"),
            "MULTIPOLYGON (
                (
                    (1 1, 10 1, 10 10, 1 10, 1 1),
                    (2 2, 4 2, 4 4, 2 4, 2 2),
                    (6 6, 8 6, 8 8, 6 8, 6 6)
                ),
                (
                    (12 1, 20 1, 20 9, 12 9, 12 1),
                    (13 2, 15 2, 15 4, 13 4, 13 2),
                    (17 5, 19 5, 19 7, 17 7, 17 5)
                )
            )",
        );
    }

    #[test]
    fn test_geometrycollection_geometry() {
        assert_fixture_decodes_to(
            include_str!("fixtures/geometrycollection.sedona"),
            "GEOMETRYCOLLECTION(
                POINT(4 6),
                LINESTRING(4 6,7 10),
                POLYGON((4 6,7 10,4 10,4 6))
            )",
        );
    }

    #[test]
    fn test_complex_geometry_collection() {
        assert_fixture_decodes_to(
            include_str!("fixtures/geometrycollectioncomplex.sedona"),
            "GEOMETRYCOLLECTION(
                POINT(4 6),
                LINESTRING(4 6,7 10),
                POLYGON((4 6,7 10,4 10,4 6)),
                MULTIPOINT((1 2),(3 4))
            )",
        );
    }

    #[test]
    fn test_geometry_collection_in_geometry_collection() {
        assert_fixture_decodes_to(
            include_str!("fixtures/nested_geometry_collection.sedona"),
            "GEOMETRYCOLLECTION (
                POINT (1 1),
                GEOMETRYCOLLECTION (
                    LINESTRING (0 0, 1 1),
                    POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))
                )
            )",
        );
    }

    #[test]
    fn test_empty_geometries() {
        let test_data = [
            (include_str!("fixtures/empty_point.sedona"), "POINT EMPTY"),
            (
                include_str!("fixtures/multipoint_empty.sedona"),
                "MULTIPOINT EMPTY",
            ),
            (
                include_str!("fixtures/empty_linestring.sedona"),
                "LINESTRING EMPTY",
            ),
            (
                include_str!("fixtures/empty_polygon.sedona"),
                "POLYGON EMPTY",
            ),
            (
                include_str!("fixtures/empty_multipolygon.sedona"),
                "MULTIPOLYGON EMPTY",
            ),
            (
                include_str!("fixtures/empty_multilinestring.sedona"),
                "MULTILINESTRING EMPTY",
            ),
            (
                include_str!("fixtures/empty_geometry_collection.sedona"),
                "GEOMETRYCOLLECTION EMPTY",
            ),
        ];

        for (test_fixture, expected_wkt) in test_data {
            assert_fixture_decodes_to(test_fixture, expected_wkt);
        }
    }

    #[test]
    fn test_non_integer_coordinates() {
        assert_fixture_decodes_to(
            include_str!("fixtures/point_float_coords.sedona"),
            "POLYGON (
                (
                    12.345678901234 45.678901234567,
                    23.456789012345 67.890123456789,
                    34.567890123456 56.789012345678,
                    45.678901234567 34.567890123456,
                    29.876543210987 22.345678901234,
                    12.345678901234 45.678901234567
                ),
                (
                    25.123456789012 45.987654321098,
                    30.987654321098 50.123456789012,
                    35.456789012345 45.456789012345,
                    30.234567890123 40.987654321098,
                    25.123456789012 45.987654321098
                )
            )",
        );
    }
}
diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-serde/Cargo.toml
similarity index 61%
copy from rust/sedona-functions/Cargo.toml
copy to rust/sedona-serde/Cargo.toml
index 57afd240..7eddb106 100644
---
a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-serde/Cargo.toml @@ -15,11 +15,8 @@ # specific language governing permissions and limitations # under the License. [package] -name = "sedona-functions" +name = "sedona-serde" version.workspace = true -license.workspace = true -keywords.workspace = true -categories.workspace = true homepage.workspace = true repository.workspace = true description.workspace = true @@ -31,27 +28,9 @@ rust-version.workspace = true result_large_err = "allow" [dev-dependencies] -arrow-json = { workspace = true } -criterion = { workspace = true } -rstest = { workspace = true } -sedona-testing = { workspace = true, features = ["criterion"] } -datafusion = { workspace = true } -tokio = { workspace = true, features = ["rt", "macros"] } [dependencies] -arrow-schema = { workspace = true } arrow-array = { workspace = true } datafusion-common = { workspace = true } -datafusion-expr = { workspace = true } -geo-traits = { workspace = true } -sedona-common = { workspace = true } -sedona-expr = { workspace = true } -sedona-geometry = { workspace = true } -sedona-schema = { workspace = true } -wkb = { workspace = true } wkt = { workspace = true } -serde_json = { workspace = true } - -[[bench]] -harness = false -name = "native-functions" +byteorder = "1.5.0" diff --git a/rust/sedona-serde/src/deserialize.rs b/rust/sedona-serde/src/deserialize.rs new file mode 100644 index 00000000..a28811ca --- /dev/null +++ b/rust/sedona-serde/src/deserialize.rs @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::linestring::{parse_linestring, parse_multilinestring}; +use crate::point::{parse_multipoint, parse_point, write_empty_point}; +use crate::polygon::{parse_multipolygon, parse_polygon, write_empty_polygon}; +use arrow_array::builder::BinaryBuilder; +use byteorder::{ByteOrder, LittleEndian, ReadBytesExt, WriteBytesExt}; +use datafusion_common::error::DataFusionError; +use std::io::{Cursor}; +use wkt::types::Dimension; + +pub fn deserialize<'a>(builder: &mut BinaryBuilder, bytes: &[u8]) -> datafusion_common::Result<()> { + use std::io::Cursor; + + if bytes.len() < 8 { + return Err(DataFusionError::Internal( + "Sedona bytes are too short".to_string(), + )); + } + + let mut reader = Cursor::new(bytes); + + parse_geometry::<LittleEndian, LittleEndian>(builder, &mut reader, bytes) +} + +pub fn parse_geometry<'a, IN: ByteOrder, OUT: ByteOrder>( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + bytes: &[u8], +) -> datafusion_common::Result<()> { + let preamble_byte = cursor.read_u8()? 
& 0xFF; + + let wkb_type = preamble_byte >> 4; + + let dimension = get_dimension((preamble_byte & 0x0F) >> 1); + + let has_srid = (preamble_byte & 0x01) != 0; + + cursor.set_position(cursor.position() + 3); // Skip 3 bytes + + match wkb_type { + 1 => { + let number_of_coordinates = cursor.read_u32::<IN>()?; + if number_of_coordinates == 0 { + write_empty_point::<OUT>(builder, dimension)?; + return Ok(()); + } + + parse_point::<IN, OUT>(builder, cursor, dimension)?; + } + 2 => { + parse_linestring::<IN, OUT>(builder, cursor, dimension)?; + } + 3 => { + let mut meta_data_reader = Cursor::new(bytes); + + let number_of_points = cursor.read_u32::<IN>()?; + if number_of_points == 0 { + write_empty_polygon::<OUT>(builder, dimension)?; + + return Ok(()); + } + + let metadata_start_position = number_of_points * 8 * 2; + meta_data_reader.set_position(cursor.position() + (metadata_start_position) as u64); + + parse_polygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?; + cursor.set_position(meta_data_reader.position()); + } + 4 => { + parse_multipoint::<IN, OUT>(builder, cursor, dimension)?; + } + 5 => { + // let bytes = cursor.into_inner(); + let mut meta_data_reader = Cursor::new(bytes); + parse_multilinestring::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?; + cursor.set_position(meta_data_reader.position()); + } + 6 => { + let mut meta_data_reader = Cursor::new(bytes); + parse_multipolygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?; + cursor.set_position(meta_data_reader.position()); + } + 7 => { + let number_of_geometries = cursor.read_u32::<IN>()?; + builder.write_i8(1)?; + builder.write_u32::<OUT>(7u32)?; + + builder.write_u32::<OUT>(number_of_geometries)?; + + for i in 0..number_of_geometries { + parse_geometry::<IN, OUT>(builder, cursor, bytes)?; + } + } + _ => { + return Err(DataFusionError::Execution(format!( + "Unsupported geometry type: {}", + wkb_type + ))) + } + } + + Ok(()) +} + + +fn get_dimension(b: u8) -> 
Dimension { + match b { + 1 => Dimension::XY, + 2 => Dimension::XYZ, + 3 => Dimension::XYM, + 4 => Dimension::XYZM, + _ => Dimension::XY, + } +} diff --git a/rust/sedona-serde/src/lib.rs b/rust/sedona-serde/src/lib.rs new file mode 100644 index 00000000..77397606 --- /dev/null +++ b/rust/sedona-serde/src/lib.rs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod deserialize; +mod point; +mod linestring; +mod polygon; +mod wkb; diff --git a/rust/sedona-serde/src/linestring.rs b/rust/sedona-serde/src/linestring.rs new file mode 100644 index 00000000..1f7af45d --- /dev/null +++ b/rust/sedona-serde/src/linestring.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::builder::BinaryBuilder; +use byteorder::{ByteOrder, ReadBytesExt, WriteBytesExt}; +use std::io::{Cursor, Read, Write}; +use wkt::types::Dimension; +use crate::wkb::write_wkb_byte_order_marker; + +fn get_linestring_marker(dimension: Dimension) -> u32 { + match dimension { + Dimension::XY => 2u32, + Dimension::XYZ => 1002u32, + Dimension::XYM => 2002u32, + Dimension::XYZM => 3002u32, + } +} + +pub fn parse_linestring<IN: ByteOrder, OUT: ByteOrder>( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + dimension: Dimension, +) -> datafusion_common::Result<()> { + let number_of_points = cursor.read_u32::<IN>()?; + let byte_type = get_linestring_marker(dimension); + + write_wkb_byte_order_marker(builder)?; + + builder.write_u32::<OUT>(byte_type)?; + + builder.write_u32::<OUT>(number_of_points)?; + + let mut buf = [0u8; 8]; + for _ in 0..number_of_points * 2 { + cursor.read_exact(&mut buf)?; + builder.write(&buf)?; + } + + Ok(()) +} + +pub fn parse_multilinestring<IN: ByteOrder, OUT: ByteOrder>( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + metadata_reader: &mut Cursor<&[u8]>, + dimension: Dimension, +) -> datafusion_common::Result<()> { + let byte_type = match dimension { + Dimension::XY => 5u32, + Dimension::XYZ => 1005u32, + Dimension::XYM => 2005u32, + Dimension::XYZM => 3005u32, + }; + + let linestring_type = get_linestring_marker(dimension); + + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(byte_type)?; + + let number_of_points = cursor.read_u32::<IN>()?; + + 
metadata_reader.set_position(cursor.position() + (number_of_points * 8 * 2) as u64); + + let number_of_geometries = metadata_reader.read_u32::<IN>()?; + + builder.write_u32::<OUT>(number_of_geometries)?; + + for _ in 0..number_of_geometries { + let number_of_points_in_linestring = metadata_reader.read_u32::<IN>()?; + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(linestring_type)?; + + builder.write_u32::<OUT>(number_of_points_in_linestring)?; + + for _ in 0..number_of_points_in_linestring * 2{ + let mut buf = [0u8; 8]; + cursor.read_exact(&mut buf)?; + + builder.write(&buf)?; + } + } + + Ok(()) +} diff --git a/rust/sedona-serde/src/point.rs b/rust/sedona-serde/src/point.rs new file mode 100644 index 00000000..5a6d7400 --- /dev/null +++ b/rust/sedona-serde/src/point.rs @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::wkb::write_wkb_byte_order_marker; +use arrow_array::builder::BinaryBuilder; +use byteorder::{ByteOrder, ReadBytesExt, WriteBytesExt}; +use datafusion_common::error::Result; +use std::io::{Cursor, Read, Write}; +use wkt::types::Dimension; + +fn get_byte_type_for_point(dimension: Dimension) -> u32 { + match dimension { + Dimension::XY => 1u32, + Dimension::XYZ => 1001u32, + Dimension::XYM => 2001u32, + Dimension::XYZM => 3001u32, + } +} + +pub fn write_empty_point<OUT: ByteOrder>( + builder: &mut BinaryBuilder, + dimension: Dimension, +) -> Result<()> { + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(get_byte_type_for_point(dimension))?; + + builder.write_f64::<OUT>(f64::NAN)?; // X + builder.write_f64::<OUT>(f64::NAN)?; // Y + + Ok(()) +} + +pub fn parse_point<IN: ByteOrder, OUT: ByteOrder>( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + dimension: Dimension, +) -> Result<()> { + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(get_byte_type_for_point(dimension))?; + + let mut buf = [0u8; 8]; + cursor.read_exact(&mut buf)?; + + builder.write(&buf)?; + + cursor.read_exact(&mut buf)?; + builder.write(&buf)?; + + Ok(()) +} + +pub fn parse_multipoint<IN: ByteOrder, OUT: ByteOrder>( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + dimension: Dimension, +) -> Result<()> { + let number_of_points = cursor.read_u32::<IN>()?; + + let byte_type = match dimension { + Dimension::XY => 4u32, + Dimension::XYZ => 1004u32, + Dimension::XYM => 2004u32, + Dimension::XYZM => 3004u32, + }; + + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(byte_type)?; + + if number_of_points == 0 { + builder.write_u32::<OUT>(0)?; + return Ok(()); + } + + + builder.write_u32::<OUT>(number_of_points)?; + + for _ in 0..number_of_points { + parse_point::<IN, OUT>(builder, cursor, dimension)?; + } + + Ok(()) +} diff --git a/rust/sedona-serde/src/polygon.rs b/rust/sedona-serde/src/polygon.rs new file mode 
100644 index 00000000..e46f1d22 --- /dev/null +++ b/rust/sedona-serde/src/polygon.rs @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::io::{Cursor, Read, Write}; +use arrow_array::builder::BinaryBuilder; +use byteorder::{ByteOrder, ReadBytesExt, WriteBytesExt}; +use wkt::types::Dimension; +use crate::wkb::write_wkb_byte_order_marker; + + +pub(crate) fn get_polygon_marker(dimension: Dimension) -> u32 { + match dimension { + Dimension::XY => 3u32, + Dimension::XYZ => 1003u32, + Dimension::XYM => 2003u32, + Dimension::XYZM => 3003u32, + } +} + +pub fn parse_polygon<IN: ByteOrder, OUT: ByteOrder>( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + metadata_reader: &mut Cursor<&[u8]>, + dimension: Dimension, +) -> datafusion_common::Result<()> { + let byte_type = get_polygon_marker(dimension); + let number_of_rings = metadata_reader.read_u32::<IN>()?; + + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(byte_type)?; + builder.write_u32::<OUT>(number_of_rings)?; + + for _ in 0..number_of_rings { + let ring_number_of_points = metadata_reader.read_u32::<IN>()?; + builder.write_u32::<OUT>(ring_number_of_points)?; + + let mut buf = [0u8; 8]; + for _ in 
0..ring_number_of_points * 2 { + cursor.read_exact(&mut buf)?; + builder.write(&buf)?; + } + } + + Ok(()) +} + +pub(crate) fn parse_multipolygon<IN: ByteOrder, OUT: ByteOrder>( + builder: &mut BinaryBuilder, + cursor: &mut Cursor<&[u8]>, + metadata_reader: &mut Cursor<&[u8]>, + dimension: Dimension, +) -> datafusion_common::Result<()> { + let byte_type = match dimension { + Dimension::XY => 6u32, + Dimension::XYZ => 1006u32, + Dimension::XYM => 2006u32, + Dimension::XYZM => 3006u32, + }; + + let number_of_points = cursor.read_u32::<IN>()?; + let metadata_start_position = number_of_points * 8 * 2; + metadata_reader.set_position(cursor.position() + (metadata_start_position) as u64); + + let number_of_geometries = metadata_reader.read_u32::<IN>()?; + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(byte_type)?; + builder.write_u32::<OUT>(number_of_geometries)?; + + for _ in 0..number_of_geometries { + parse_polygon::<IN, OUT>(builder, cursor, metadata_reader, dimension)?; + } + + Ok(()) +} + + +pub (crate) fn write_empty_polygon<OUT: ByteOrder>(builder: &mut BinaryBuilder, dimension: Dimension) -> datafusion_common::Result<()> { + let byte_type = match dimension { + Dimension::XY => 3u32, + Dimension::XYZ => 1003u32, + Dimension::XYM => 2003u32, + Dimension::XYZM => 3003u32, + }; + + write_wkb_byte_order_marker(builder)?; + builder.write_u32::<OUT>(byte_type)?; + builder.write_u32::<OUT>(0u32)?; // 0 rings + + Ok(()) +} diff --git a/rust/sedona-serde/src/wkb.rs b/rust/sedona-serde/src/wkb.rs new file mode 100644 index 00000000..d3b642ce --- /dev/null +++ b/rust/sedona-serde/src/wkb.rs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
// The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

/// Byte-order marker that opens a little-endian WKB geometry.
pub(crate) const WKB_LITTLE_ENDIAN_MARKER: u8 = 1;

/// Writes the little-endian WKB byte-order marker (a single `1` byte) to `writer`.
///
/// # Errors
///
/// Returns the underlying I/O error if the byte cannot be written.
pub fn write_wkb_byte_order_marker<W: std::io::Write>(writer: &mut W) -> std::io::Result<()> {
    // A one-byte `write_all` is all that is needed; no helper trait required.
    writer.write_all(&[WKB_LITTLE_ENDIAN_MARKER])
}
