This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 5eb9f12 fix(python/sedonadb): Ensure schema displays "geometry" and
"geography" for spatial types (#100)
5eb9f12 is described below
commit 5eb9f1214d17271afa7d93387df31ac0cde75d42
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Sep 17 11:24:40 2025 -0500
fix(python/sedonadb): Ensure schema displays "geometry" and "geography" for
spatial types (#100)
---
python/sedonadb/python/sedonadb/dataframe.py | 6 +-
python/sedonadb/src/schema.rs | 2 +-
python/sedonadb/tests/test_dataframe.py | 4 +-
rust/sedona-schema/src/crs.rs | 14 +-
rust/sedona-schema/src/datatypes.rs | 244 +++++++++++++++++++--------
rust/sedona/src/show.rs | 15 +-
6 files changed, 199 insertions(+), 86 deletions(-)
diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index 78bf47b..ff8316b 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -50,11 +50,11 @@ class DataFrame:
>>> df = sd.sql("SELECT 1 as one")
>>> df.schema
SedonaSchema with 1 field:
- one: non-nullable Int64
+ one: non-nullable int64<Int64>
>>> df.schema.field(0)
- SedonaField one: non-nullable Int64
+ SedonaField one: non-nullable int64<Int64>
>>> df.schema.field(0).name, df.schema.field(0).type
- ('one', SedonaType Int64)
+ ('one', SedonaType int64<Int64>)
"""
return self._impl.schema()
diff --git a/python/sedonadb/src/schema.rs b/python/sedonadb/src/schema.rs
index 74e2519..d9466ea 100644
--- a/python/sedonadb/src/schema.rs
+++ b/python/sedonadb/src/schema.rs
@@ -181,7 +181,7 @@ impl PySedonaType {
}
pub fn repr(&self) -> String {
- format!("{}", self.inner)
+ format!("{}<{}>", self.inner.logical_type_name(), self.inner)
}
}
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index b74bfc6..aab33ef 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -131,14 +131,14 @@ def test_schema(con):
# Non-geometry field accessor
assert df.schema.field(0).name == "one"
assert df.schema.field("one").name == "one"
- assert repr(df.schema.field(0).type) == "SedonaType Int64"
+ assert repr(df.schema.field(0).type) == "SedonaType int64<Int64>"
assert df.schema.field(0).type.edge_type is None
assert df.schema.field(0).type.crs is None
# Geometry field accessor
assert df.schema.field(1).name == "geom"
assert df.schema.field("geom").name == "geom"
- assert repr(df.schema.field(1).type) == "SedonaType wkb"
+ assert repr(df.schema.field(1).type) == "SedonaType geometry<Wkb>"
assert df.schema.field(1).type.edge_type == gat.EdgeType.PLANAR
assert df.schema.field(1).type.crs is None
diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs
index fef41b7..72cd727 100644
--- a/rust/sedona-schema/src/crs.rs
+++ b/rust/sedona-schema/src/crs.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
use datafusion_common::{DataFusionError, Result};
-use std::fmt::Debug;
+use std::fmt::{Debug, Display};
use std::str::FromStr;
use std::sync::Arc;
@@ -68,6 +68,18 @@ pub fn lnglat() -> Crs {
/// equality (for binary operators).
pub type Crs = Option<Arc<dyn CoordinateReferenceSystem + Send + Sync>>;
+impl Display for dyn CoordinateReferenceSystem + Send + Sync {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ if let Ok(Some(auth_code)) = self.to_authority_code() {
+ write!(f, "{}", auth_code.to_lowercase())
+ } else {
+ // We can probably try harder to get compact output out of more
+ // types of CRSes
+ write!(f, "{{...}}")
+ }
+ }
+}
+
impl PartialEq<dyn CoordinateReferenceSystem + Send + Sync>
for dyn CoordinateReferenceSystem + Send + Sync
{
diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs
index 528ee12..254ca25 100644
--- a/rust/sedona-schema/src/datatypes.rs
+++ b/rust/sedona-schema/src/datatypes.rs
@@ -20,7 +20,7 @@ use sedona_common::sedona_internal_err;
use serde_json::Value;
use std::fmt::{Debug, Display};
-use crate::crs::{deserialize_crs, CoordinateReferenceSystem, Crs};
+use crate::crs::{deserialize_crs, Crs};
use crate::extension_type::ExtensionType;
/// Data types supported by Sedona that resolve to a concrete Arrow DataType
@@ -37,48 +37,6 @@ impl From<DataType> for SedonaType {
}
}
-impl Display for SedonaType {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- match self {
- SedonaType::Arrow(data_type) => Display::fmt(data_type, f),
- SedonaType::Wkb(edges, crs) => display_geometry("wkb", edges, crs, f),
- SedonaType::WkbView(edges, crs) => display_geometry("wkb_view", edges, crs, f),
- }
- }
-}
-
-fn display_geometry(
- name: &str,
- edges: &Edges,
- crs: &Crs,
- f: &mut std::fmt::Formatter<'_>,
-) -> std::fmt::Result {
- match edges {
- Edges::Planar => {}
- Edges::Spherical => write!(f, "spherical ")?,
- }
-
- write!(f, "{name}")?;
-
- if let Some(crs) = crs {
- write!(f, " <{}>", &crs)?;
- }
-
- Ok(())
-}
-
-impl Display for dyn CoordinateReferenceSystem + Send + Sync {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- if let Ok(Some(auth_code)) = self.to_authority_code() {
- write!(f, "{}", auth_code.to_lowercase())
- } else {
- // We can probably try harder to get compact output out of more
- // types of CRSes
- write!(f, "{{...}}")
- }
- }
-}
-
/// Edge interpolations
///
/// While at the logical level we refer to geometries and geographies, at the execution
@@ -139,23 +97,6 @@ impl SedonaType {
}
}
- /// Returns True if another physical type matches this one for the purposes of dispatch
- ///
- /// For Arrow types this matches on type equality; for other type it matches on edges
- /// but not crs.
- pub fn match_signature(&self, other: &SedonaType) -> bool {
- match (self, other) {
- (SedonaType::Arrow(data_type), SedonaType::Arrow(other_data_type)) => {
- data_type == other_data_type
- }
- (SedonaType::Wkb(edges, _), SedonaType::Wkb(other_edges, _)) => edges == other_edges,
- (SedonaType::WkbView(edges, _), SedonaType::WkbView(other_edges, _)) => {
- edges == other_edges
- }
- _ => false,
- }
- }
-
/// Construct a [`Field`] as it would appear in an external `RecordBatch`
pub fn to_storage_field(&self, name: &str, nullable: bool) -> Result<Field> {
self.extension_type().map_or(
@@ -194,6 +135,109 @@ impl SedonaType {
_ => None,
}
}
+
+ /// The logical type name for this type
+ ///
+ /// The logical type name is used in tabular display and schema printing. Notably,
+ /// it renders Wkb and WkbView as "geometry" or "geography" depending on the edge
+ /// type. For Arrow types, this similarly strips the storage details (e.g.,
+ /// both Utf8 and Utf8View types render as "utf8").
+ pub fn logical_type_name(&self) -> String {
+ match self {
+ SedonaType::Wkb(Edges::Planar, _) | SedonaType::WkbView(Edges::Planar, _) => {
+ "geometry".to_string()
+ }
+ SedonaType::Wkb(Edges::Spherical, _) | SedonaType::WkbView(Edges::Spherical, _) => {
+ "geography".to_string()
+ }
+ SedonaType::Arrow(data_type) => match data_type {
+ DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => "utf8".to_string(),
+ DataType::Binary
+ | DataType::LargeBinary
+ | DataType::BinaryView
+ | DataType::FixedSizeBinary(_) => "binary".to_string(),
+ DataType::List(_)
+ | DataType::LargeList(_)
+ | DataType::ListView(_)
+ | DataType::LargeListView(_)
+ | DataType::FixedSizeList(_, _) => "list".to_string(),
+ DataType::Dictionary(_, value_type) => {
+ SedonaType::Arrow(value_type.as_ref().clone()).logical_type_name()
+ }
+ DataType::RunEndEncoded(_, value_field) => {
+ match SedonaType::from_storage_field(value_field) {
+ Ok(value_sedona_type) => value_sedona_type.logical_type_name(),
+ Err(_) => format!("{value_field:?}"),
+ }
+ }
+ _ => {
+ let data_type_str = data_type.to_string();
+ if let Some(params_start) = data_type_str.find('(') {
+ data_type_str[0..params_start].to_string().to_lowercase()
+ } else {
+ data_type_str.to_lowercase()
+ }
+ }
+ },
+ }
+ }
+
+ /// Returns True if another physical type matches this one for the purposes of dispatch
+ ///
+ /// For Arrow types this matches on type equality; for other type it matches on edges
+ /// but not crs.
+ pub fn match_signature(&self, other: &SedonaType) -> bool {
+ match (self, other) {
+ (SedonaType::Arrow(data_type), SedonaType::Arrow(other_data_type)) => {
+ data_type == other_data_type
+ }
+ (SedonaType::Wkb(edges, _), SedonaType::Wkb(other_edges, _)) => edges == other_edges,
+ (SedonaType::WkbView(edges, _), SedonaType::WkbView(other_edges, _)) => {
+ edges == other_edges
+ }
+ _ => false,
+ }
+ }
+}
+
+// Implementation details for type serialization and display
+
+impl Display for SedonaType {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ SedonaType::Arrow(data_type) => Display::fmt(data_type, f),
+ SedonaType::Wkb(edges, crs) => display_geometry("Wkb", edges, crs, f),
+ SedonaType::WkbView(edges, crs) => display_geometry("WkbView", edges, crs, f),
+ }
+ }
+}
+
+fn display_geometry(
+ name: &str,
+ edges: &Edges,
+ crs: &Crs,
+ f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+ let mut params = Vec::new();
+
+ if let Some(crs) = crs {
+ params.push(crs.to_string());
+ }
+
+ match edges {
+ Edges::Planar => {}
+ Edges::Spherical => {
+ params.push("Spherical".to_string());
+ }
+ }
+
+ match params.len() {
+ 0 => write!(f, "{name}")?,
+ 1 => write!(f, "{name}({})", params[0])?,
+ _ => write!(f, "{name}({})", params.join(", "))?,
+ }
+
+ Ok(())
}
// Implementation details for importing/exporting types from/to Arrow + metadata
@@ -347,20 +391,88 @@ mod tests {
#[test]
fn sedona_type_to_string() {
assert_eq!(SedonaType::Arrow(DataType::Int32).to_string(), "Int32");
- assert_eq!(WKB_GEOMETRY.to_string(), "wkb");
- assert_eq!(WKB_GEOGRAPHY.to_string(), "spherical wkb");
- assert_eq!(WKB_VIEW_GEOMETRY.to_string(), "wkb_view");
- assert_eq!(WKB_VIEW_GEOGRAPHY.to_string(), "spherical wkb_view");
+ assert_eq!(WKB_GEOMETRY.to_string(), "Wkb");
+ assert_eq!(WKB_GEOGRAPHY.to_string(), "Wkb(Spherical)");
+ assert_eq!(WKB_VIEW_GEOMETRY.to_string(), "WkbView");
+ assert_eq!(WKB_VIEW_GEOGRAPHY.to_string(), "WkbView(Spherical)");
assert_eq!(
SedonaType::Wkb(Edges::Planar, lnglat()).to_string(),
- "wkb <ogc:crs84>"
+ "Wkb(ogc:crs84)"
);
let projjson_value: Value = r#"{}"#.parse().unwrap();
let projjson_crs = deserialize_crs(&projjson_value).unwrap();
assert_eq!(
SedonaType::Wkb(Edges::Planar, projjson_crs).to_string(),
- "wkb <{...}>"
+ "Wkb({...})"
+ );
+ }
+
+ #[test]
+ fn sedona_logical_type_name() {
+ assert_eq!(WKB_GEOMETRY.logical_type_name(), "geometry");
+ assert_eq!(WKB_GEOGRAPHY.logical_type_name(), "geography");
+
+ assert_eq!(
+ SedonaType::Arrow(DataType::Int32).logical_type_name(),
+ "int32"
+ );
+
+ assert_eq!(
+ SedonaType::Arrow(DataType::Utf8).logical_type_name(),
+ "utf8"
+ );
+ assert_eq!(
+ SedonaType::Arrow(DataType::Utf8View).logical_type_name(),
+ "utf8"
+ );
+
+ assert_eq!(
+ SedonaType::Arrow(DataType::Binary).logical_type_name(),
+ "binary"
+ );
+ assert_eq!(
+ SedonaType::Arrow(DataType::BinaryView).logical_type_name(),
+ "binary"
+ );
+
+ assert_eq!(
+ SedonaType::Arrow(DataType::Duration(arrow_schema::TimeUnit::Microsecond))
+ .logical_type_name(),
+ "duration"
+ );
+
+ assert_eq!(
+ SedonaType::Arrow(DataType::List(
+ Field::new("item", DataType::Int32, true).into()
+ ))
+ .logical_type_name(),
+ "list"
+ );
+ assert_eq!(
+ SedonaType::Arrow(DataType::ListView(
+ Field::new("item", DataType::Int32, true).into()
+ ))
+ .logical_type_name(),
+ "list"
+ );
+
+ assert_eq!(
+ SedonaType::Arrow(DataType::Dictionary(
+ Box::new(DataType::Int32),
+ Box::new(DataType::Binary)
+ ))
+ .logical_type_name(),
+ "binary"
+ );
+
+ assert_eq!(
+ SedonaType::Arrow(DataType::RunEndEncoded(
+ Field::new("ends", DataType::Int32, true).into(),
+ Field::new("values", DataType::Binary, true).into()
+ ))
+ .logical_type_name(),
+ "binary"
);
}
diff --git a/rust/sedona/src/show.rs b/rust/sedona/src/show.rs
index 0d2bab1..f1ac362 100644
--- a/rust/sedona/src/show.rs
+++ b/rust/sedona/src/show.rs
@@ -24,10 +24,7 @@ use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
use datafusion_common::{DataFusionError, ScalarValue};
use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF};
use sedona_expr::scalar_udf::SedonaScalarUDF;
-use sedona_schema::{
- datatypes::{Edges, SedonaType},
- matchers::ArgMatcher,
-};
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
use std::iter::zip;
use std::sync::Arc;
@@ -395,15 +392,7 @@ impl DisplayColumn {
pub fn header(&self, options: &DisplayTableOptions) -> Cell {
// Don't print the type ever if it's a continuation column
let is_continuation = self.name == "…" || self.name == "...";
- let display_type = match &self.sedona_type {
- SedonaType::Wkb(Edges::Planar, _) | SedonaType::WkbView(Edges::Planar, _) => {
- "geometry".to_string()
- }
- SedonaType::Wkb(Edges::Spherical, _) | SedonaType::WkbView(Edges::Spherical, _) => {
- "geography".to_string()
- }
- _ => self.sedona_type.to_string().to_lowercase(),
- };
+ let display_type = self.sedona_type.logical_type_name();
if options.arrow_options.types_info() && !is_continuation {
Cell::new(format!("{}\n{}", self.name, display_type)).set_delimiter('\0')
} else {