This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 5eb9f12  fix(python/sedonadb): Ensure schema displays "geometry" and "geography" for spatial types (#100)
5eb9f12 is described below

commit 5eb9f1214d17271afa7d93387df31ac0cde75d42
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Sep 17 11:24:40 2025 -0500

    fix(python/sedonadb): Ensure schema displays "geometry" and "geography" for spatial types (#100)
---
 python/sedonadb/python/sedonadb/dataframe.py |   6 +-
 python/sedonadb/src/schema.rs                |   2 +-
 python/sedonadb/tests/test_dataframe.py      |   4 +-
 rust/sedona-schema/src/crs.rs                |  14 +-
 rust/sedona-schema/src/datatypes.rs          | 244 +++++++++++++++++++--------
 rust/sedona/src/show.rs                      |  15 +-
 6 files changed, 199 insertions(+), 86 deletions(-)

diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index 78bf47b..ff8316b 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -50,11 +50,11 @@ class DataFrame:
             >>> df = sd.sql("SELECT 1 as one")
             >>> df.schema
             SedonaSchema with 1 field:
-              one: non-nullable Int64
+              one: non-nullable int64<Int64>
             >>> df.schema.field(0)
-            SedonaField one: non-nullable Int64
+            SedonaField one: non-nullable int64<Int64>
             >>> df.schema.field(0).name, df.schema.field(0).type
-            ('one', SedonaType Int64)
+            ('one', SedonaType int64<Int64>)
         """
         return self._impl.schema()
 
diff --git a/python/sedonadb/src/schema.rs b/python/sedonadb/src/schema.rs
index 74e2519..d9466ea 100644
--- a/python/sedonadb/src/schema.rs
+++ b/python/sedonadb/src/schema.rs
@@ -181,7 +181,7 @@ impl PySedonaType {
     }
 
     pub fn repr(&self) -> String {
-        format!("{}", self.inner)
+        format!("{}<{}>", self.inner.logical_type_name(), self.inner)
     }
 }
 
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index b74bfc6..aab33ef 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -131,14 +131,14 @@ def test_schema(con):
     # Non-geometry field accessor
     assert df.schema.field(0).name == "one"
     assert df.schema.field("one").name == "one"
-    assert repr(df.schema.field(0).type) == "SedonaType Int64"
+    assert repr(df.schema.field(0).type) == "SedonaType int64<Int64>"
     assert df.schema.field(0).type.edge_type is None
     assert df.schema.field(0).type.crs is None
 
     # Geometry field accessor
     assert df.schema.field(1).name == "geom"
     assert df.schema.field("geom").name == "geom"
-    assert repr(df.schema.field(1).type) == "SedonaType wkb"
+    assert repr(df.schema.field(1).type) == "SedonaType geometry<Wkb>"
     assert df.schema.field(1).type.edge_type == gat.EdgeType.PLANAR
     assert df.schema.field(1).type.crs is None
 
diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs
index fef41b7..72cd727 100644
--- a/rust/sedona-schema/src/crs.rs
+++ b/rust/sedona-schema/src/crs.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 use datafusion_common::{DataFusionError, Result};
-use std::fmt::Debug;
+use std::fmt::{Debug, Display};
 use std::str::FromStr;
 use std::sync::Arc;
 
@@ -68,6 +68,18 @@ pub fn lnglat() -> Crs {
 /// equality (for binary operators).
 pub type Crs = Option<Arc<dyn CoordinateReferenceSystem + Send + Sync>>;
 
+impl Display for dyn CoordinateReferenceSystem + Send + Sync {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Ok(Some(auth_code)) = self.to_authority_code() {
+            write!(f, "{}", auth_code.to_lowercase())
+        } else {
+            // We can probably try harder to get compact output out of more
+            // types of CRSes
+            write!(f, "{{...}}")
+        }
+    }
+}
+
 impl PartialEq<dyn CoordinateReferenceSystem + Send + Sync>
     for dyn CoordinateReferenceSystem + Send + Sync
 {
diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs
index 528ee12..254ca25 100644
--- a/rust/sedona-schema/src/datatypes.rs
+++ b/rust/sedona-schema/src/datatypes.rs
@@ -20,7 +20,7 @@ use sedona_common::sedona_internal_err;
 use serde_json::Value;
 use std::fmt::{Debug, Display};
 
-use crate::crs::{deserialize_crs, CoordinateReferenceSystem, Crs};
+use crate::crs::{deserialize_crs, Crs};
 use crate::extension_type::ExtensionType;
 
 /// Data types supported by Sedona that resolve to a concrete Arrow DataType
@@ -37,48 +37,6 @@ impl From<DataType> for SedonaType {
     }
 }
 
-impl Display for SedonaType {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            SedonaType::Arrow(data_type) => Display::fmt(data_type, f),
-            SedonaType::Wkb(edges, crs) => display_geometry("wkb", edges, crs, 
f),
-            SedonaType::WkbView(edges, crs) => display_geometry("wkb_view", 
edges, crs, f),
-        }
-    }
-}
-
-fn display_geometry(
-    name: &str,
-    edges: &Edges,
-    crs: &Crs,
-    f: &mut std::fmt::Formatter<'_>,
-) -> std::fmt::Result {
-    match edges {
-        Edges::Planar => {}
-        Edges::Spherical => write!(f, "spherical ")?,
-    }
-
-    write!(f, "{name}")?;
-
-    if let Some(crs) = crs {
-        write!(f, " <{}>", &crs)?;
-    }
-
-    Ok(())
-}
-
-impl Display for dyn CoordinateReferenceSystem + Send + Sync {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        if let Ok(Some(auth_code)) = self.to_authority_code() {
-            write!(f, "{}", auth_code.to_lowercase())
-        } else {
-            // We can probably try harder to get compact output out of more
-            // types of CRSes
-            write!(f, "{{...}}")
-        }
-    }
-}
-
 /// Edge interpolations
 ///
 /// While at the logical level we refer to geometries and geographies, at the execution
@@ -139,23 +97,6 @@ impl SedonaType {
         }
     }
 
-    /// Returns True if another physical type matches this one for the purposes of dispatch
-    ///
-    /// For Arrow types this matches on type equality; for other type it matches on edges
-    /// but not crs.
-    pub fn match_signature(&self, other: &SedonaType) -> bool {
-        match (self, other) {
-            (SedonaType::Arrow(data_type), SedonaType::Arrow(other_data_type)) 
=> {
-                data_type == other_data_type
-            }
-            (SedonaType::Wkb(edges, _), SedonaType::Wkb(other_edges, _)) => 
edges == other_edges,
-            (SedonaType::WkbView(edges, _), SedonaType::WkbView(other_edges, 
_)) => {
-                edges == other_edges
-            }
-            _ => false,
-        }
-    }
-
     /// Construct a [`Field`] as it would appear in an external `RecordBatch`
     pub fn to_storage_field(&self, name: &str, nullable: bool) -> 
Result<Field> {
         self.extension_type().map_or(
@@ -194,6 +135,109 @@ impl SedonaType {
             _ => None,
         }
     }
+
+    /// The logical type name for this type
+    ///
+    /// The logical type name is used in tabular display and schema printing. Notably,
+    /// it renders Wkb and WkbView as "geometry" or "geography" depending on the edge
+    /// type. For Arrow types, this similarly strips the storage details (e.g.,
+    /// both Utf8 and Utf8View types render as "utf8").
+    pub fn logical_type_name(&self) -> String {
+        match self {
+            SedonaType::Wkb(Edges::Planar, _) | 
SedonaType::WkbView(Edges::Planar, _) => {
+                "geometry".to_string()
+            }
+            SedonaType::Wkb(Edges::Spherical, _) | 
SedonaType::WkbView(Edges::Spherical, _) => {
+                "geography".to_string()
+            }
+            SedonaType::Arrow(data_type) => match data_type {
+                DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => 
"utf8".to_string(),
+                DataType::Binary
+                | DataType::LargeBinary
+                | DataType::BinaryView
+                | DataType::FixedSizeBinary(_) => "binary".to_string(),
+                DataType::List(_)
+                | DataType::LargeList(_)
+                | DataType::ListView(_)
+                | DataType::LargeListView(_)
+                | DataType::FixedSizeList(_, _) => "list".to_string(),
+                DataType::Dictionary(_, value_type) => {
+                    
SedonaType::Arrow(value_type.as_ref().clone()).logical_type_name()
+                }
+                DataType::RunEndEncoded(_, value_field) => {
+                    match SedonaType::from_storage_field(value_field) {
+                        Ok(value_sedona_type) => 
value_sedona_type.logical_type_name(),
+                        Err(_) => format!("{value_field:?}"),
+                    }
+                }
+                _ => {
+                    let data_type_str = data_type.to_string();
+                    if let Some(params_start) = data_type_str.find('(') {
+                        
data_type_str[0..params_start].to_string().to_lowercase()
+                    } else {
+                        data_type_str.to_lowercase()
+                    }
+                }
+            },
+        }
+    }
+
+    /// Returns True if another physical type matches this one for the purposes of dispatch
+    ///
+    /// For Arrow types this matches on type equality; for other type it matches on edges
+    /// but not crs.
+    pub fn match_signature(&self, other: &SedonaType) -> bool {
+        match (self, other) {
+            (SedonaType::Arrow(data_type), SedonaType::Arrow(other_data_type)) 
=> {
+                data_type == other_data_type
+            }
+            (SedonaType::Wkb(edges, _), SedonaType::Wkb(other_edges, _)) => 
edges == other_edges,
+            (SedonaType::WkbView(edges, _), SedonaType::WkbView(other_edges, 
_)) => {
+                edges == other_edges
+            }
+            _ => false,
+        }
+    }
+}
+
+// Implementation details for type serialization and display
+
+impl Display for SedonaType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SedonaType::Arrow(data_type) => Display::fmt(data_type, f),
+            SedonaType::Wkb(edges, crs) => display_geometry("Wkb", edges, crs, 
f),
+            SedonaType::WkbView(edges, crs) => display_geometry("WkbView", 
edges, crs, f),
+        }
+    }
+}
+
+fn display_geometry(
+    name: &str,
+    edges: &Edges,
+    crs: &Crs,
+    f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+    let mut params = Vec::new();
+
+    if let Some(crs) = crs {
+        params.push(crs.to_string());
+    }
+
+    match edges {
+        Edges::Planar => {}
+        Edges::Spherical => {
+            params.push("Spherical".to_string());
+        }
+    }
+
+    match params.len() {
+        0 => write!(f, "{name}")?,
+        1 => write!(f, "{name}({})", params[0])?,
+        _ => write!(f, "{name}({})", params.join(", "))?,
+    }
+
+    Ok(())
 }
 
 // Implementation details for importing/exporting types from/to Arrow + metadata
@@ -347,20 +391,88 @@ mod tests {
     #[test]
     fn sedona_type_to_string() {
         assert_eq!(SedonaType::Arrow(DataType::Int32).to_string(), "Int32");
-        assert_eq!(WKB_GEOMETRY.to_string(), "wkb");
-        assert_eq!(WKB_GEOGRAPHY.to_string(), "spherical wkb");
-        assert_eq!(WKB_VIEW_GEOMETRY.to_string(), "wkb_view");
-        assert_eq!(WKB_VIEW_GEOGRAPHY.to_string(), "spherical wkb_view");
+        assert_eq!(WKB_GEOMETRY.to_string(), "Wkb");
+        assert_eq!(WKB_GEOGRAPHY.to_string(), "Wkb(Spherical)");
+        assert_eq!(WKB_VIEW_GEOMETRY.to_string(), "WkbView");
+        assert_eq!(WKB_VIEW_GEOGRAPHY.to_string(), "WkbView(Spherical)");
         assert_eq!(
             SedonaType::Wkb(Edges::Planar, lnglat()).to_string(),
-            "wkb <ogc:crs84>"
+            "Wkb(ogc:crs84)"
         );
 
         let projjson_value: Value = r#"{}"#.parse().unwrap();
         let projjson_crs = deserialize_crs(&projjson_value).unwrap();
         assert_eq!(
             SedonaType::Wkb(Edges::Planar, projjson_crs).to_string(),
-            "wkb <{...}>"
+            "Wkb({...})"
+        );
+    }
+
+    #[test]
+    fn sedona_logical_type_name() {
+        assert_eq!(WKB_GEOMETRY.logical_type_name(), "geometry");
+        assert_eq!(WKB_GEOGRAPHY.logical_type_name(), "geography");
+
+        assert_eq!(
+            SedonaType::Arrow(DataType::Int32).logical_type_name(),
+            "int32"
+        );
+
+        assert_eq!(
+            SedonaType::Arrow(DataType::Utf8).logical_type_name(),
+            "utf8"
+        );
+        assert_eq!(
+            SedonaType::Arrow(DataType::Utf8View).logical_type_name(),
+            "utf8"
+        );
+
+        assert_eq!(
+            SedonaType::Arrow(DataType::Binary).logical_type_name(),
+            "binary"
+        );
+        assert_eq!(
+            SedonaType::Arrow(DataType::BinaryView).logical_type_name(),
+            "binary"
+        );
+
+        assert_eq!(
+            
SedonaType::Arrow(DataType::Duration(arrow_schema::TimeUnit::Microsecond))
+                .logical_type_name(),
+            "duration"
+        );
+
+        assert_eq!(
+            SedonaType::Arrow(DataType::List(
+                Field::new("item", DataType::Int32, true).into()
+            ))
+            .logical_type_name(),
+            "list"
+        );
+        assert_eq!(
+            SedonaType::Arrow(DataType::ListView(
+                Field::new("item", DataType::Int32, true).into()
+            ))
+            .logical_type_name(),
+            "list"
+        );
+
+        assert_eq!(
+            SedonaType::Arrow(DataType::Dictionary(
+                Box::new(DataType::Int32),
+                Box::new(DataType::Binary)
+            ))
+            .logical_type_name(),
+            "binary"
+        );
+
+        assert_eq!(
+            SedonaType::Arrow(DataType::RunEndEncoded(
+                Field::new("ends", DataType::Int32, true).into(),
+                Field::new("values", DataType::Binary, true).into()
+            ))
+            .logical_type_name(),
+            "binary"
         );
     }
 
diff --git a/rust/sedona/src/show.rs b/rust/sedona/src/show.rs
index 0d2bab1..f1ac362 100644
--- a/rust/sedona/src/show.rs
+++ b/rust/sedona/src/show.rs
@@ -24,10 +24,7 @@ use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
 use datafusion_common::{DataFusionError, ScalarValue};
 use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, 
ScalarUDF};
 use sedona_expr::scalar_udf::SedonaScalarUDF;
-use sedona_schema::{
-    datatypes::{Edges, SedonaType},
-    matchers::ArgMatcher,
-};
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
 use std::iter::zip;
 use std::sync::Arc;
 
@@ -395,15 +392,7 @@ impl DisplayColumn {
     pub fn header(&self, options: &DisplayTableOptions) -> Cell {
         // Don't print the type ever if it's a continuation column
         let is_continuation = self.name == "…" || self.name == "...";
-        let display_type = match &self.sedona_type {
-            SedonaType::Wkb(Edges::Planar, _) | 
SedonaType::WkbView(Edges::Planar, _) => {
-                "geometry".to_string()
-            }
-            SedonaType::Wkb(Edges::Spherical, _) | 
SedonaType::WkbView(Edges::Spherical, _) => {
-                "geography".to_string()
-            }
-            _ => self.sedona_type.to_string().to_lowercase(),
-        };
+        let display_type = self.sedona_type.logical_type_name();
         if options.arrow_options.types_info() && !is_continuation {
             Cell::new(format!("{}\n{}", self.name, 
display_type)).set_delimiter('\0')
         } else {

Reply via email to