paleolimbot commented on code in PR #561:
URL: https://github.com/apache/sedona-db/pull/561#discussion_r2764584541


##########
rust/sedona-geoparquet/src/metadata.rs:
##########
@@ -382,20 +383,50 @@ impl GeoParquetMetadata {
     }
 
     /// Construct a [`GeoParquetMetadata`] from a [`ParquetMetaData`]
+    ///
+    /// This constructor considers (1) the GeoParquet metadata in the key/value
+    /// metadata and (2) Geometry/Geography types present in the Parquet 
schema.
+    /// Specification of a column in the GeoParquet metadata takes precedence.
     pub fn try_from_parquet_metadata(metadata: &ParquetMetaData) -> 
Result<Option<Self>> {
-        if let Some(kv) = metadata.file_metadata().key_value_metadata() {
-            for item in kv {
-                if item.key != "geo" {
-                    continue;
-                }
+        let schema = metadata.file_metadata().schema_descr().root_schema();
+        let kv_metadata = metadata.file_metadata().key_value_metadata();
+        Self::try_from_parquet_metadata_impl(schema, kv_metadata)
+    }
 
-                if let Some(value) = &item.value {
-                    return Ok(Some(Self::try_new(value)?));
-                }
+    /// For testing, as it is easier to simulate the schema and key/value 
metadata
+    /// than the whole ParquetMetaData.
+    fn try_from_parquet_metadata_impl(
+        root_schema: &parquet::schema::types::Type,
+        kv_metadata: Option<&Vec<KeyValue>>,
+    ) -> Result<Option<Self>> {
+        let mut columns_from_schema = columns_from_parquet_schema(root_schema, 
kv_metadata)?;
+
+        if let Some(value) = get_parquet_key_value("geo", kv_metadata) {
+            // Values in the GeoParquet metadata take precedence over those 
from the
+            // Parquet schema
+            let mut out = Self::try_new(&value)?;
+            for (k, v) in columns_from_schema.drain() {
+                out.columns.entry(k).or_insert(v);
             }
+
+            return Ok(Some(out));
         }
 
-        Ok(None)
+        // No geo metadata key, but we have geo columns from the schema
+        if !columns_from_schema.is_empty() {
+            // To keep metadata valid, ensure we set a primary column 
deterministically
+            let mut column_names = 
columns_from_schema.keys().collect::<Vec<_>>();
+            column_names.sort();
+            let primary_column = column_names[0].to_string();

Review Comment:
   This is a great point...I refactored the existing logic we have for this and 
used it here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to