Copilot commented on code in PR #52:
URL: https://github.com/apache/sedona-db/pull/52#discussion_r2345786068
##########
rust/sedona-geoparquet/src/writer.rs:
##########
@@ -0,0 +1,248 @@
+use std::sync::Arc;
+
+use datafusion::{
+ config::TableParquetOptions,
+ datasource::{
+ file_format::parquet::ParquetSink, physical_plan::FileSinkConfig,
sink::DataSinkExec,
+ },
+};
+use datafusion_common::{exec_datafusion_err, exec_err, not_impl_err, Result};
+use datafusion_expr::dml::InsertOp;
+use datafusion_physical_expr::LexRequirement;
+use datafusion_physical_plan::ExecutionPlan;
+use sedona_common::sedona_internal_err;
+use sedona_schema::{
+ crs::lnglat,
+ datatypes::{Edges, SedonaType},
+ schema::SedonaSchema,
+};
+
+use crate::{
+ metadata::{GeoParquetColumnMetadata, GeoParquetMetadata},
+ options::{GeoParquetVersion, TableGeoParquetOptions},
+};
+
+pub fn create_geoparquet_writer_physical_plan(
+ input: Arc<dyn ExecutionPlan>,
+ conf: FileSinkConfig,
+ order_requirements: Option<LexRequirement>,
+ options: &TableGeoParquetOptions,
+) -> Result<Arc<dyn ExecutionPlan>> {
+ if conf.insert_op != InsertOp::Append {
+ return not_impl_err!("Overwrites are not implemented yet for Parquet");
+ }
+
+ // If there is no geometry, just use the inner implementation
+ let output_geometry_column_indices =
conf.output_schema().geometry_column_indices()?;
+ if output_geometry_column_indices.is_empty() {
+ return create_inner_writer(input, conf, order_requirements,
options.inner.clone());
+ }
+
+ // We have geometry and/or geography! Collect the GeoParquetMetadata we'll
need to write
+ let mut metadata = GeoParquetMetadata::default();
+
+ // Check the version
+ match options.geoparquet_version {
+ GeoParquetVersion::V1_0 => {
+ metadata.version = "1.0.0".to_string();
+ }
+ _ => {
+ return not_impl_err!(
+ "GeoParquetVersion {:?} is not yet supported",
+ options.geoparquet_version
+ );
+ }
+ };
+
+ let field_names = conf
+ .output_schema()
+ .fields()
+ .iter()
+ .map(|f| f.name())
+ .collect::<Vec<_>>();
+
+ // Apply primary column
+ if let Some(output_geometry_primary) =
conf.output_schema().primary_geometry_column_index()? {
+ metadata.primary_column = field_names[output_geometry_primary].clone();
+ }
+
+ // Apply all columns
+ for i in output_geometry_column_indices {
+ let f = conf.output_schema().field(i);
+ let sedona_type = SedonaType::from_storage_field(f)?;
+ let mut column_metadata = GeoParquetColumnMetadata::default();
+
+ let (edge_type, crs) = match sedona_type {
+ SedonaType::Wkb(edge_type, crs) | SedonaType::WkbView(edge_type,
crs) => {
+ (edge_type, crs)
+ }
+ _ => return sedona_internal_err!("Unexpected type: {sedona_type}"),
+ };
+
+ // Assign edge type if needed
+ match edge_type {
+ Edges::Planar => {}
+ Edges::Spherical => {
+ column_metadata.edges = Some("spherical".to_string());
+ }
+ }
+
+ // Assign crs
+ if crs == lnglat() {
+ // Do nothing, lnglat is the meaning of an omitted CRS
+ } else if let Some(crs) = crs {
+ column_metadata.crs = Some(crs.to_json().parse().map_err(|e| {
+ exec_datafusion_err!("Failed to parse CRS for column '{}'{e}",
f.name())
Review Comment:
Missing space before the error variable in the format string. Should be
`'{}' {e}`.
```suggestion
exec_datafusion_err!("Failed to parse CRS for column '{}'
{e}", f.name())
```
##########
rust/sedona-geoparquet/src/format.rs:
##########
@@ -351,7 +365,6 @@ impl GeoParquetFileSource {
) -> Result<Self> {
if let Some(parquet_source) =
inner.as_any().downcast_ref::<ParquetSource>() {
let mut parquet_source = parquet_source.clone();
-
// Extract the predicate from the existing source if it exists so
we can keep a copy of it
Review Comment:
The variable `parquet_source` is shadowing the previous binding. Consider
using a different name like `mut cloned_parquet_source` to avoid confusion.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]