Copilot commented on code in PR #1: URL: https://github.com/apache/sedona-db/pull/1#discussion_r2310087589
########## rust/sedona-functions/src/sd_format.rs: ########## @@ -113,39 +97,236 @@ impl SedonaScalarKernel for SDFormatGeometry { } } - let executor = WkbExecutor::new(&arg_types[0..1], &args[0..1]); + let formatted_type = sedona_type_to_formatted_type(&arg_types[0])?; + if formatted_type == arg_types[0] { Review Comment: Avoid calling `sedona_type_to_formatted_type` twice for the same type. The function is already called in `return_type` method at line 64, and now called again here. Consider caching the result or restructuring to avoid duplicate computation. ########## rust/sedona-functions/src/sd_format.rs: ########## @@ -113,39 +97,236 @@ impl SedonaScalarKernel for SDFormatGeometry { } } - let executor = WkbExecutor::new(&arg_types[0..1], &args[0..1]); + let formatted_type = sedona_type_to_formatted_type(&arg_types[0])?; + if formatted_type == arg_types[0] { + // No change in type, the input data does not have geospatial columns that we can format, + // so just return the input value + return Ok(args[0].clone()); + } + + columnar_value_to_formatted_value(&arg_types[0], &args[0], maybe_width_hint) + } +} + +fn sedona_type_to_formatted_type(sedona_type: &SedonaType) -> Result<SedonaType> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Ok(SedonaType::Arrow(DataType::Utf8)), + SedonaType::Arrow(arrow_type) => { + // dive into the arrow type and translate geospatial types into Utf8 + match arrow_type { + DataType::Struct(fields) => { + let mut new_fields = Vec::with_capacity(fields.len()); + for field in fields { + let new_field = field_to_formatted_field(field)?; + new_fields.push(Arc::new(new_field)); + } + Ok(SedonaType::Arrow(DataType::Struct(new_fields.into()))) + } + DataType::List(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::List(Arc::new(new_field)))) + } + DataType::ListView(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::ListView(Arc::new(new_field)))) + } + _ => Ok(sedona_type.clone()), + } + } + } +} + +fn field_to_formatted_field(field: &Field) -> Result<Field> { + let new_type = sedona_type_to_formatted_type(&SedonaType::from_data_type(field.data_type())?)?; + let new_field = field.clone().with_data_type(new_type.data_type()); + Ok(new_field) +} + +fn columnar_value_to_formatted_value( + sedona_type: &SedonaType, + columnar_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => { + geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) + } + SedonaType::Arrow(arrow_type) => match arrow_type { + DataType::Struct(fields) => match columnar_value { + ColumnarValue::Array(array) => { + let struct_array = array.as_struct(); + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_struct_array))) + } + ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => { + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::Struct(Arc::new( + formatted_struct_array, + )))) + } + _ => internal_err!("Unsupported struct columnar value"), + }, + DataType::List(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list::<i32>(); + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + ColumnarValue::Scalar(ScalarValue::List(list_array)) => { + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::List(Arc::new( + formatted_list_array, + )))) + } + _ => internal_err!("Unsupported list columnar value"), + }, + DataType::ListView(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list_view::<i32>(); + let formatted_list_array = + list_view_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + _ => internal_err!("Unsupported list view columnar value"), + }, + _ => Ok(columnar_value.clone()), + }, + } +} + +/// Implementation format geometry or geography +/// +/// This is very similar to ST_AsText except it respects the width_hint by +/// stopping the render for each item when too many characters have been written. +fn geospatial_value_to_formatted_value( + sedona_type: &SedonaType, + geospatial_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + let arg_types: &[SedonaType] = std::slice::from_ref(sedona_type); + let args: &[ColumnarValue] = std::slice::from_ref(geospatial_value); Review Comment: [nitpick] Creating slices from single references using `std::slice::from_ref` is unnecessary. The `WkbExecutor::new` could be modified to accept single values directly, or these could be passed as arrays `[sedona_type]` and `[geospatial_value]` for better readability. ```suggestion let arg_types: &[SedonaType] = &[*sedona_type]; let args: &[ColumnarValue] = &[geospatial_value.clone()]; ``` ########## rust/sedona-functions/src/sd_format.rs: ########## @@ -113,39 +97,236 @@ impl SedonaScalarKernel for SDFormatGeometry { } } - let executor = WkbExecutor::new(&arg_types[0..1], &args[0..1]); + let formatted_type = sedona_type_to_formatted_type(&arg_types[0])?; + if formatted_type == arg_types[0] { + // No change in type, the input data does not have geospatial columns that we can format, + // so just return the input value + return Ok(args[0].clone()); + } + + columnar_value_to_formatted_value(&arg_types[0], &args[0], maybe_width_hint) + } +} + +fn sedona_type_to_formatted_type(sedona_type: &SedonaType) -> Result<SedonaType> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Ok(SedonaType::Arrow(DataType::Utf8)), + SedonaType::Arrow(arrow_type) => { + // dive into the arrow type and translate geospatial types into Utf8 + match arrow_type { + DataType::Struct(fields) => { + let mut new_fields = Vec::with_capacity(fields.len()); + for field in fields { + let new_field = field_to_formatted_field(field)?; + new_fields.push(Arc::new(new_field)); + } + Ok(SedonaType::Arrow(DataType::Struct(new_fields.into()))) + } + DataType::List(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::List(Arc::new(new_field)))) + } + DataType::ListView(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::ListView(Arc::new(new_field)))) + } + _ => Ok(sedona_type.clone()), + } + } + } +} + +fn field_to_formatted_field(field: &Field) -> Result<Field> { + let new_type = sedona_type_to_formatted_type(&SedonaType::from_data_type(field.data_type())?)?; + let new_field = field.clone().with_data_type(new_type.data_type()); + Ok(new_field) +} + +fn columnar_value_to_formatted_value( + sedona_type: &SedonaType, + columnar_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => { + geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) + } + SedonaType::Arrow(arrow_type) => match arrow_type { + DataType::Struct(fields) => match columnar_value { + ColumnarValue::Array(array) => { + let struct_array = array.as_struct(); + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_struct_array))) + } + ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => { + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::Struct(Arc::new( + formatted_struct_array, + )))) + } + _ => internal_err!("Unsupported struct columnar value"), + }, + DataType::List(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list::<i32>(); + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + ColumnarValue::Scalar(ScalarValue::List(list_array)) => { + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::List(Arc::new( + formatted_list_array, + )))) + } + _ => internal_err!("Unsupported list columnar value"), + }, + DataType::ListView(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list_view::<i32>(); + let formatted_list_array = + list_view_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + _ => internal_err!("Unsupported list view columnar value"), + }, + _ => Ok(columnar_value.clone()), + }, + } +} + +/// Implementation format geometry or geography +/// +/// This is very similar to ST_AsText except it respects the width_hint by +/// stopping the render for each item when too many characters have been written. +fn geospatial_value_to_formatted_value( + sedona_type: &SedonaType, + geospatial_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + let arg_types: &[SedonaType] = std::slice::from_ref(sedona_type); + let args: &[ColumnarValue] = std::slice::from_ref(geospatial_value); + let executor = WkbExecutor::new(arg_types, args); + + let min_output_size = match maybe_width_hint { + Some(width_hint) => executor.num_iterations() * width_hint, + None => executor.num_iterations() * 25, + }; + + // Initialize an output builder of the appropriate type + let mut builder = StringBuilder::with_capacity(executor.num_iterations(), min_output_size); + + executor.execute_wkb_void(|maybe_item| { + match maybe_item { + Some(item) => { + let mut builder_wrapper = + LimitedSizeOutput::new(&mut builder, maybe_width_hint.unwrap_or(usize::MAX)); + + // We ignore this error on purpose: we raised it on purpose to prevent + // the WKT writer from writing too many characters + #[allow(unused_must_use)] + wkt::to_wkt::write_geometry(&mut builder_wrapper, &item); - let min_output_size = match maybe_width_hint { - Some(width_hint) => executor.num_iterations() * width_hint, - None => executor.num_iterations() * 25, + builder.append_value(""); + } + None => builder.append_null(), }; - // Initialize an output builder of the appropriate type - let mut builder = StringBuilder::with_capacity(executor.num_iterations(), min_output_size); + Ok(()) + })?; - executor.execute_wkb_void(|maybe_item| { - match maybe_item { - Some(item) => { - let mut builder_wrapper = LimitedSizeOutput::new( - &mut builder, - maybe_width_hint.unwrap_or(usize::MAX), - ); + executor.finish(Arc::new(builder.finish())) +} - // We ignore this error on purpose: we raised it on purpose to prevent - // the WKT writer from writing too many characters - #[allow(unused_must_use)] - wkt::to_wkt::write_geometry(&mut builder_wrapper, &item); +fn struct_value_to_formatted_value( + fields: &Fields, + struct_array: &StructArray, + maybe_width_hint: Option<usize>, +) -> Result<StructArray> { + let columns = struct_array.columns(); - builder.append_value(""); - } - None => builder.append_null(), - }; + let mut new_fields = Vec::with_capacity(columns.len()); + for (column, field) in columns.iter().zip(fields) { + let new_field = field_to_formatted_field(field)?; + let sedona_type = SedonaType::from_data_type(field.data_type())?; + let unwrapped_column = sedona_type.unwrap_array(column)?; + let new_column = columnar_value_to_formatted_value( + &sedona_type, + &ColumnarValue::Array(unwrapped_column), + maybe_width_hint, + )?; - Ok(()) - })?; + let ColumnarValue::Array(new_array) = new_column else { + return internal_err!("Expected Array"); Review Comment: The error message 'Expected Array' is not very descriptive. Consider providing more context about where this error occurred and what was received instead, such as 'Expected Array in struct field formatting, got: {actual_type}'. ```suggestion return internal_err!( "Expected Array in struct field formatting, got: {:?}", new_column ); ``` ########## rust/sedona-functions/src/sd_format.rs: ########## @@ -113,39 +97,236 @@ impl SedonaScalarKernel for SDFormatGeometry { } } - let executor = WkbExecutor::new(&arg_types[0..1], &args[0..1]); + let formatted_type = sedona_type_to_formatted_type(&arg_types[0])?; + if formatted_type == arg_types[0] { + // No change in type, the input data does not have geospatial columns that we can format, + // so just return the input value + return Ok(args[0].clone()); + } + + columnar_value_to_formatted_value(&arg_types[0], &args[0], maybe_width_hint) + } +} + +fn sedona_type_to_formatted_type(sedona_type: &SedonaType) -> Result<SedonaType> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Ok(SedonaType::Arrow(DataType::Utf8)), + SedonaType::Arrow(arrow_type) => { + // dive into the arrow type and translate geospatial types into Utf8 + match arrow_type { + DataType::Struct(fields) => { + let mut new_fields = Vec::with_capacity(fields.len()); + for field in fields { + let new_field = field_to_formatted_field(field)?; + new_fields.push(Arc::new(new_field)); + } + Ok(SedonaType::Arrow(DataType::Struct(new_fields.into()))) + } + DataType::List(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::List(Arc::new(new_field)))) + } + DataType::ListView(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::ListView(Arc::new(new_field)))) + } + _ => Ok(sedona_type.clone()), + } + } + } +} + +fn field_to_formatted_field(field: &Field) -> Result<Field> { + let new_type = sedona_type_to_formatted_type(&SedonaType::from_data_type(field.data_type())?)?; + let new_field = field.clone().with_data_type(new_type.data_type()); + Ok(new_field) +} + +fn columnar_value_to_formatted_value( + sedona_type: &SedonaType, + columnar_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => { + geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) + } + SedonaType::Arrow(arrow_type) => match arrow_type { + DataType::Struct(fields) => match columnar_value { + ColumnarValue::Array(array) => { + let struct_array = array.as_struct(); + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_struct_array))) + } + ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => { + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::Struct(Arc::new( + formatted_struct_array, + )))) + } + _ => internal_err!("Unsupported struct columnar value"), + }, + DataType::List(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list::<i32>(); + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + ColumnarValue::Scalar(ScalarValue::List(list_array)) => { + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::List(Arc::new( + formatted_list_array, + )))) + } + _ => internal_err!("Unsupported list columnar value"), + }, + DataType::ListView(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list_view::<i32>(); + let formatted_list_array = + list_view_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + _ => internal_err!("Unsupported list view columnar value"), + }, + _ => Ok(columnar_value.clone()), + }, + } +} + +/// Implementation format geometry or geography +/// +/// This is very similar to ST_AsText except it respects the width_hint by +/// stopping the render for each item when too many characters have been written. +fn geospatial_value_to_formatted_value( + sedona_type: &SedonaType, + geospatial_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + let arg_types: &[SedonaType] = std::slice::from_ref(sedona_type); + let args: &[ColumnarValue] = std::slice::from_ref(geospatial_value); + let executor = WkbExecutor::new(arg_types, args); + + let min_output_size = match maybe_width_hint { + Some(width_hint) => executor.num_iterations() * width_hint, + None => executor.num_iterations() * 25, + }; + + // Initialize an output builder of the appropriate type + let mut builder = StringBuilder::with_capacity(executor.num_iterations(), min_output_size); + + executor.execute_wkb_void(|maybe_item| { + match maybe_item { + Some(item) => { + let mut builder_wrapper = + LimitedSizeOutput::new(&mut builder, maybe_width_hint.unwrap_or(usize::MAX)); + + // We ignore this error on purpose: we raised it on purpose to prevent + // the WKT writer from writing too many characters + #[allow(unused_must_use)] + wkt::to_wkt::write_geometry(&mut builder_wrapper, &item); - let min_output_size = match maybe_width_hint { - Some(width_hint) => executor.num_iterations() * width_hint, - None => executor.num_iterations() * 25, + builder.append_value(""); + } + None => builder.append_null(), }; - // Initialize an output builder of the appropriate type - let mut builder = StringBuilder::with_capacity(executor.num_iterations(), min_output_size); + Ok(()) + })?; - executor.execute_wkb_void(|maybe_item| { - match maybe_item { - Some(item) => { - let mut builder_wrapper = LimitedSizeOutput::new( - &mut builder, - maybe_width_hint.unwrap_or(usize::MAX), - ); + executor.finish(Arc::new(builder.finish())) +} - // We ignore this error on purpose: we raised it on purpose to prevent - // the WKT writer from writing too many characters - #[allow(unused_must_use)] - wkt::to_wkt::write_geometry(&mut builder_wrapper, &item); +fn struct_value_to_formatted_value( + fields: &Fields, + struct_array: &StructArray, + maybe_width_hint: Option<usize>, +) -> Result<StructArray> { + let columns = struct_array.columns(); - builder.append_value(""); - } - None => builder.append_null(), - }; + let mut new_fields = Vec::with_capacity(columns.len()); + for (column, field) in columns.iter().zip(fields) { + let new_field = field_to_formatted_field(field)?; + let sedona_type = SedonaType::from_data_type(field.data_type())?; + let unwrapped_column = sedona_type.unwrap_array(column)?; + let new_column = columnar_value_to_formatted_value( + &sedona_type, + &ColumnarValue::Array(unwrapped_column), + maybe_width_hint, + )?; - Ok(()) - })?; + let ColumnarValue::Array(new_array) = new_column else { + return internal_err!("Expected Array"); + }; - executor.finish(Arc::new(builder.finish())) + new_fields.push((Arc::new(new_field), new_array)); } + + Ok(StructArray::from(new_fields)) +} + +fn list_value_to_formatted_value<OffsetSize: OffsetSizeTrait>( + field: &Field, + list_array: &GenericListArray<OffsetSize>, + maybe_width_hint: Option<usize>, +) -> Result<GenericListArray<OffsetSize>> { + let values_array = list_array.values(); + let offsets = list_array.offsets(); + let nulls = list_array.nulls(); + + let new_field = field_to_formatted_field(field)?; + let sedona_type = SedonaType::from_data_type(field.data_type())?; + let unwrapped_values_array = sedona_type.unwrap_array(values_array)?; + let new_columnar_value = columnar_value_to_formatted_value( + &sedona_type, + &ColumnarValue::Array(unwrapped_values_array), + maybe_width_hint, + )?; + let ColumnarValue::Array(new_values_array) = new_columnar_value else { + return internal_err!("Expected Array"); Review Comment: Same issue as Comment 3 - the error message 'Expected Array' lacks context. Consider providing more specific error information about the list formatting operation. ```suggestion return internal_err!( "Expected Array when formatting list for field '{}', but got: {:?}", field.name(), new_columnar_value ); ``` ########## rust/sedona-functions/src/sd_format.rs: ########## @@ -113,39 +97,236 @@ impl SedonaScalarKernel for SDFormatGeometry { } } - let executor = WkbExecutor::new(&arg_types[0..1], &args[0..1]); + let formatted_type = sedona_type_to_formatted_type(&arg_types[0])?; + if formatted_type == arg_types[0] { + // No change in type, the input data does not have geospatial columns that we can format, + // so just return the input value + return Ok(args[0].clone()); + } + + columnar_value_to_formatted_value(&arg_types[0], &args[0], maybe_width_hint) + } +} + +fn sedona_type_to_formatted_type(sedona_type: &SedonaType) -> Result<SedonaType> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Ok(SedonaType::Arrow(DataType::Utf8)), + SedonaType::Arrow(arrow_type) => { + // dive into the arrow type and translate geospatial types into Utf8 + match arrow_type { + DataType::Struct(fields) => { + let mut new_fields = Vec::with_capacity(fields.len()); + for field in fields { + let new_field = field_to_formatted_field(field)?; + new_fields.push(Arc::new(new_field)); + } + Ok(SedonaType::Arrow(DataType::Struct(new_fields.into()))) + } + DataType::List(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::List(Arc::new(new_field)))) + } + DataType::ListView(field) => { + let new_field = field_to_formatted_field(field)?; + Ok(SedonaType::Arrow(DataType::ListView(Arc::new(new_field)))) + } + _ => Ok(sedona_type.clone()), + } + } + } +} + +fn field_to_formatted_field(field: &Field) -> Result<Field> { + let new_type = sedona_type_to_formatted_type(&SedonaType::from_data_type(field.data_type())?)?; + let new_field = field.clone().with_data_type(new_type.data_type()); + Ok(new_field) +} + +fn columnar_value_to_formatted_value( + sedona_type: &SedonaType, + columnar_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + match sedona_type { + SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => { + geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) + } + SedonaType::Arrow(arrow_type) => match arrow_type { + DataType::Struct(fields) => match columnar_value { + ColumnarValue::Array(array) => { + let struct_array = array.as_struct(); + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_struct_array))) + } + ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => { + let formatted_struct_array = + struct_value_to_formatted_value(fields, struct_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::Struct(Arc::new( + formatted_struct_array, + )))) + } + _ => internal_err!("Unsupported struct columnar value"), + }, + DataType::List(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list::<i32>(); + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + ColumnarValue::Scalar(ScalarValue::List(list_array)) => { + let formatted_list_array = + list_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Scalar(ScalarValue::List(Arc::new( + formatted_list_array, + )))) + } + _ => internal_err!("Unsupported list columnar value"), + }, + DataType::ListView(field) => match columnar_value { + ColumnarValue::Array(array) => { + let list_array = array.as_list_view::<i32>(); + let formatted_list_array = + list_view_value_to_formatted_value(field, list_array, maybe_width_hint)?; + Ok(ColumnarValue::Array(Arc::new(formatted_list_array))) + } + _ => internal_err!("Unsupported list view columnar value"), + }, + _ => Ok(columnar_value.clone()), + }, + } +} + +/// Implementation format geometry or geography +/// +/// This is very similar to ST_AsText except it respects the width_hint by +/// stopping the render for each item when too many characters have been written. +fn geospatial_value_to_formatted_value( + sedona_type: &SedonaType, + geospatial_value: &ColumnarValue, + maybe_width_hint: Option<usize>, +) -> Result<ColumnarValue> { + let arg_types: &[SedonaType] = std::slice::from_ref(sedona_type); + let args: &[ColumnarValue] = std::slice::from_ref(geospatial_value); + let executor = WkbExecutor::new(arg_types, args); + + let min_output_size = match maybe_width_hint { + Some(width_hint) => executor.num_iterations() * width_hint, + None => executor.num_iterations() * 25, + }; + + // Initialize an output builder of the appropriate type + let mut builder = StringBuilder::with_capacity(executor.num_iterations(), min_output_size); + + executor.execute_wkb_void(|maybe_item| { + match maybe_item { + Some(item) => { + let mut builder_wrapper = + LimitedSizeOutput::new(&mut builder, maybe_width_hint.unwrap_or(usize::MAX)); + + // We ignore this error on purpose: we raised it on purpose to prevent + // the WKT writer from writing too many characters + #[allow(unused_must_use)] + wkt::to_wkt::write_geometry(&mut builder_wrapper, &item); - let min_output_size = match maybe_width_hint { - Some(width_hint) => executor.num_iterations() * width_hint, - None => executor.num_iterations() * 25, + builder.append_value(""); + } + None => builder.append_null(), }; - // Initialize an output builder of the appropriate type - let mut builder = StringBuilder::with_capacity(executor.num_iterations(), min_output_size); + Ok(()) + })?; - executor.execute_wkb_void(|maybe_item| { - match maybe_item { - Some(item) => { - let mut builder_wrapper = LimitedSizeOutput::new( - &mut builder, - maybe_width_hint.unwrap_or(usize::MAX), - ); + executor.finish(Arc::new(builder.finish())) +} - // We ignore this error on purpose: we raised it on purpose to prevent - // the WKT writer from writing too many characters - #[allow(unused_must_use)] - wkt::to_wkt::write_geometry(&mut builder_wrapper, &item); +fn struct_value_to_formatted_value( + fields: &Fields, + struct_array: &StructArray, + maybe_width_hint: Option<usize>, +) -> Result<StructArray> { + let columns = struct_array.columns(); - builder.append_value(""); - } - None => builder.append_null(), - }; + let mut new_fields = Vec::with_capacity(columns.len()); + for (column, field) in columns.iter().zip(fields) { + let new_field = field_to_formatted_field(field)?; + let sedona_type = SedonaType::from_data_type(field.data_type())?; + let unwrapped_column = sedona_type.unwrap_array(column)?; + let new_column = columnar_value_to_formatted_value( + &sedona_type, + &ColumnarValue::Array(unwrapped_column), + maybe_width_hint, + )?; - Ok(()) - })?; + let ColumnarValue::Array(new_array) = new_column else { + return internal_err!("Expected Array"); + }; - executor.finish(Arc::new(builder.finish())) + new_fields.push((Arc::new(new_field), new_array)); } + + Ok(StructArray::from(new_fields)) +} + +fn list_value_to_formatted_value<OffsetSize: OffsetSizeTrait>( + field: &Field, + list_array: &GenericListArray<OffsetSize>, + maybe_width_hint: Option<usize>, +) -> Result<GenericListArray<OffsetSize>> { + let values_array = list_array.values(); + let offsets = list_array.offsets(); + let nulls = list_array.nulls(); + + let new_field = field_to_formatted_field(field)?; + let sedona_type = SedonaType::from_data_type(field.data_type())?; + let unwrapped_values_array = sedona_type.unwrap_array(values_array)?; + let new_columnar_value = columnar_value_to_formatted_value( + &sedona_type, + &ColumnarValue::Array(unwrapped_values_array), + maybe_width_hint, + )?; + let ColumnarValue::Array(new_values_array) = new_columnar_value else { + return internal_err!("Expected Array"); + }; + + Ok(GenericListArray::<OffsetSize>::new( + Arc::new(new_field), + offsets.clone(), + new_values_array, + nulls.cloned(), + )) +} + +fn list_view_value_to_formatted_value<OffsetSize: OffsetSizeTrait>( + field: &Field, + list_view_array: &GenericListViewArray<OffsetSize>, + maybe_width_hint: Option<usize>, +) -> Result<GenericListViewArray<OffsetSize>> { + let values_array = list_view_array.values(); + let offsets = list_view_array.offsets(); + let sizes = list_view_array.sizes(); + let nulls = list_view_array.nulls(); + + let new_field = field_to_formatted_field(field)?; + let sedona_type = SedonaType::from_data_type(field.data_type())?; + let unwrapped_values_array = sedona_type.unwrap_array(values_array)?; + let new_columnar_value = columnar_value_to_formatted_value( + &sedona_type, + &ColumnarValue::Array(unwrapped_values_array), + maybe_width_hint, + )?; + let ColumnarValue::Array(new_values_array) = new_columnar_value else { + return internal_err!("Expected Array"); Review Comment: Same issue as Comment 3 and 4 - the error message 'Expected Array' lacks context. Consider providing more specific error information about the list view formatting operation. ```suggestion return internal_err!( "Expected Array during list view formatting for field '{}' of type '{}'", field.name(), field.data_type() ); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@sedona.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org