scovich commented on code in PR #8233:
URL: https://github.com/apache/arrow-rs/pull/8233#discussion_r2307773304
##########
parquet-variant-compute/src/cast_to_variant.rs:
##########
@@ -46,72 +46,81 @@ use parquet_variant::{
Variant, VariantBuilder, VariantDecimal16, VariantDecimal4,
VariantDecimal8,
};
-fn convert_timestamp(
+fn convert_timestamp_with_options(
time_unit: &TimeUnit,
time_zone: &Option<Arc<str>>,
input: &dyn Array,
builder: &mut VariantArrayBuilder,
-) {
+ strict: bool,
+) -> Result<(), ArrowError> {
let native_datetimes: Vec<Option<NaiveDateTime>> = match time_unit {
arrow_schema::TimeUnit::Second => {
let ts_array = input
.as_any()
.downcast_ref::<TimestampSecondArray>()
.expect("Array is not TimestampSecondArray");
-
- ts_array
- .iter()
- .map(|x| x.map(|y| timestamp_s_to_datetime(y).unwrap()))
- .collect()
+ timestamp_to_variant_timestamp!(ts_array, timestamp_s_to_datetime,
"seconds", strict)
}
arrow_schema::TimeUnit::Millisecond => {
let ts_array = input
.as_any()
.downcast_ref::<TimestampMillisecondArray>()
.expect("Array is not TimestampMillisecondArray");
-
- ts_array
- .iter()
- .map(|x| x.map(|y| timestamp_ms_to_datetime(y).unwrap()))
- .collect()
+ timestamp_to_variant_timestamp!(
+ ts_array,
+ timestamp_ms_to_datetime,
+ "milliseconds",
+ strict
+ )
}
arrow_schema::TimeUnit::Microsecond => {
let ts_array = input
.as_any()
.downcast_ref::<TimestampMicrosecondArray>()
.expect("Array is not TimestampMicrosecondArray");
- ts_array
- .iter()
- .map(|x| x.map(|y| timestamp_us_to_datetime(y).unwrap()))
- .collect()
+ timestamp_to_variant_timestamp!(
+ ts_array,
+ timestamp_us_to_datetime,
+ "microseconds",
+ strict
+ )
}
arrow_schema::TimeUnit::Nanosecond => {
let ts_array = input
.as_any()
.downcast_ref::<TimestampNanosecondArray>()
.expect("Array is not TimestampNanosecondArray");
- ts_array
- .iter()
- .map(|x| x.map(|y| timestamp_ns_to_datetime(y).unwrap()))
- .collect()
+ timestamp_to_variant_timestamp!(
+ ts_array,
+ timestamp_ns_to_datetime,
+ "nanoseconds",
+ strict
+ )
}
};
- for x in native_datetimes {
+ for (i, x) in native_datetimes.iter().enumerate() {
match x {
Some(ndt) => {
if time_zone.is_none() {
- builder.append_variant(ndt.into());
+ builder.append_variant((*ndt).into());
} else {
- let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&ndt);
+ let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(ndt);
builder.append_variant(utc_dt.into());
}
}
None => {
+ if strict && input.is_valid(i) {
+ return Err(ArrowError::ComputeError(format!(
+ "Failed to convert timestamp at index {}: invalid
timestamp value",
+ i
+ )));
+ }
builder.append_null();
Review Comment:
nit: just adding an extra match arm with a guard might be easier to read?
```patch
}
+None if strict && input.is_valid(i) {
+ return Err(...);
+}
None => {
```
##########
parquet-variant-compute/src/cast_to_variant.rs:
##########
@@ -143,7 +152,14 @@ fn convert_timestamp(
/// `1970-01-01T00:00:01.234567890Z`
/// will be truncated to
/// `1970-01-01T00:00:01.234567Z`
-pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
+///
+/// # Arguments
+/// * `input` - The array to convert to VariantArray
+/// * `strict` - If true, return error on conversion failure. If false, insert
null for failed conversions.
+pub fn cast_to_variant_with_options(
+ input: &dyn Array,
+ strict: bool,
Review Comment:
Is there any possibility/risk that we may need additional options in the
future? If so, it might be better to pass a struct? Or maybe we just deal with
that if/when it comes?
##########
parquet-variant-compute/src/cast_to_variant.rs:
##########
@@ -545,6 +563,14 @@ pub fn cast_to_variant(input: &dyn Array) ->
Result<VariantArray, ArrowError> {
Ok(builder.build())
}
+/// Convert an array to a `VariantArray` with strict mode enabled (panics on
conversion failures).
+///
+/// This function provides backward compatibility. For non-panicking behavior,
+/// use `cast_to_variant_with_options` with `strict = false`.
Review Comment:
I don't think passing `false` panics either? Doesn't the `strict` flag just
decide whether a failed cast becomes NULL vs. error?
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -123,3 +158,30 @@ macro_rules! decimal_to_variant_decimal {
}};
}
pub(crate) use decimal_to_variant_decimal;
+
+/// Convert a timestamp value to a `VariantTimestamp`
+macro_rules! timestamp_to_variant_timestamp {
+ ($ts_array:expr, $converter:expr, $unit_name:expr, $strict:expr) => {
+ if $strict {
+ let mut result = Vec::with_capacity($ts_array.len());
+ for x in $ts_array.iter() {
+ match x {
+ Some(y) => match $converter(y) {
+ Some(dt) => result.push(Some(dt)),
+ None => {
+ return Err(ArrowError::ComputeError(format!(
+ "Invalid timestamp {} value",
+ $unit_name
+ )))
+ }
+ },
+ None => result.push(None),
+ }
+ }
+ result
Review Comment:
Would this work?
```suggestion
let error = || ArrowError::ComputeError(format!(
"Invalid timestamp {} value",
$unit_name
));
let converter = |x| $converter(x).ok_or_else(error);
let iter = $ts_array.iter().map(|x|
x.map(converter).transpose());
iter.collect::<Result<Vec<_>, ArrowErrors>()?
```
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -32,6 +33,31 @@ macro_rules! non_generic_conversion_array {
$builder.append_variant(Variant::from(cast_value));
}
}};
+ ($array:expr, $cast_fn:expr, $builder:expr, $strict:expr) => {{
+ let array = $array;
+ for i in 0..array.len() {
+ if array.is_null(i) {
+ $builder.append_null();
+ continue;
+ }
+ match $cast_fn(array.value(i)) {
+ Some(cast_value) => {
+ $builder.append_variant(Variant::from(cast_value));
+ }
+ None => {
+ if $strict {
+ return Err(ArrowError::ComputeError(format!(
+ "Failed to convert value at index {}: conversion
failed",
+ i
+ )));
+ } else {
+ $builder.append_null();
+ }
+ }
Review Comment:
similar to above
```suggestion
Some(cast_value) =>
$builder.append_variant(Variant::from(cast_value)),
None if $strict => {
return Err(ArrowError::ComputeError(format!(
"Failed to convert value at index {i}: conversion
failed",
)));
}
None => $builder.append_null(),
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]