scovich commented on code in PR #8666:
URL: https://github.com/apache/arrow-rs/pull/8666#discussion_r2445900714


##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -192,94 +197,111 @@ pub(crate) fn 
make_primitive_variant_to_arrow_row_builder<'a>(
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder =
-        match data_type {
-            DataType::Boolean => {
-                Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, 
capacity))
-            }
-            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Float16 => 
Float16(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Float32 => 
Float32(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Float64 => 
Float64(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Decimal32(precision, scale) => Decimal32(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Decimal64(precision, scale) => Decimal64(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Decimal128(precision, scale) => Decimal128(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Decimal256(precision, scale) => Decimal256(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Timestamp(TimeUnit::Microsecond, None) => 
TimestampMicroNtz(
-                VariantToTimestampNtzArrowRowBuilder::new(cast_options, 
capacity),
-            ),
-            DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
-                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-            ),
-            DataType::Timestamp(TimeUnit::Nanosecond, None) => 
TimestampNanoNtz(
-                VariantToTimestampNtzArrowRowBuilder::new(cast_options, 
capacity),
-            ),
-            DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
-                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-            ),
-            DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            _ if data_type.is_primitive() => {
-                return Err(ArrowError::NotYetImplemented(format!(
-                    "Primitive data_type {data_type:?} not yet implemented"
-                )));
-            }
-            _ => {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Not a primitive type: {data_type:?}"
-                )));
-            }
-        };
+    let builder = match data_type {
+        DataType::Boolean => 
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
+        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Decimal32(precision, scale) => 
Decimal32(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Decimal64(precision, scale) => 
Decimal64(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Decimal128(precision, scale) => 
Decimal128(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Decimal256(precision, scale) => 
Decimal256(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
+            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+        ),
+        DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
+            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
+        ),
+        DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
+            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+        ),
+        DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
+            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
+        ),
+        DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::FixedSizeBinary(16) => {
+            Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
+        }
+        DataType::FixedSizeBinary(size) => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinary({size}) is not a valid variant shredding 
type. Only FixedSizeBinary(16) for UUID is supported."

Review Comment:
   Just double checking -- we have reliable ways of identifying Decimal128 and 
Decimal256 (they have proper arrow data types), so we don't need to ever 
interpret raw FixedSizeBinary as decimal, even tho it _is_ a valid physical 
representation for those larger decimal types?



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -504,6 +526,50 @@ where
     }
 }
 
+/// Builder for converting variant values to FixedSizeBinary(16) for UUIDs
+pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
+    builder: FixedSizeBinaryBuilder,
+    cast_options: &'a CastOptions<'a>,
+}
+
+impl<'a> VariantToUuidArrowRowBuilder<'a> {
+    fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
+        Self {
+            builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
+            cast_options,
+        }
+    }
+
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null();
+        Ok(())
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        match value.as_uuid() {
+            Some(uuid) => {
+                self.builder
+                    .append_value(uuid.as_bytes())
+                    .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
+
+                Ok(true)
+            }
+            None if self.cast_options.safe => {
+                self.builder.append_null();
+                Ok(false)
+            }
+            None => Err(ArrowError::CastError(format!(
+                "Failed to extract UUID from variant {:?}",
+                value

Review Comment:
   ```suggestion
                   "Failed to extract UUID from variant {value:?}",
   ```



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -192,94 +197,111 @@ pub(crate) fn 
make_primitive_variant_to_arrow_row_builder<'a>(
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder =
-        match data_type {
-            DataType::Boolean => {
-                Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, 
capacity))
-            }
-            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Float16 => 
Float16(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Float32 => 
Float32(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Float64 => 
Float64(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            DataType::Decimal32(precision, scale) => Decimal32(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Decimal64(precision, scale) => Decimal64(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Decimal128(precision, scale) => Decimal128(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Decimal256(precision, scale) => Decimal256(
-                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
-            ),
-            DataType::Timestamp(TimeUnit::Microsecond, None) => 
TimestampMicroNtz(
-                VariantToTimestampNtzArrowRowBuilder::new(cast_options, 
capacity),
-            ),
-            DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
-                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-            ),
-            DataType::Timestamp(TimeUnit::Nanosecond, None) => 
TimestampNanoNtz(
-                VariantToTimestampNtzArrowRowBuilder::new(cast_options, 
capacity),
-            ),
-            DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
-                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-            ),
-            DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
-                cast_options,
-                capacity,
-            )),
-            _ if data_type.is_primitive() => {
-                return Err(ArrowError::NotYetImplemented(format!(
-                    "Primitive data_type {data_type:?} not yet implemented"
-                )));
-            }
-            _ => {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Not a primitive type: {data_type:?}"
-                )));
-            }
-        };
+    let builder = match data_type {
+        DataType::Boolean => 
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
+        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::Decimal32(precision, scale) => 
Decimal32(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Decimal64(precision, scale) => 
Decimal64(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Decimal128(precision, scale) => 
Decimal128(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Decimal256(precision, scale) => 
Decimal256(VariantToDecimalArrowRowBuilder::new(
+            cast_options,
+            capacity,
+            *precision,
+            *scale,
+        )?),
+        DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
+            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+        ),
+        DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
+            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
+        ),
+        DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
+            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+        ),
+        DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
+            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
+        ),
+        DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
+            cast_options,
+            capacity,
+        )),
+        DataType::FixedSizeBinary(16) => {
+            Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
+        }
+        DataType::FixedSizeBinary(size) => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinary({size}) is not a valid variant shredding 
type. Only FixedSizeBinary(16) for UUID is supported."
+            )));
+        }

Review Comment:
   This is a weird quick of `fmt` that I keep running into -- it seems to flip 
unpredictably back and forth between two forms:
   ```rust
   let builder = match data_type {
   ```
   vs
   ```rust
   let builder = 
       match data_type {
   ```
   IMO it's a bug in `fmt`. I _think_ it's intentional because changes in 
indentation can impact whether certain things fit on one line vs. several? For 
example, indenting _further_ can ironically _reduce_ line counts because of 
"fun" interactions with the rule that every arg goes on its own line, if they 
don't all fit on the same line as the open+close parens. 
   
   Case in point -- this PR actually has _more_ lines, because de-denting the 
four DecimalXX match arms caused them to put every arg on its own line. Even 
tho de-denting also saved a couple lines for the Boolean match arm.
   
   Anyway, I usually just review with whitespace ignored when I see this...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to