alamb commented on a change in pull request #8402:
URL: https://github.com/apache/arrow/pull/8402#discussion_r513327799



##########
File path: rust/parquet/src/arrow/array_reader.rs
##########
@@ -267,90 +267,79 @@ impl<T: DataType> ArrayReader for PrimitiveArrayReader<T> 
{
             }
         }
 
-        // convert to arrays
-        let array =
-            match (&self.data_type, T::get_physical_type()) {
-                (ArrowType::Boolean, PhysicalType::BOOLEAN) => {
-                    BoolConverter::new(BooleanArrayConverter {})
-                        .convert(self.record_reader.cast::<BoolType>())
-                }
-                (ArrowType::Int8, PhysicalType::INT32) => {
-                    
Int8Converter::new().convert(self.record_reader.cast::<Int32Type>())
-                }
-                (ArrowType::Int16, PhysicalType::INT32) => {
-                    
Int16Converter::new().convert(self.record_reader.cast::<Int32Type>())
-                }
-                (ArrowType::Int32, PhysicalType::INT32) => {
-                    
Int32Converter::new().convert(self.record_reader.cast::<Int32Type>())
-                }
-                (ArrowType::UInt8, PhysicalType::INT32) => {
-                    
UInt8Converter::new().convert(self.record_reader.cast::<Int32Type>())
-                }
-                (ArrowType::UInt16, PhysicalType::INT32) => {
-                    
UInt16Converter::new().convert(self.record_reader.cast::<Int32Type>())
-                }
-                (ArrowType::UInt32, PhysicalType::INT32) => {
-                    
UInt32Converter::new().convert(self.record_reader.cast::<Int32Type>())
-                }
-                (ArrowType::Int64, PhysicalType::INT64) => {
-                    
Int64Converter::new().convert(self.record_reader.cast::<Int64Type>())
-                }
-                (ArrowType::UInt64, PhysicalType::INT64) => {
-                    
UInt64Converter::new().convert(self.record_reader.cast::<Int64Type>())
-                }
-                (ArrowType::Float32, PhysicalType::FLOAT) => 
Float32Converter::new()
-                    .convert(self.record_reader.cast::<FloatType>()),
-                (ArrowType::Float64, PhysicalType::DOUBLE) => 
Float64Converter::new()
-                    .convert(self.record_reader.cast::<DoubleType>()),
-                (ArrowType::Timestamp(unit, _), PhysicalType::INT64) => match 
unit {
-                    TimeUnit::Millisecond => 
TimestampMillisecondConverter::new()
-                        .convert(self.record_reader.cast::<Int64Type>()),
-                    TimeUnit::Microsecond => 
TimestampMicrosecondConverter::new()
-                        .convert(self.record_reader.cast::<Int64Type>()),
-                    _ => Err(general_err!("No conversion from parquet type to 
arrow type for timestamp with unit {:?}", unit)),
-                },
-                (ArrowType::Date32(unit), PhysicalType::INT32) => match unit {
-                    DateUnit::Day => Date32Converter::new()
-                        .convert(self.record_reader.cast::<Int32Type>()),
-                    _ => Err(general_err!("No conversion from parquet type to 
arrow type for date with unit {:?}", unit)),
-                }
-                (ArrowType::Time32(unit), PhysicalType::INT32) => {
-                    match unit {
-                        TimeUnit::Second => {
-                            
Time32SecondConverter::new().convert(self.record_reader.cast::<Int32Type>())
-                        }
-                        TimeUnit::Millisecond => {
-                            
Time32MillisecondConverter::new().convert(self.record_reader.cast::<Int32Type>())
-                        }
-                        _ => Err(general_err!("Invalid or unsupported arrow 
array with datatype {:?}", self.get_data_type()))
-                    }
-                }
-                (ArrowType::Time64(unit), PhysicalType::INT64) => {
-                    match unit {
-                        TimeUnit::Microsecond => {
-                            
Time64MicrosecondConverter::new().convert(self.record_reader.cast::<Int64Type>())
-                        }
-                        TimeUnit::Nanosecond => {
-                            
Time64NanosecondConverter::new().convert(self.record_reader.cast::<Int64Type>())
-                        }
-                        _ => Err(general_err!("Invalid or unsupported arrow 
array with datatype {:?}", self.get_data_type()))
-                    }
-                }
-                (ArrowType::Interval(IntervalUnit::YearMonth), 
PhysicalType::INT32) => {
-                    
UInt32Converter::new().convert(self.record_reader.cast::<Int32Type>())
-                }
-                (ArrowType::Interval(IntervalUnit::DayTime), 
PhysicalType::INT64) => {
-                    
UInt64Converter::new().convert(self.record_reader.cast::<Int64Type>())
-                }
-                (ArrowType::Duration(_), PhysicalType::INT64) => {
-                    
UInt64Converter::new().convert(self.record_reader.cast::<Int64Type>())
-                }
-                (arrow_type, physical_type) => Err(general_err!(
-                    "Reading {:?} type from parquet {:?} is not supported 
yet.",
-                    arrow_type,
-                    physical_type
-                )),
-            }?;
+        let arrow_data_type = match T::get_physical_type() {
+            PhysicalType::BOOLEAN => ArrowBooleanType::DATA_TYPE,
+            PhysicalType::INT32 => ArrowInt32Type::DATA_TYPE,
+            PhysicalType::INT64 => ArrowInt64Type::DATA_TYPE,
+            PhysicalType::FLOAT => ArrowFloat32Type::DATA_TYPE,
+            PhysicalType::DOUBLE => ArrowFloat64Type::DATA_TYPE,
+            PhysicalType::INT96
+            | PhysicalType::BYTE_ARRAY
+            | PhysicalType::FIXED_LEN_BYTE_ARRAY => {
+                unreachable!(
+                    "PrimitiveArrayReaders don't support complex physical 
types"
+                );
+            }
+        };
+
+        // Convert to arrays by using the Parquet phyisical type.
+        // The physical types are then cast to Arrow types if necessary
+
+        let mut record_data = self.record_reader.consume_record_data()?;
+
+        if T::get_physical_type() == PhysicalType::BOOLEAN {
+            let mut boolean_buffer = 
BooleanBufferBuilder::new(record_data.len());
+
+            for e in record_data.data() {
+                boolean_buffer.append(*e > 0)?;
+            }
+            record_data = boolean_buffer.finish();
+        }
+
+        let mut array_data = ArrayDataBuilder::new(arrow_data_type)
+            .len(self.record_reader.num_values())
+            .add_buffer(record_data);
+
+        if let Some(b) = self.record_reader.consume_bitmap_buffer()? {
+            array_data = array_data.null_bit_buffer(b);
+        }
+
+        let array = match T::get_physical_type() {
+            PhysicalType::BOOLEAN => {
+                
Arc::new(PrimitiveArray::<ArrowBooleanType>::from(array_data.build()))
+                    as ArrayRef
+            }
+            PhysicalType::INT32 => {
+                
Arc::new(PrimitiveArray::<ArrowInt32Type>::from(array_data.build()))
+                    as ArrayRef
+            }
+            PhysicalType::INT64 => {
+                
Arc::new(PrimitiveArray::<ArrowInt64Type>::from(array_data.build()))
+                    as ArrayRef
+            }
+            PhysicalType::FLOAT => {
+                
Arc::new(PrimitiveArray::<ArrowFloat32Type>::from(array_data.build()))
+                    as ArrayRef
+            }
+            PhysicalType::DOUBLE => {
+                
Arc::new(PrimitiveArray::<ArrowFloat64Type>::from(array_data.build()))
+                    as ArrayRef
+            }
+            PhysicalType::INT96
+            | PhysicalType::BYTE_ARRAY
+            | PhysicalType::FIXED_LEN_BYTE_ARRAY => {
+                unreachable!(
+                    "PrimitiveArrayReaders don't support complex physical 
types"
+                );
+            }
+        };
+
+        // cast to Arrow type
+        // TODO: we need to check if it's fine for this to be fallible.
+        // My assumption is that we can't get to an illegal cast as we can only
+        // generate types that are supported, because we'd have gotten them 
from
+        // the metadata which was written to the Parquet sink

Review comment:
       If you want to check if a cast is supported by the cast kernel, perhaps 
you could use the `can_cast_types` function - 
https://github.com/apache/arrow/blob/master/rust/arrow/src/compute/kernels/cast.rs#L51
   
   That way the code could fail early and produce a helpful error message 
rather than a panic




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to