This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f6d8db8  Restrict RecordReader and friends to scalar types (#1132) (#1155)
f6d8db8 is described below

commit f6d8db8c0e27920b9ea2fa3962856a77491a4609
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Jan 11 14:02:30 2022 +0000

    Restrict RecordReader and friends to scalar types (#1132) (#1155)
---
 parquet/src/arrow/array_reader.rs  | 17 +++++++++--------
 parquet/src/arrow/record_reader.rs |  6 +++---
 parquet/src/data_type.rs           | 16 ++++++++++++++++
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/parquet/src/arrow/array_reader.rs b/parquet/src/arrow/array_reader.rs
index 752ca4c..c3e170a 100644
--- a/parquet/src/arrow/array_reader.rs
+++ b/parquet/src/arrow/array_reader.rs
@@ -67,6 +67,7 @@ use crate::arrow::schema::parquet_to_arrow_field;
 use crate::basic::{ConvertedType, Repetition, Type as PhysicalType};
 use crate::column::page::PageIterator;
 use crate::column::reader::ColumnReaderImpl;
+use crate::data_type::private::ScalarDataType;
 use crate::data_type::{
     BoolType, ByteArrayType, DataType, DoubleType, FixedLenByteArrayType, FloatType,
     Int32Type, Int64Type, Int96Type,
@@ -104,7 +105,7 @@ pub trait ArrayReader {
 ///
 /// Returns the number of records read, which can be less than batch_size if
 /// pages is exhausted.
-fn read_records<T: DataType>(
+fn read_records<T: ScalarDataType>(
     record_reader: &mut RecordReader<T>,
     pages: &mut dyn PageIterator,
     batch_size: usize,
@@ -132,7 +133,7 @@ fn read_records<T: DataType>(
 
 /// A NullArrayReader reads Parquet columns stored as null int32s with an Arrow
 /// NullArray type.
-pub struct NullArrayReader<T: DataType> {
+pub struct NullArrayReader<T: ScalarDataType> {
     data_type: ArrowType,
     pages: Box<dyn PageIterator>,
     def_levels_buffer: Option<Buffer>,
@@ -142,7 +143,7 @@ pub struct NullArrayReader<T: DataType> {
     _type_marker: PhantomData<T>,
 }
 
-impl<T: DataType> NullArrayReader<T> {
+impl<T: ScalarDataType> NullArrayReader<T> {
     /// Construct null array reader.
     pub fn new(pages: Box<dyn PageIterator>, column_desc: ColumnDescPtr) -> Result<Self> {
         let record_reader = RecordReader::<T>::new(column_desc.clone());
@@ -160,7 +161,7 @@ impl<T: DataType> NullArrayReader<T> {
 }
 
 /// Implementation of primitive array reader.
-impl<T: DataType> ArrayReader for NullArrayReader<T> {
+impl<T: ScalarDataType> ArrayReader for NullArrayReader<T> {
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -200,7 +201,7 @@ impl<T: DataType> ArrayReader for NullArrayReader<T> {
 
 /// Primitive array readers are leaves of array reader tree. They accept page iterator
 /// and read them into primitive arrays.
-pub struct PrimitiveArrayReader<T: DataType> {
+pub struct PrimitiveArrayReader<T: ScalarDataType> {
     data_type: ArrowType,
     pages: Box<dyn PageIterator>,
     def_levels_buffer: Option<Buffer>,
@@ -210,7 +211,7 @@ pub struct PrimitiveArrayReader<T: DataType> {
     _type_marker: PhantomData<T>,
 }
 
-impl<T: DataType> PrimitiveArrayReader<T> {
+impl<T: ScalarDataType> PrimitiveArrayReader<T> {
     /// Construct primitive array reader.
     pub fn new(
         pages: Box<dyn PageIterator>,
@@ -240,7 +241,7 @@ impl<T: DataType> PrimitiveArrayReader<T> {
 }
 
 /// Implementation of primitive array reader.
-impl<T: DataType> ArrayReader for PrimitiveArrayReader<T> {
+impl<T: ScalarDataType> ArrayReader for PrimitiveArrayReader<T> {
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -288,7 +289,7 @@ impl<T: DataType> ArrayReader for PrimitiveArrayReader<T> {
             }
         };
 
-        // Convert to arrays by using the Parquet phyisical type.
+        // Convert to arrays by using the Parquet physical type.
         // The physical types are then cast to Arrow types if necessary
 
         let mut record_data = self.record_reader.consume_record_data()?;
diff --git a/parquet/src/arrow/record_reader.rs b/parquet/src/arrow/record_reader.rs
index a5c0b47..53db620 100644
--- a/parquet/src/arrow/record_reader.rs
+++ b/parquet/src/arrow/record_reader.rs
@@ -19,7 +19,7 @@ use std::cmp::{max, min};
 use std::mem::{replace, size_of};
 
 use crate::column::{page::PageReader, reader::ColumnReaderImpl};
-use crate::data_type::DataType;
+use crate::data_type::private::ScalarDataType;
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
 use arrow::array::BooleanBufferBuilder;
@@ -29,7 +29,7 @@ use arrow::buffer::{Buffer, MutableBuffer};
 const MIN_BATCH_SIZE: usize = 1024;
 
 /// A `RecordReader` is a stateful column reader that delimits semantic records.
-pub struct RecordReader<T: DataType> {
+pub struct RecordReader<T: ScalarDataType> {
     column_desc: ColumnDescPtr,
 
     records: MutableBuffer,
@@ -47,7 +47,7 @@ pub struct RecordReader<T: DataType> {
     values_written: usize,
 }
 
-impl<T: DataType> RecordReader<T> {
+impl<T: ScalarDataType> RecordReader<T> {
     pub fn new(column_schema: ColumnDescPtr) -> Self {
         let (def_levels, null_map) = if column_schema.max_def_level() > 0 {
             (
diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 6f3468a..73a010a 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -572,6 +572,7 @@ impl AsBytes for str {
 }
 
 pub(crate) mod private {
+    use super::*;
     use crate::encodings::decoding::PlainDecoderDetails;
     use crate::util::bit_util::{round_upto_power_of_2, BitReader, BitWriter};
     use crate::util::memory::ByteBufferPtr;
@@ -1032,6 +1033,21 @@ pub(crate) mod private {
             self
         }
     }
+
+    /// A marker trait for [`DataType`] with a [scalar] physical type
+    ///
+    /// This means that a `[Self::T::default()]` of length `len` can be safely created from a
+    /// zero-initialized `[u8]` with length `len * Self::get_type_size()` and
+    /// alignment of `Self::get_type_size()`
+    ///
+    /// [scalar]: https://doc.rust-lang.org/book/ch03-02-data-types.html#scalar-types
+    ///
+    pub trait ScalarDataType: DataType {}
+    impl ScalarDataType for BoolType {}
+    impl ScalarDataType for Int32Type {}
+    impl ScalarDataType for Int64Type {}
+    impl ScalarDataType for FloatType {}
+    impl ScalarDataType for DoubleType {}
 }
 
 /// Contains the Parquet physical type information as well as the Rust primitive type

Reply via email to