tustvold commented on code in PR #4676:
URL: https://github.com/apache/arrow-rs/pull/4676#discussion_r1289995764
##########
parquet/src/arrow/arrow_reader/mod.rs:
##########
@@ -234,48 +221,187 @@ impl ArrowReaderOptions {
}
}
+/// The clone-able metadata necessary to construct a [`ArrowReaderBuilder`]
+///
+/// This allows loading the metadata for a file once and then using this to construct
+/// multiple separate readers, for example, to distribute readers across multiple threads
+#[derive(Debug, Clone)]
+pub struct ArrowReaderMetadata {
+ pub(crate) metadata: Arc<ParquetMetaData>,
+
+ pub(crate) schema: SchemaRef,
+
+ pub(crate) fields: Option<ParquetField>,
+}
+
+impl ArrowReaderMetadata {
+ pub(crate) fn try_new(
+ metadata: Arc<ParquetMetaData>,
+ options: ArrowReaderOptions,
+ ) -> Result<Self> {
+ let kv_metadata = match options.skip_arrow_metadata {
+ true => None,
+ false => metadata.file_metadata().key_value_metadata(),
+ };
+
+ let (schema, fields) = parquet_to_arrow_schema_and_fields(
+ metadata.file_metadata().schema_descr(),
+ ProjectionMask::all(),
+ kv_metadata,
+ )?;
+
+ Ok(Self {
+ metadata,
+ schema: Arc::new(schema),
+ fields,
+ })
+ }
+
+ /// Returns a reference to the [`ParquetMetaData`] for this parquet file
+ pub fn metadata(&self) -> &Arc<ParquetMetaData> {
+ &self.metadata
+ }
+
+ /// Returns the parquet [`SchemaDescriptor`] for this parquet file
+ pub fn parquet_schema(&self) -> &SchemaDescriptor {
+ self.metadata.file_metadata().schema_descr()
+ }
+
+ /// Returns the arrow [`SchemaRef`] for this parquet file
+ pub fn schema(&self) -> &SchemaRef {
+ &self.schema
+ }
+}
+
#[doc(hidden)]
/// A newtype used within [`ReaderOptionsBuilder`] to distinguish sync readers from async
-pub struct SyncReader<T: ChunkReader>(SerializedFileReader<T>);
+pub struct SyncReader<T: ChunkReader>(T);
/// A synchronous builder used to construct [`ParquetRecordBatchReader`] for a file
///
/// For an async API see [`crate::arrow::async_reader::ParquetRecordBatchStreamBuilder`]
pub type ParquetRecordBatchReaderBuilder<T> =
ArrowReaderBuilder<SyncReader<T>>;
-impl<T: ChunkReader + 'static> ArrowReaderBuilder<SyncReader<T>> {
+impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
Review Comment:
This does not change the type, but it improves the docs rendering: the
methods will now be shown on the `ParquetRecordBatchReaderBuilder` type alias.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]