corasaurus-hex commented on code in PR #18457:
URL: https://github.com/apache/datafusion/pull/18457#discussion_r2511128959
##########
datafusion/datasource-arrow/src/source.rs:
##########
@@ -129,13 +143,165 @@ impl FileSource for ArrowSource {
}
}
-/// The struct arrow that implements `[FileOpener]` trait
-pub struct ArrowOpener {
- pub object_store: Arc<dyn ObjectStore>,
- pub projection: Option<Vec<usize>>,
+/// `FileSource` for Arrow IPC stream format. Supports only sequential reading.
+#[derive(Clone)]
+pub(crate) struct ArrowStreamFileSource {
+ table_schema: TableSchema,
+ metrics: ExecutionPlanMetricsSet,
+ projected_statistics: Option<Statistics>,
+ schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
}
-impl FileOpener for ArrowOpener {
+impl ArrowStreamFileSource {
+ /// Initialize an ArrowStreamFileSource with the provided schema
+ pub fn new(table_schema: impl Into<TableSchema>) -> Self {
+ Self {
+ table_schema: table_schema.into(),
+ metrics: ExecutionPlanMetricsSet::new(),
+ projected_statistics: None,
+ schema_adapter_factory: None,
+ }
+ }
+}
+
+impl From<ArrowStreamFileSource> for Arc<dyn FileSource> {
+ fn from(source: ArrowStreamFileSource) -> Self {
+ as_file_source(source)
+ }
+}
+
+impl FileSource for ArrowStreamFileSource {
+ fn create_file_opener(
+ &self,
+ object_store: Arc<dyn ObjectStore>,
+ base_config: &FileScanConfig,
+ _partition: usize,
+ ) -> Arc<dyn FileOpener> {
+ Arc::new(ArrowStreamFileOpener {
+ object_store,
+ projection: base_config.file_column_projection_indices(),
+ })
+ }
+
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn with_batch_size(&self, _batch_size: usize) -> Arc<dyn FileSource> {
+ Arc::new(Self { ..self.clone() })
Review Comment:
That would be a change in behavior, though, since it's supposed to return
an entirely new clone of the data? And this code is [the same as what was here
before](https://github.com/apache/datafusion/blob/main/datafusion/datasource-arrow/src/source.rs#L88-L90)
and also [what is given as an example in MockSource in the main
crate](https://github.com/corasaurus-hex/datafusion/blob/6ab4d216b768c9327982e59376a62a29c69ca436/datafusion/datasource/src/test_util.rs#L91-L97).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]