corasaurus-hex commented on code in PR #18457:
URL: https://github.com/apache/datafusion/pull/18457#discussion_r2511128959


##########
datafusion/datasource-arrow/src/source.rs:
##########
@@ -129,13 +143,165 @@ impl FileSource for ArrowSource {
     }
 }
 
-/// The struct arrow that implements `[FileOpener]` trait
-pub struct ArrowOpener {
-    pub object_store: Arc<dyn ObjectStore>,
-    pub projection: Option<Vec<usize>>,
+/// `FileSource` for Arrow IPC stream format. Supports only sequential reading.
+#[derive(Clone)]
+pub(crate) struct ArrowStreamFileSource {
+    table_schema: TableSchema,
+    metrics: ExecutionPlanMetricsSet,
+    projected_statistics: Option<Statistics>,
+    schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
 }
 
-impl FileOpener for ArrowOpener {
+impl ArrowStreamFileSource {
+    /// Initialize an ArrowStreamFileSource with the provided schema
+    pub fn new(table_schema: impl Into<TableSchema>) -> Self {
+        Self {
+            table_schema: table_schema.into(),
+            metrics: ExecutionPlanMetricsSet::new(),
+            projected_statistics: None,
+            schema_adapter_factory: None,
+        }
+    }
+}
+
+impl From<ArrowStreamFileSource> for Arc<dyn FileSource> {
+    fn from(source: ArrowStreamFileSource) -> Self {
+        as_file_source(source)
+    }
+}
+
+impl FileSource for ArrowStreamFileSource {
+    fn create_file_opener(
+        &self,
+        object_store: Arc<dyn ObjectStore>,
+        base_config: &FileScanConfig,
+        _partition: usize,
+    ) -> Arc<dyn FileOpener> {
+        Arc::new(ArrowStreamFileOpener {
+            object_store,
+            projection: base_config.file_column_projection_indices(),
+        })
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn with_batch_size(&self, _batch_size: usize) -> Arc<dyn FileSource> {
+        Arc::new(Self { ..self.clone() })

Review Comment:
   That would be a change in behavior, though, since it it's supposed to return 
an entirely new clone of the data? And this code is [the same as what was here 
before](https://github.com/apache/datafusion/blob/main/datafusion/datasource-arrow/src/source.rs#L88-L90)
 and also [what is given as an example in MockSource in the main 
crate](https://github.com/corasaurus-hex/datafusion/blob/6ab4d216b768c9327982e59376a62a29c69ca436/datafusion/datasource/src/test_util.rs#L91-L97).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to