tustvold commented on code in PR #6563:
URL: https://github.com/apache/arrow-datafusion/pull/6563#discussion_r1219676800


##########
datafusion/core/src/physical_plan/file_format/mod.rs:
##########
@@ -389,84 +389,51 @@ impl SchemaAdapter {
         file_schema: &Schema,
     ) -> Option<usize> {
         let field = self.table_schema.field(index);
-        file_schema.index_of(field.name()).ok()
-    }
-
-    /// Re-order projected columns by index in record batch to match table 
schema column ordering. If the record
-    /// batch does not contain a column for an expected field, insert a 
null-valued column at the
-    /// required column index.
-    #[allow(dead_code)]
-    pub fn adapt_batch(
-        &self,
-        batch: RecordBatch,
-        projections: &[usize],
-    ) -> Result<RecordBatch> {
-        let batch_rows = batch.num_rows();
-
-        let batch_schema = batch.schema();
-
-        let mut cols: Vec<ArrayRef> = 
Vec::with_capacity(batch.columns().len());
-        let batch_cols = batch.columns().to_vec();
-
-        for field_idx in projections {
-            let table_field = &self.table_schema.fields()[*field_idx];
-            if let Some((batch_idx, _name)) =
-                batch_schema.column_with_name(table_field.name().as_str())
-            {
-                cols.push(batch_cols[batch_idx].clone());
-            } else {
-                cols.push(new_null_array(table_field.data_type(), batch_rows))
-            }
-        }
-
-        let projected_schema = 
Arc::new(self.table_schema.clone().project(projections)?);
-
-        // Necessary to handle empty batches
-        let options = 
RecordBatchOptions::new().with_row_count(Some(batch.num_rows()));
-
-        Ok(RecordBatch::try_new_with_options(
-            projected_schema,
-            cols,
-            &options,
-        )?)
+        Some(file_schema.fields.find(field.name())?.0)
     }
 
     /// Creates a `SchemaMapping` that can be used to cast or map the columns 
from the file schema to the table schema.
     ///
     /// If the provided `file_schema` contains columns of a different type to 
the expected
     /// `table_schema`, the method will attempt to cast the array data from 
the file schema
     /// to the table schema where possible.
+    ///
+    /// Returns a [`SchemaMapping`] that can be applied to the output batch
+    /// along with an ordered list of columns to project from the file

Review Comment:
   The ordering is important, as `parquet::ProjectionMask` is not order-preserving.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to