alamb commented on code in PR #6563:
URL: https://github.com/apache/arrow-datafusion/pull/6563#discussion_r1220099753


##########
datafusion/core/src/physical_plan/file_format/mod.rs:
##########
@@ -477,44 +444,32 @@ impl SchemaAdapter {
 pub struct SchemaMapping {
     /// The schema of the table. This is the expected schema after conversion 
and it should match the schema of the query result.
     table_schema: SchemaRef,
-    /// In `field_mappings`, a `true` value indicates that the corresponding 
field in `table_schema` exists in `file_schema`,
-    /// while a `false` value indicates that the corresponding field does not 
exist.
-    field_mappings: Vec<bool>,
+    /// Mapping from field index in `table_schema` to index in projected 
file_schema
+    field_mappings: Vec<Option<usize>>,
 }
 
 impl SchemaMapping {
     /// Adapts a `RecordBatch` to match the `table_schema` using the stored 
mapping and conversions.
     fn map_batch(&self, batch: RecordBatch) -> Result<RecordBatch> {
         let batch_rows = batch.num_rows();
         let batch_cols = batch.columns().to_vec();
-        let batch_schema = batch.schema();
 
         let cols = self
             .table_schema
             .fields()
             .iter()
-            .enumerate()
-            .map(|(idx, field)| {
-                if self.field_mappings[idx] {
-                    match batch_schema.index_of(field.name()) {
-                        Ok(batch_idx) => arrow::compute::cast(
-                            &batch_cols[batch_idx],
-                            field.data_type(),
-                        )
-                        .map_err(DataFusionError::ArrowError),
-                        Err(_) => Ok(new_null_array(field.data_type(), 
batch_rows)),
-                    }
-                } else {
-                    Ok(new_null_array(field.data_type(), batch_rows))
-                }
+            .zip(&self.field_mappings)
+            .map(|(field, file_idx)| match file_idx {
+                Some(batch_idx) => cast(&batch_cols[*batch_idx], 
field.data_type()),
+                None => Ok(new_null_array(field.data_type(), batch_rows)),

Review Comment:
   👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to