alamb commented on code in PR #13063:
URL: https://github.com/apache/datafusion/pull/13063#discussion_r1811281197


##########
datafusion/core/src/datasource/schema_adapter.rs:
##########
@@ -99,8 +107,86 @@ pub trait SchemaMapper: Debug + Send + Sync {
     ) -> datafusion_common::Result<RecordBatch>;
 }
 
-/// Implementation of [`SchemaAdapterFactory`] that maps columns by name
-/// and casts columns to the expected type.
+/// Default  [`SchemaAdapterFactory`] for mapping schemas.
+///
+/// This can be used to adapt file-level record batches to a table schema and
+/// implement schema evolution.
+///
+/// Given an input file schema and a table schema, this factor can make
+/// [`SchemaMapper`]s that:
+///
+/// 1. Reorder columns
+/// 2. Cast columns to the correct type
+/// 3. Fill missing columns with nulls
+///
+/// # Illustration of Schema Mapping
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─                  ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
─
+///  ┌───────┐   ┌───────┐ │                  ┌───────┐   ┌───────┐   
┌───────┐ │
+/// ││  1.0  │   │ "foo" │                   ││ NULL  │   │ "foo" │   │ "1.0" │
+///  ├───────┤   ├───────┤ │ Schema mapping   ├───────┤   ├───────┤   
├───────┤ │
+/// ││  2.0  │   │ "bar" │                   ││  NULL │   │ "bar" │   │ "2.0" │
+///  └───────┘   └───────┘ │────────────────▶ └───────┘   └───────┘   
└───────┘ │
+/// │                                        │
+///  column "c"  column "b"│                  column "a"  column "b"  column 
"c"│
+/// │ Float64       Utf8                     │  Int32        Utf8        Utf8
+///  ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘                  ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ 
─ ┘
+///     Input Record Batch                         Output Record Batch
+///
+///     Schema {                                   Schema {
+///      "c": Float64,                              "a": Int32,
+///      "b": Utf8,                                 "b": Utf8,
+///     }                                           "c": Utf8,
+///                                                }
+/// ```
+/// # Example of using the `DefaultSchemaAdapterFactory` to map 
[`RecordBatch`]s
+/// ```
+/// # use std::sync::Arc;
+/// # use arrow::datatypes::{DataType, Field, Schema};
+/// # use 
datafusion::datasource::schema_adapter::{DefaultSchemaAdapterFactory, 
SchemaAdapterFactory};
+/// # use datafusion_common::record_batch;
+/// // Table has fields "a" and "b"
+/// let table_schema = Schema::new(vec![
+///     Field::new("a", DataType::Int32, true),
+///     Field::new("b", DataType::Utf8, true),
+///     Field::new("c", DataType::Utf8, true),
+/// ]);
+///
+/// // The file provides only fields "b" and "c" that oder
+/// let projected_table_schema = table_schema.project(&[2, 1]).unwrap();
+///
+/// // create an adapter for the table schema and file schema
+/// let adapter = DefaultSchemaAdapterFactory.create(
+///   Arc::new(table_schema),
+///   Arc::new(projected_table_schema)

Review Comment:
   wow -- you are totally right. 🤦 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to