This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new f67c20f9d2 docs: show creation of DFSchema (#8132)
f67c20f9d2 is described below
commit f67c20f9d21e123840374d2198b81cfd2757c651
Author: Will Jones <[email protected]>
AuthorDate: Sun Nov 12 03:31:48 2023 -0800
docs: show creation of DFSchema (#8132)
* docs: show creation of DFSchema
* Apply suggestions from code review
Co-authored-by: Andy Grove <[email protected]>
---------
Co-authored-by: Andy Grove <[email protected]>
---
datafusion/common/src/dfschema.rs | 72 +++++++++++++++++++++++++++++++++++++--
1 file changed, 70 insertions(+), 2 deletions(-)
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index d8cd103a47..52cd856758 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -34,10 +34,75 @@ use crate::{
use arrow::compute::can_cast_types;
use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema, SchemaRef};
-/// A reference-counted reference to a `DFSchema`.
+/// A reference-counted reference to a [DFSchema].
pub type DFSchemaRef = Arc<DFSchema>;
-/// DFSchema wraps an Arrow schema and adds relation names
+/// DFSchema wraps an Arrow schema and adds relation names.
+///
+/// The schema may hold the fields across multiple tables. Some fields may be
+/// qualified and some unqualified. A qualified field is a field that has a
+/// relation name associated with it.
+///
+/// Unqualified fields must be unique not only amongst themselves, but also must
+/// have a distinct name from any qualified field names. This allows finding a
+/// qualified field by name to be possible, so long as there aren't multiple
+/// qualified fields with the same name.
+///
+/// There is an alias to `Arc<DFSchema>` named [DFSchemaRef].
+///
+/// # Creating qualified schemas
+///
+/// Use [DFSchema::try_from_qualified_schema] to create a qualified schema from
+/// an Arrow schema.
+///
+/// ```rust
+/// use datafusion_common::{DFSchema, Column};
+/// use arrow_schema::{DataType, Field, Schema};
+///
+/// let arrow_schema = Schema::new(vec![
+/// Field::new("c1", DataType::Int32, false),
+/// ]);
+///
+/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap();
+/// let column = Column::from_qualified_name("t1.c1");
+/// assert!(df_schema.has_column(&column));
+///
+/// // Can also access qualified fields with unqualified name, if it's unambiguous
+/// let column = Column::from_qualified_name("c1");
+/// assert!(df_schema.has_column(&column));
+/// ```
+///
+/// # Creating unqualified schemas
+///
+/// Create an unqualified schema using TryFrom:
+///
+/// ```rust
+/// use datafusion_common::{DFSchema, Column};
+/// use arrow_schema::{DataType, Field, Schema};
+///
+/// let arrow_schema = Schema::new(vec![
+/// Field::new("c1", DataType::Int32, false),
+/// ]);
+///
+/// let df_schema = DFSchema::try_from(arrow_schema).unwrap();
+/// let column = Column::new_unqualified("c1");
+/// assert!(df_schema.has_column(&column));
+/// ```
+///
+/// # Converting back to Arrow schema
+///
+/// Use the `Into` trait to convert `DFSchema` into an Arrow schema:
+///
+/// ```rust
+/// use datafusion_common::{DFSchema, DFField};
+/// use arrow_schema::Schema;
+///
+/// let df_schema = DFSchema::new(vec![
+/// DFField::new_unqualified("c1", arrow::datatypes::DataType::Int32, false),
+/// ]).unwrap();
+/// let schema = Schema::from(df_schema);
+/// assert_eq!(schema.fields().len(), 1);
+/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DFSchema {
/// Fields
@@ -112,6 +177,9 @@ impl DFSchema {
}
/// Create a `DFSchema` from an Arrow schema and a given qualifier
+ ///
+ /// To create a schema from an Arrow schema without a qualifier, use
+ /// `DFSchema::try_from`.
pub fn try_from_qualified_schema<'a>(
qualifier: impl Into<TableReference<'a>>,
schema: &Schema,