This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 35adf47fdb Add support for external tables with qualified names (#12645)
35adf47fdb is described below
commit 35adf47fdbd626d79051799921146b96e3345e3b
Author: OussamaSaoudi <[email protected]>
AuthorDate: Tue Oct 1 15:29:47 2024 -0700
Add support for external tables with qualified names (#12645)
* Make support schemas
* Set default name to table
* Remove print statements and stale comment
* Add tests for create table
* Fix typo
* Update datafusion/sql/src/statement.rs
Co-authored-by: Jonah Gao <[email protected]>
* convert create_external_table to objectname
* Add sqllogic tests
* Fix failing tests
---------
Co-authored-by: Jonah Gao <[email protected]>
---
datafusion/core/src/catalog_common/mod.rs | 4 +--
datafusion/sql/src/parser.rs | 39 +++++++++++-----------
datafusion/sql/src/statement.rs | 3 +-
datafusion/sql/tests/sql_integration.rs | 7 ++++
.../test_files/create_external_table.slt | 12 +++++++
5 files changed, 41 insertions(+), 24 deletions(-)
diff --git a/datafusion/core/src/catalog_common/mod.rs b/datafusion/core/src/catalog_common/mod.rs
index b841437886..85207845a0 100644
--- a/datafusion/core/src/catalog_common/mod.rs
+++ b/datafusion/core/src/catalog_common/mod.rs
@@ -185,9 +185,7 @@ pub fn resolve_table_references(
let _ = s.as_ref().visit(visitor);
}
DFStatement::CreateExternalTable(table) => {
- visitor
- .relations
- .insert(ObjectName(vec![Ident::from(table.name.as_str())]));
+ visitor.relations.insert(table.name.clone());
}
DFStatement::CopyTo(CopyToStatement { source, .. }) => match source {
CopyToSource::Relation(table_name) => {
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 2df8d89c59..6d130647a4 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -181,7 +181,7 @@ pub(crate) type LexOrdering = Vec<OrderByExpr>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CreateExternalTable {
/// Table name
- pub name: String,
+ pub name: ObjectName,
/// Optional schema
pub columns: Vec<ColumnDef>,
/// File type (Parquet, NDJSON, CSV, etc)
@@ -813,7 +813,7 @@ impl<'a> DFParser<'a> {
}
let create = CreateExternalTable {
- name: table_name.to_string(),
+ name: table_name,
columns,
file_type: builder.file_type.unwrap(),
location: builder.location.unwrap(),
@@ -915,8 +915,9 @@ mod tests {
// positive case
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION
'foo.csv'";
let display = None;
+ let name = ObjectName(vec![Ident::from("t")]);
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
@@ -932,7 +933,7 @@ mod tests {
// positive case: leading space
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION
'foo.csv' ";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
@@ -949,7 +950,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'
;";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
@@ -966,7 +967,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION
'foo.csv' OPTIONS (format.delimiter '|')";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
@@ -986,7 +987,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED
BY (p1, p2) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
@@ -1013,7 +1014,7 @@ mod tests {
];
for (sql, compression) in sqls {
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
@@ -1033,7 +1034,7 @@ mod tests {
// positive case: it is ok for parquet files not to have columns
specified
let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION
'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
@@ -1049,7 +1050,7 @@ mod tests {
// positive case: it is ok for parquet files to be other than upper
case
let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION
'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
@@ -1065,7 +1066,7 @@ mod tests {
// positive case: it is ok for avro files not to have columns specified
let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![],
file_type: "AVRO".to_string(),
location: "foo.avro".into(),
@@ -1082,7 +1083,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION
'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
@@ -1099,7 +1100,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1
int) LOCATION 'foo.csv'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(None)),
make_column_def("p1", DataType::Int(None)),
@@ -1132,7 +1133,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION
'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![],
file_type: "X".to_string(),
location: "blahblah".into(),
@@ -1149,7 +1150,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2)
LOCATION 'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![],
file_type: "X".to_string(),
location: "blahblah".into(),
@@ -1188,7 +1189,7 @@ mod tests {
];
for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter())
{
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
@@ -1214,7 +1215,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH
ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
@@ -1253,7 +1254,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH
ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
@@ -1297,7 +1298,7 @@ mod tests {
'TRUNCATE' 'NO',
'format.has_header' 'true')";
let expected = Statement::CreateExternalTable(CreateExternalTable {
- name: "t".into(),
+ name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(None)),
make_column_def("c2", DataType::Float(None)),
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 895285c597..656d72d07b 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -1239,8 +1239,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let ordered_exprs =
self.build_order_by(order_exprs, &df_schema, &mut planner_context)?;
- // External tables do not support schemas at the moment, so the name is just a table name
- let name = TableReference::bare(name);
+ let name = self.object_name_to_table_reference(name)?;
let constraints =
Constraints::new_from_table_constraints(&all_constraints,
&df_schema)?;
Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable(
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index 5c9655a556..44b591fede 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -1913,6 +1913,13 @@ fn create_external_table_with_pk() {
quick_test(sql, expected);
}
+#[test]
+fn create_external_table_wih_schema() {
+ let sql = "CREATE EXTERNAL TABLE staging.foo STORED AS CSV LOCATION 'foo.csv'";
+ let expected = "CreateExternalTable: Partial { schema: \"staging\", table: \"foo\" }";
+ quick_test(sql, expected);
+}
+
#[test]
fn create_schema_with_quoted_name() {
let sql = "CREATE SCHEMA \"quoted_schema_name\"";
diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt
index 12b097c3d5..9ac2ecdce7 100644
--- a/datafusion/sqllogictest/test_files/create_external_table.slt
+++ b/datafusion/sqllogictest/test_files/create_external_table.slt
@@ -275,3 +275,15 @@ DROP TABLE t;
# query should fail with bad column
statement error DataFusion error: Error during planning: Column foo is not in schema
CREATE EXTERNAL TABLE t STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet' WITH ORDER (foo);
+
+# Create external table with qualified name should belong to the schema
+statement ok
+CREATE SCHEMA staging;
+
+statement ok
+CREATE EXTERNAL TABLE staging.foo STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';
+
+# Create external table with qualified name, but no schema should error
+statement error DataFusion error: Error during planning: failed to resolve schema: release
+CREATE EXTERNAL TABLE release.bar STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]