This is an automated email from the ASF dual-hosted git repository.
akurmustafa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new d88d5f0ed9 Select Into support (#6219)
d88d5f0ed9 is described below
commit d88d5f0ed953c763e627297f3357d322992394a5
Author: Berkay Şahin <[email protected]>
AuthorDate: Fri May 5 14:46:35 2023 +0300
Select Into support (#6219)
* select into support and related tests are added
* adapt the logical plan changes after merge
* simplifications
* comments added
* updating tests
* cargo lock update
---------
Co-authored-by: Mustafa Akur <[email protected]>
---
datafusion-cli/Cargo.lock | 45 +++++++++++-----------
.../tests/sqllogictests/test_files/aggregate.slt | 20 +++++++++-
.../core/tests/sqllogictests/test_files/ddl.slt | 39 +++++++++++++++++++
datafusion/sql/src/select.rs | 26 ++++++++++---
datafusion/sql/tests/integration_test.rs | 4 --
5 files changed, 100 insertions(+), 34 deletions(-)
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 79700f9e53..9408547607 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -784,9 +784,9 @@ dependencies = [
[[package]]
name = "clap"
-version = "3.2.24"
+version = "3.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eef2b3ded6a26dfaec672a742c93c8cf6b689220324da509ec5caa20de55dc83"
+checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
dependencies = [
"atty",
"bitflags",
@@ -801,9 +801,9 @@ dependencies = [
[[package]]
name = "clap_derive"
-version = "3.2.24"
+version = "3.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d756c5824fc5c0c1ee8e36000f576968dbcb2081def956c83fad6f40acd46f96"
+checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008"
dependencies = [
"heck",
"proc-macro-error",
@@ -1344,9 +1344,9 @@ dependencies = [
[[package]]
name = "flate2"
-version = "1.0.25"
+version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
+checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
dependencies = [
"crc32fast",
"miniz_oxide",
@@ -1863,9 +1863,9 @@ dependencies = [
[[package]]
name = "linux-raw-sys"
-version = "0.3.4"
+version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36eb31c1778188ae1e64398743890d0877fef36d11521ac60406b42016e8c2cf"
+checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
[[package]]
name = "lock_api"
@@ -1949,9 +1949,9 @@ checksum =
"6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "miniz_oxide"
-version = "0.6.2"
+version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
+checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
@@ -2300,9 +2300,9 @@ checksum =
"8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
-version = "0.3.26"
+version = "0.3.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]]
name = "ppv-lite86"
@@ -2527,9 +2527,9 @@ dependencies = [
[[package]]
name = "rustix"
-version = "0.37.15"
+version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0661814f891c57c930a610266415528da53c4933e6dea5fb350cbfe048a9ece"
+checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
dependencies = [
"bitflags",
"errno",
@@ -2684,18 +2684,18 @@ checksum =
"e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc"
[[package]]
name = "serde"
-version = "1.0.160"
+version = "1.0.162"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c"
+checksum = "71b2f6e1ab5c2b98c05f0f35b236b22e8df7ead6ffbf51d7808da7f8817e7ab6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
-version = "1.0.160"
+version = "1.0.162"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df"
+checksum = "a2a0814352fd64b58489904a44ea8d90cb1a91dcb6b4f5ebabc32c8318e93cb6"
dependencies = [
"proc-macro2",
"quote",
@@ -3091,10 +3091,11 @@ checksum =
"b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
[[package]]
name = "tracing"
-version = "0.1.38"
+version = "0.1.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf9cf6a813d3f40c88b0b6b6f29a5c95c6cdbf97c1f9cc53fb820200f5ad814d"
+checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
dependencies = [
+ "cfg-if",
"log",
"pin-project-lite",
"tracing-attributes",
@@ -3207,9 +3208,9 @@ checksum =
"711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "uuid"
-version = "1.3.1"
+version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb"
+checksum = "4dad5567ad0cf5b760e5665964bec1b47dfd077ba8a2544b513f3556d3d239a2"
dependencies = [
"getrandom",
]
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index a9b6552931..d2472e917f 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -371,12 +371,29 @@ select host, median(usage) from cpu group by host;
host0 90.1
host1 90.3
+statement ok
+drop table cpu;
+
+# this test is to show create table as and select into works in the same way
+statement ok
+SELECT * INTO cpu
+FROM (VALUES
+ ('host0', 90.1),
+ ('host1', 90.2),
+ ('host1', 90.4)
+ ) AS cpu (host, usage);
+
+query TR rowsort
+select host, median(usage) from cpu group by host;
+----
+host0 90.1
+host1 90.3
+
query R
select median(usage) from cpu;
----
90.2
-
statement ok
drop table cpu;
@@ -1389,7 +1406,6 @@ as values
(null, 'Row 2'),
('2021-01-01T05:11:10.432', 'Row 3');
-
statement ok
create table t as
select
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index 856994881f..f08c5e61ca 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -28,6 +28,22 @@ select * from users;
1 2
2 3
+statement ok
+drop table users
+
+statement ok
+SELECT * INTO users FROM (VALUES(1,2),(2,3));
+
+query II rowsort
+select * from users;
+----
+1 2
+2 3
+
+# Error if the table already exists
+statement error DataFusion error: Execution error: Table 'users' already exists
+SELECT * INTO users FROM (VALUES(1,2),(2,3));
+
statement ok
insert into users values(2, 4);
@@ -251,6 +267,18 @@ SELECT * FROM my_table order by c1 LIMIT 1
----
0.00001 0.000000000001 true
+statement ok
+drop table my_table
+
+# select_into
+statement ok
+SELECT* INTO my_table FROM (SELECT * FROM aggregate_simple)
+
+query RRB rowsort
+SELECT * FROM my_table order by c1 LIMIT 1
+----
+0.00001 0.000000000001 true
+
statement ok
DROP TABLE my_table;
@@ -296,9 +324,20 @@ SELECT * FROM my_table;
----
1 2 hello
+# Preserving data types with select into
+statement ok
+SELECT * INTO new_table FROM my_table
+
+query IRT rowsort
+SELECT * FROM new_table;
+----
+1 2 hello
+
statement ok
DROP TABLE my_table;
+statement ok
+DROP TABLE new_table;
# TODO enable information schema
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index d77c1aefd2..2da967a224 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -31,7 +31,8 @@ use datafusion_expr::utils::{
};
use datafusion_expr::Expr::Alias;
use datafusion_expr::{
- Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning,
+ CreateMemoryTable, DdlStatement, Expr, Filter, GroupingSet, LogicalPlan,
+ LogicalPlanBuilder, Partitioning,
};
use sqlparser::ast::{Expr as SQLExpr, WildcardAdditionalOptions};
use sqlparser::ast::{Select, SelectItem, TableWithJoins};
@@ -61,9 +62,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
if !select.sort_by.is_empty() {
return Err(DataFusionError::NotImplemented("SORT BY".to_string()));
}
- if select.into.is_some() {
- return Err(DataFusionError::NotImplemented("INTO".to_string()));
- }
// process `from` clause
let plan = self.plan_from_tables(select.from, planner_context)?;
@@ -207,7 +205,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}?;
// DISTRIBUTE BY
- if !select.distribute_by.is_empty() {
+ let plan = if !select.distribute_by.is_empty() {
let x = select
.distribute_by
.iter()
@@ -221,7 +219,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.collect::<Result<Vec<_>>>()?;
LogicalPlanBuilder::from(plan)
.repartition(Partitioning::DistributeBy(x))?
- .build()
+ .build()?
+ } else {
+ plan
+ };
+
+ if let Some(select_into) = select.into {
+ Ok(LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(
+ CreateMemoryTable {
+ name:
self.object_name_to_table_reference(select_into.name)?,
+ // SELECT INTO statement does not copy constraints such as
primary key
+ primary_key: Vec::new(),
+ input: Arc::new(plan),
+ // These are not applicable with SELECT INTO
+ if_not_exists: false,
+ or_replace: false,
+ },
+ )))
} else {
Ok(plan)
}
diff --git a/datafusion/sql/tests/integration_test.rs
b/datafusion/sql/tests/integration_test.rs
index a69a824a3e..e1acfcd7c5 100644
--- a/datafusion/sql/tests/integration_test.rs
+++ b/datafusion/sql/tests/integration_test.rs
@@ -3392,10 +3392,6 @@ fn test_select_distinct_order_by() {
"SELECT * FROM person SORT BY id",
"This feature is not implemented: SORT BY"
)]
-#[case::select_into_unsupported(
- "SELECT * INTO test FROM person",
- "This feature is not implemented: INTO"
-)]
#[test]
fn test_select_unsupported_syntax_errors(#[case] sql: &str, #[case] error:
&str) {
let err = logical_plan(sql).unwrap_err();