This is an automated email from the ASF dual-hosted git repository.

akurmustafa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new d88d5f0ed9 Select Into support (#6219)
d88d5f0ed9 is described below

commit d88d5f0ed953c763e627297f3357d322992394a5
Author: Berkay Şahin <[email protected]>
AuthorDate: Fri May 5 14:46:35 2023 +0300

    Select Into support (#6219)
    
    * select into support and related tests are added
    
    * adapt the logical plan changes after merge
    
    * simplifications
    
    * comments added
    
    * updating tests
    
    * cargo lock update
    
    ---------
    
    Co-authored-by: Mustafa Akur <[email protected]>
---
 datafusion-cli/Cargo.lock                          | 45 +++++++++++-----------
 .../tests/sqllogictests/test_files/aggregate.slt   | 20 +++++++++-
 .../core/tests/sqllogictests/test_files/ddl.slt    | 39 +++++++++++++++++++
 datafusion/sql/src/select.rs                       | 26 ++++++++++---
 datafusion/sql/tests/integration_test.rs           |  4 --
 5 files changed, 100 insertions(+), 34 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 79700f9e53..9408547607 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -784,9 +784,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "3.2.24"
+version = "3.2.25"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "eef2b3ded6a26dfaec672a742c93c8cf6b689220324da509ec5caa20de55dc83"
+checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
 dependencies = [
  "atty",
  "bitflags",
@@ -801,9 +801,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "3.2.24"
+version = "3.2.25"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "d756c5824fc5c0c1ee8e36000f576968dbcb2081def956c83fad6f40acd46f96"
+checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008"
 dependencies = [
  "heck",
  "proc-macro-error",
@@ -1344,9 +1344,9 @@ dependencies = [
 
 [[package]]
 name = "flate2"
-version = "1.0.25"
+version = "1.0.26"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
+checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
 dependencies = [
  "crc32fast",
  "miniz_oxide",
@@ -1863,9 +1863,9 @@ dependencies = [
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.3.4"
+version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "36eb31c1778188ae1e64398743890d0877fef36d11521ac60406b42016e8c2cf"
+checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
 
 [[package]]
 name = "lock_api"
@@ -1949,9 +1949,9 @@ checksum = 
"6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
 [[package]]
 name = "miniz_oxide"
-version = "0.6.2"
+version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
+checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
 dependencies = [
  "adler",
 ]
@@ -2300,9 +2300,9 @@ checksum = 
"8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
 [[package]]
 name = "pkg-config"
-version = "0.3.26"
+version = "0.3.27"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
 
 [[package]]
 name = "ppv-lite86"
@@ -2527,9 +2527,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.37.15"
+version = "0.37.19"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a0661814f891c57c930a610266415528da53c4933e6dea5fb350cbfe048a9ece"
+checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
 dependencies = [
  "bitflags",
  "errno",
@@ -2684,18 +2684,18 @@ checksum = 
"e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc"
 
 [[package]]
 name = "serde"
-version = "1.0.160"
+version = "1.0.162"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c"
+checksum = "71b2f6e1ab5c2b98c05f0f35b236b22e8df7ead6ffbf51d7808da7f8817e7ab6"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.160"
+version = "1.0.162"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df"
+checksum = "a2a0814352fd64b58489904a44ea8d90cb1a91dcb6b4f5ebabc32c8318e93cb6"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3091,10 +3091,11 @@ checksum = 
"b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
 
 [[package]]
 name = "tracing"
-version = "0.1.38"
+version = "0.1.37"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "cf9cf6a813d3f40c88b0b6b6f29a5c95c6cdbf97c1f9cc53fb820200f5ad814d"
+checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
 dependencies = [
+ "cfg-if",
  "log",
  "pin-project-lite",
  "tracing-attributes",
@@ -3207,9 +3208,9 @@ checksum = 
"711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
 
 [[package]]
 name = "uuid"
-version = "1.3.1"
+version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb"
+checksum = "4dad5567ad0cf5b760e5665964bec1b47dfd077ba8a2544b513f3556d3d239a2"
 dependencies = [
  "getrandom",
 ]
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt 
b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index a9b6552931..d2472e917f 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -371,12 +371,29 @@ select host, median(usage) from cpu group by host;
 host0 90.1
 host1 90.3
 
+statement ok
+drop table cpu;
+
+# this test is to show create table as and select into works in the same way
+statement ok
+SELECT * INTO cpu 
+FROM (VALUES
+    ('host0', 90.1),
+    ('host1', 90.2),
+    ('host1', 90.4)
+  ) AS cpu (host, usage);
+
+query TR rowsort
+select host, median(usage) from cpu group by host;
+----
+host0 90.1
+host1 90.3
+
 query R
 select median(usage) from cpu;
 ----
 90.2
 
-
 statement ok
 drop table cpu;
 
@@ -1389,7 +1406,6 @@ as values
  (null, 'Row 2'),
  ('2021-01-01T05:11:10.432', 'Row 3');
 
-
 statement ok
 create table t as
 select
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt 
b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index 856994881f..f08c5e61ca 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -28,6 +28,22 @@ select * from users;
 1 2
 2 3
 
+statement ok
+drop table users
+
+statement ok
+SELECT * INTO users FROM (VALUES(1,2),(2,3));
+
+query II rowsort
+select * from users;
+----
+1 2
+2 3
+
+# Error if the table already exists
+statement error DataFusion error: Execution error: Table 'users' already exists
+SELECT * INTO users FROM (VALUES(1,2),(2,3));
+
 statement ok
 insert into users values(2, 4);
 
@@ -251,6 +267,18 @@ SELECT * FROM my_table order by c1 LIMIT 1
 ----
 0.00001 0.000000000001 true
 
+statement ok
+drop table my_table
+
+# select_into
+statement ok
+SELECT* INTO my_table FROM (SELECT * FROM aggregate_simple)
+
+query RRB rowsort
+SELECT * FROM my_table order by c1 LIMIT 1
+----
+0.00001 0.000000000001 true
+
 statement ok
 DROP TABLE my_table;
 
@@ -296,9 +324,20 @@ SELECT * FROM my_table;
 ----
 1 2 hello
 
+# Preserving data types with select into
+statement ok
+SELECT * INTO new_table FROM my_table
+
+query IRT rowsort
+SELECT * FROM new_table;
+----
+1 2 hello
+
 statement ok
 DROP TABLE my_table;
 
+statement ok
+DROP TABLE new_table;
 
 # TODO enable information schema
 
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index d77c1aefd2..2da967a224 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -31,7 +31,8 @@ use datafusion_expr::utils::{
 };
 use datafusion_expr::Expr::Alias;
 use datafusion_expr::{
-    Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning,
+    CreateMemoryTable, DdlStatement, Expr, Filter, GroupingSet, LogicalPlan,
+    LogicalPlanBuilder, Partitioning,
 };
 use sqlparser::ast::{Expr as SQLExpr, WildcardAdditionalOptions};
 use sqlparser::ast::{Select, SelectItem, TableWithJoins};
@@ -61,9 +62,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         if !select.sort_by.is_empty() {
             return Err(DataFusionError::NotImplemented("SORT BY".to_string()));
         }
-        if select.into.is_some() {
-            return Err(DataFusionError::NotImplemented("INTO".to_string()));
-        }
 
         // process `from` clause
         let plan = self.plan_from_tables(select.from, planner_context)?;
@@ -207,7 +205,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         }?;
 
         // DISTRIBUTE BY
-        if !select.distribute_by.is_empty() {
+        let plan = if !select.distribute_by.is_empty() {
             let x = select
                 .distribute_by
                 .iter()
@@ -221,7 +219,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 .collect::<Result<Vec<_>>>()?;
             LogicalPlanBuilder::from(plan)
                 .repartition(Partitioning::DistributeBy(x))?
-                .build()
+                .build()?
+        } else {
+            plan
+        };
+
+        if let Some(select_into) = select.into {
+            Ok(LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(
+                CreateMemoryTable {
+                    name: 
self.object_name_to_table_reference(select_into.name)?,
+                    // SELECT INTO statement does not copy constraints such as 
primary key
+                    primary_key: Vec::new(),
+                    input: Arc::new(plan),
+                    // These are not applicable with SELECT INTO
+                    if_not_exists: false,
+                    or_replace: false,
+                },
+            )))
         } else {
             Ok(plan)
         }
diff --git a/datafusion/sql/tests/integration_test.rs 
b/datafusion/sql/tests/integration_test.rs
index a69a824a3e..e1acfcd7c5 100644
--- a/datafusion/sql/tests/integration_test.rs
+++ b/datafusion/sql/tests/integration_test.rs
@@ -3392,10 +3392,6 @@ fn test_select_distinct_order_by() {
     "SELECT * FROM person SORT BY id",
     "This feature is not implemented: SORT BY"
 )]
-#[case::select_into_unsupported(
-    "SELECT * INTO test FROM person",
-    "This feature is not implemented: INTO"
-)]
 #[test]
 fn test_select_unsupported_syntax_errors(#[case] sql: &str, #[case] error: 
&str) {
     let err = logical_plan(sql).unwrap_err();

Reply via email to