This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 6aa423b70b feature: Support `EXPLAIN COPY` (#7291)
6aa423b70b is described below

commit 6aa423b70b4eb1f28165919fbfb4efe323669bc4
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Aug 21 17:15:47 2023 -0400

    feature: Support `EXPLAIN COPY` (#7291)
    
    * Support EXPLAIN COPY
    
    * clippy
    
    * clippy
    
    * Fix argument handling
    
    * Apply suggestions from code review
    
    Co-authored-by: Metehan Yıldırım 
<[email protected]>
    
    * Improve nested explain error
    
    ---------
    
    Co-authored-by: Metehan Yıldırım 
<[email protected]>
---
 datafusion/core/src/execution/context.rs       |  49 +++++----
 datafusion/sql/src/parser.rs                   | 140 ++++++++++++++++++-------
 datafusion/sql/src/statement.rs                |  22 +++-
 datafusion/sql/tests/sql_integration.rs        |  12 +++
 datafusion/sqllogictest/test_files/copy.slt    |  13 ++-
 datafusion/sqllogictest/test_files/explain.slt |   8 +-
 6 files changed, 177 insertions(+), 67 deletions(-)

diff --git a/datafusion/core/src/execution/context.rs 
b/datafusion/core/src/execution/context.rs
index ec09bdaec1..c0a0134fed 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -1698,30 +1698,39 @@ impl SessionState {
         }
 
         let mut visitor = RelationVisitor(&mut relations);
-        match statement {
-            DFStatement::Statement(s) => {
-                let _ = s.as_ref().visit(&mut visitor);
-            }
-            DFStatement::CreateExternalTable(table) => {
-                visitor
-                    .0
-                    
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
-            }
-            DFStatement::DescribeTableStmt(table) => 
visitor.insert(&table.table_name),
-            DFStatement::CopyTo(CopyToStatement {
-                source,
-                target: _,
-                options: _,
-            }) => match source {
-                CopyToSource::Relation(table_name) => {
-                    visitor.insert(table_name);
+        fn visit_statement(statement: &DFStatement, visitor: &mut 
RelationVisitor<'_>) {
+            match statement {
+                DFStatement::Statement(s) => {
+                    let _ = s.as_ref().visit(visitor);
                 }
-                CopyToSource::Query(query) => {
-                    query.visit(&mut visitor);
+                DFStatement::CreateExternalTable(table) => {
+                    visitor
+                        .0
+                        
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
                 }
-            },
+                DFStatement::DescribeTableStmt(table) => {
+                    visitor.insert(&table.table_name)
+                }
+                DFStatement::CopyTo(CopyToStatement {
+                    source,
+                    target: _,
+                    options: _,
+                }) => match source {
+                    CopyToSource::Relation(table_name) => {
+                        visitor.insert(table_name);
+                    }
+                    CopyToSource::Query(query) => {
+                        query.visit(visitor);
+                    }
+                },
+                DFStatement::Explain(explain) => {
+                    visit_statement(&explain.statement, visitor)
+                }
+            }
         }
 
+        visit_statement(statement, &mut visitor);
+
         // Always include information_schema if available
         if self.config.information_schema() {
             for s in INFORMATION_SCHEMA_TABLES {
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 13159ad8d1..cab8f0f06e 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -44,6 +44,35 @@ fn parse_file_type(s: &str) -> Result<String, ParserError> {
     Ok(s.to_uppercase())
 }
 
+/// DataFusion specific EXPLAIN (needed so we can EXPLAIN datafusion
+/// specific COPY and other statements)
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ExplainStatement {
+    pub analyze: bool,
+    pub verbose: bool,
+    pub statement: Box<Statement>,
+}
+
+impl fmt::Display for ExplainStatement {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self {
+            analyze,
+            verbose,
+            statement,
+        } = self;
+
+        write!(f, "EXPLAIN ")?;
+        if *analyze {
+            write!(f, "ANALYZE ")?;
+        }
+        if *verbose {
+            write!(f, "VERBOSE ")?;
+        }
+
+        write!(f, "{statement}")
+    }
+}
+
 /// DataFusion extension DDL for `COPY`
 ///
 /// # Syntax:
@@ -74,7 +103,7 @@ pub struct CopyToStatement {
     /// The URL to where the data is heading
     pub target: String,
     /// Target specific options
-    pub options: HashMap<String, Value>,
+    pub options: Vec<(String, Value)>,
 }
 
 impl fmt::Display for CopyToStatement {
@@ -88,10 +117,8 @@ impl fmt::Display for CopyToStatement {
         write!(f, "COPY {source} TO {target}")?;
 
         if !options.is_empty() {
-            let mut opts: Vec<_> =
-                options.iter().map(|(k, v)| format!("{k} {v}")).collect();
+            let opts: Vec<_> = options.iter().map(|(k, v)| format!("{k} 
{v}")).collect();
             // print them in sorted order
-            opts.sort_unstable();
             write!(f, " ({})", opts.join(", "))?;
         }
 
@@ -208,6 +235,8 @@ pub enum Statement {
     DescribeTableStmt(DescribeTableStmt),
     /// Extension: `COPY TO`
     CopyTo(CopyToStatement),
+    /// EXPLAIN for extensions
+    Explain(ExplainStatement),
 }
 
 impl fmt::Display for Statement {
@@ -217,11 +246,12 @@ impl fmt::Display for Statement {
             Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
             Statement::DescribeTableStmt(_) => write!(f, "DESCRIBE TABLE ..."),
             Statement::CopyTo(stmt) => write!(f, "{stmt}"),
+            Statement::Explain(stmt) => write!(f, "{stmt}"),
         }
     }
 }
 
-/// DataFusion SQL Parser based on [`sqlparser`]
+/// Datafusion SQL Parser based on [`sqlparser`]
 ///
 /// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s
 /// [`Parser`](sqlparser::parser::Parser).
@@ -307,24 +337,24 @@ impl<'a> DFParser<'a> {
             Token::Word(w) => {
                 match w.keyword {
                     Keyword::CREATE => {
-                        // move one token forward
-                        self.parser.next_token();
-                        // use custom parsing
+                        self.parser.next_token(); // CREATE
                         self.parse_create()
                     }
                     Keyword::COPY => {
-                        // move one token forward
-                        self.parser.next_token();
+                        self.parser.next_token(); // COPY
                         self.parse_copy()
                     }
                     Keyword::DESCRIBE => {
-                        // move one token forward
-                        self.parser.next_token();
-                        // use custom parsing
+                        self.parser.next_token(); // DESCRIBE
                         self.parse_describe()
                     }
+                    Keyword::EXPLAIN => {
+                        // (TODO parse all supported statements)
+                        self.parser.next_token(); // EXPLAIN
+                        self.parse_explain()
+                    }
                     _ => {
-                        // use the native parser
+                        // use sqlparser-rs parser
                         Ok(Statement::Statement(Box::from(
                             self.parser.parse_statement()?,
                         )))
@@ -369,7 +399,7 @@ impl<'a> DFParser<'a> {
         let options = if self.parser.peek_token().token == Token::LParen {
             self.parse_value_options()?
         } else {
-            HashMap::new()
+            vec![]
         };
 
         Ok(Statement::CopyTo(CopyToStatement {
@@ -421,6 +451,19 @@ impl<'a> DFParser<'a> {
         }
     }
 
+    /// Parse a SQL `EXPLAIN`
+    pub fn parse_explain(&mut self) -> Result<Statement, ParserError> {
+        let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
+        let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
+        let statement = self.parse_statement()?;
+
+        Ok(Statement::Explain(ExplainStatement {
+            statement: Box::new(statement),
+            analyze,
+            verbose,
+        }))
+    }
+
     /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
     pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
         if self.parser.parse_keyword(Keyword::EXTERNAL) {
@@ -758,14 +801,14 @@ impl<'a> DFParser<'a> {
     /// Unlike [`Self::parse_string_options`], this method supports
     /// keywords as key names as well as multiple value types such as
     /// Numbers as well as Strings.
-    fn parse_value_options(&mut self) -> Result<HashMap<String, Value>, 
ParserError> {
-        let mut options = HashMap::new();
+    fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, 
ParserError> {
+        let mut options = vec![];
         self.parser.expect_token(&Token::LParen)?;
 
         loop {
             let key = self.parse_option_key()?;
             let value = self.parse_option_value()?;
-            options.insert(key, value);
+            options.push((key, value));
             let comma = self.parser.consume_token(&Token::Comma);
             if self.parser.consume_token(&Token::RParen) {
                 // allow a trailing comma, even though it's not in standard
@@ -1285,13 +1328,39 @@ mod tests {
         let expected = Statement::CopyTo(CopyToStatement {
             source: object_name("foo"),
             target: "bar".to_string(),
-            options: HashMap::new(),
+            options: vec![],
         });
 
         assert_eq!(verified_stmt(sql), expected);
         Ok(())
     }
 
+    #[test]
+    fn explain_copy_to_table_to_table() -> Result<(), ParserError> {
+        let cases = vec![
+            ("EXPLAIN COPY foo TO bar", false, false),
+            ("EXPLAIN ANALYZE COPY foo TO bar", true, false),
+            ("EXPLAIN VERBOSE COPY foo TO bar", false, true),
+            ("EXPLAIN ANALYZE VERBOSE COPY foo TO bar", true, true),
+        ];
+        for (sql, analyze, verbose) in cases {
+            println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
+
+            let expected_copy = Statement::CopyTo(CopyToStatement {
+                source: object_name("foo"),
+                target: "bar".to_string(),
+                options: vec![],
+            });
+            let expected = Statement::Explain(ExplainStatement {
+                analyze,
+                verbose,
+                statement: Box::new(expected_copy),
+            });
+            assert_eq!(verified_stmt(sql), expected);
+        }
+        Ok(())
+    }
+
     #[test]
     fn copy_to_query_to_table() -> Result<(), ParserError> {
         let statement = verified_stmt("SELECT 1");
@@ -1313,7 +1382,7 @@ mod tests {
         let expected = Statement::CopyTo(CopyToStatement {
             source: CopyToSource::Query(query),
             target: "bar".to_string(),
-            options: HashMap::new(),
+            options: vec![],
         });
         assert_eq!(verified_stmt(sql), expected);
         Ok(())
@@ -1325,10 +1394,10 @@ mod tests {
         let expected = Statement::CopyTo(CopyToStatement {
             source: object_name("foo"),
             target: "bar".to_string(),
-            options: HashMap::from([(
+            options: vec![(
                 "row_group_size".to_string(),
                 Value::Number("55".to_string(), false),
-            )]),
+            )],
         });
         assert_eq!(verified_stmt(sql), expected);
         Ok(())
@@ -1336,17 +1405,11 @@ mod tests {
 
     #[test]
     fn copy_to_multi_options() -> Result<(), ParserError> {
+        // order of options is preserved
         let sql =
             "COPY foo TO bar (format parquet, row_group_size 55, compression 
snappy)";
-        // canonical order is alphabetical
-        let canonical =
-            "COPY foo TO bar (compression snappy, format parquet, 
row_group_size 55)";
 
-        let expected_options = HashMap::from([
-            (
-                "compression".to_string(),
-                Value::UnQuotedString("snappy".to_string()),
-            ),
+        let expected_options = vec![
             (
                 "format".to_string(),
                 Value::UnQuotedString("parquet".to_string()),
@@ -1355,14 +1418,17 @@ mod tests {
                 "row_group_size".to_string(),
                 Value::Number("55".to_string(), false),
             ),
-        ]);
+            (
+                "compression".to_string(),
+                Value::UnQuotedString("snappy".to_string()),
+            ),
+        ];
 
-        let options =
-            if let Statement::CopyTo(copy_to) = one_statement_parses_to(sql, 
canonical) {
-                copy_to.options
-            } else {
-                panic!("Expected copy");
-            };
+        let options = if let Statement::CopyTo(copy_to) = verified_stmt(sql) {
+            copy_to.options
+        } else {
+            panic!("Expected copy");
+        };
 
         assert_eq!(options, expected_options);
 
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index c684cfaa97..b5bb8e3d05 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -17,7 +17,7 @@
 
 use crate::parser::{
     CopyToSource, CopyToStatement, CreateExternalTable, DFParser, 
DescribeTableStmt,
-    LexOrdering, Statement as DFStatement,
+    ExplainStatement, LexOrdering, Statement as DFStatement,
 };
 use crate::planner::{
     object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel,
@@ -93,6 +93,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             DFStatement::Statement(s) => self.sql_statement_to_plan(*s),
             DFStatement::DescribeTableStmt(s) => 
self.describe_table_to_plan(s),
             DFStatement::CopyTo(s) => self.copy_to_plan(s),
+            DFStatement::Explain(ExplainStatement {
+                verbose,
+                analyze,
+                statement,
+            }) => self.explain_to_plan(verbose, analyze, *statement),
         }
     }
 
@@ -127,7 +132,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 format: _,
                 describe_alias: _,
                 ..
-            } => self.explain_statement_to_plan(verbose, analyze, *statement),
+            } => {
+                self.explain_to_plan(verbose, analyze, 
DFStatement::Statement(statement))
+            }
             Statement::Query(query) => self.query_to_plan(*query, 
planner_context),
             Statement::ShowVariable { variable } => 
self.show_variable_to_plan(&variable),
             Statement::SetVariable {
@@ -712,13 +719,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
     /// Generate a plan for EXPLAIN ... that will print out a plan
     ///
-    fn explain_statement_to_plan(
+    /// Note this is the sqlparser explain statement, not the
+    /// datafusion `EXPLAIN` statement.
+    fn explain_to_plan(
         &self,
         verbose: bool,
         analyze: bool,
-        statement: Statement,
+        statement: DFStatement,
     ) -> Result<LogicalPlan> {
-        let plan = self.sql_statement_to_plan(statement)?;
+        let plan = self.statement_to_plan(statement)?;
+        if matches!(plan, LogicalPlan::Explain(_)) {
+            return plan_err!("Nested EXPLAINs are not supported");
+        }
         let plan = Arc::new(plan);
         let schema = LogicalPlan::explain_schema();
         let schema = schema.to_dfschema_ref()?;
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index d46fbd65d8..c315266b3c 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -336,6 +336,18 @@ CopyTo: format=csv output_url=output.csv 
per_thread_output=false options: ()
     quick_test(sql, plan);
 }
 
+#[test]
+fn plan_explain_copy_to() {
+    let sql = "EXPLAIN COPY test_decimal to 'output.csv'";
+    let plan = r#"
+Explain
+  CopyTo: format=csv output_url=output.csv per_thread_output=false options: ()
+    TableScan: test_decimal
+    "#
+    .trim();
+    quick_test(sql, plan);
+}
+
 #[test]
 fn plan_copy_to_query() {
     let sql = "COPY (select * from test_decimal limit 10) to 'output.csv'";
diff --git a/datafusion/sqllogictest/test_files/copy.slt 
b/datafusion/sqllogictest/test_files/copy.slt
index 364459fa2d..a44a662ada 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -25,12 +25,19 @@ COPY source_table TO 'test_files/scratch/table' (format 
parquet, per_thread_outp
 ----
 2
 
-#Explain copy queries not currently working
-query error DataFusion error: This feature is not implemented: Unsupported SQL 
statement: Some\("COPY source_table TO 'test_files/scratch/table'"\)
+# Error case
+query error DataFusion error: Error during planning: Copy To format not 
explicitly set and unable to get file extension!
 EXPLAIN COPY source_table to 'test_files/scratch/table'
 
-query error DataFusion error: SQL error: ParserError\("Expected end of 
statement, found: source_table"\)
+query TT
 EXPLAIN COPY source_table to 'test_files/scratch/table' (format parquet, 
per_thread_output true)
+----
+logical_plan
+CopyTo: format=parquet output_url=test_files/scratch/table 
per_thread_output=true options: (format parquet, per_thread_output true)
+--TableScan: source_table projection=[col1, col2]
+physical_plan
+InsertExec: sink=ParquetSink(writer_mode=PutMultipart, file_groups=[])
+--MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
 
 # Copy more files to directory via query
 query IT
diff --git a/datafusion/sqllogictest/test_files/explain.slt 
b/datafusion/sqllogictest/test_files/explain.slt
index 7aa7870630..ad9b2be40e 100644
--- a/datafusion/sqllogictest/test_files/explain.slt
+++ b/datafusion/sqllogictest/test_files/explain.slt
@@ -101,13 +101,17 @@ set datafusion.explain.physical_plan_only = false
 
 
 ## explain nested
-statement error Explain must be root of the plan
+query error DataFusion error: Error during planning: Nested EXPLAINs are not 
supported
 EXPLAIN explain select 1
 
+## explain nested
+statement error DataFusion error: Error during planning: Nested EXPLAINs are 
not supported
+EXPLAIN EXPLAIN explain select 1
+
 statement ok
 set datafusion.explain.physical_plan_only = true
 
-statement error Explain must be root of the plan
+statement error DataFusion error: Error during planning: Nested EXPLAINs are 
not supported
 EXPLAIN explain select 1
 
 statement ok

Reply via email to