This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6aa423b70b feature: Support `EXPLAIN COPY` (#7291)
6aa423b70b is described below
commit 6aa423b70b4eb1f28165919fbfb4efe323669bc4
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Aug 21 17:15:47 2023 -0400
feature: Support `EXPLAIN COPY` (#7291)
* Support EXPLAIN COPY
* clippy
* clippy
* Fix argument handling
* Apply suggestions from code review
Co-authored-by: Metehan Yıldırım
<[email protected]>
* Improve nested explain error
---------
Co-authored-by: Metehan Yıldırım
<[email protected]>
---
datafusion/core/src/execution/context.rs | 49 +++++----
datafusion/sql/src/parser.rs | 140 ++++++++++++++++++-------
datafusion/sql/src/statement.rs | 22 +++-
datafusion/sql/tests/sql_integration.rs | 12 +++
datafusion/sqllogictest/test_files/copy.slt | 13 ++-
datafusion/sqllogictest/test_files/explain.slt | 8 +-
6 files changed, 177 insertions(+), 67 deletions(-)
diff --git a/datafusion/core/src/execution/context.rs
b/datafusion/core/src/execution/context.rs
index ec09bdaec1..c0a0134fed 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -1698,30 +1698,39 @@ impl SessionState {
}
let mut visitor = RelationVisitor(&mut relations);
- match statement {
- DFStatement::Statement(s) => {
- let _ = s.as_ref().visit(&mut visitor);
- }
- DFStatement::CreateExternalTable(table) => {
- visitor
- .0
-
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
- }
- DFStatement::DescribeTableStmt(table) =>
visitor.insert(&table.table_name),
- DFStatement::CopyTo(CopyToStatement {
- source,
- target: _,
- options: _,
- }) => match source {
- CopyToSource::Relation(table_name) => {
- visitor.insert(table_name);
+ fn visit_statement(statement: &DFStatement, visitor: &mut
RelationVisitor<'_>) {
+ match statement {
+ DFStatement::Statement(s) => {
+ let _ = s.as_ref().visit(visitor);
}
- CopyToSource::Query(query) => {
- query.visit(&mut visitor);
+ DFStatement::CreateExternalTable(table) => {
+ visitor
+ .0
+
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
}
- },
+ DFStatement::DescribeTableStmt(table) => {
+ visitor.insert(&table.table_name)
+ }
+ DFStatement::CopyTo(CopyToStatement {
+ source,
+ target: _,
+ options: _,
+ }) => match source {
+ CopyToSource::Relation(table_name) => {
+ visitor.insert(table_name);
+ }
+ CopyToSource::Query(query) => {
+ query.visit(visitor);
+ }
+ },
+ DFStatement::Explain(explain) => {
+ visit_statement(&explain.statement, visitor)
+ }
+ }
}
+ visit_statement(statement, &mut visitor);
+
// Always include information_schema if available
if self.config.information_schema() {
for s in INFORMATION_SCHEMA_TABLES {
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 13159ad8d1..cab8f0f06e 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -44,6 +44,35 @@ fn parse_file_type(s: &str) -> Result<String, ParserError> {
Ok(s.to_uppercase())
}
+/// DataFusion specific EXPLAIN (needed so we can EXPLAIN datafusion
+/// specific COPY and other statements)
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ExplainStatement {
+ pub analyze: bool,
+ pub verbose: bool,
+ pub statement: Box<Statement>,
+}
+
+impl fmt::Display for ExplainStatement {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let Self {
+ analyze,
+ verbose,
+ statement,
+ } = self;
+
+ write!(f, "EXPLAIN ")?;
+ if *analyze {
+ write!(f, "ANALYZE ")?;
+ }
+ if *verbose {
+ write!(f, "VERBOSE ")?;
+ }
+
+ write!(f, "{statement}")
+ }
+}
+
/// DataFusion extension DDL for `COPY`
///
/// # Syntax:
@@ -74,7 +103,7 @@ pub struct CopyToStatement {
/// The URL to where the data is heading
pub target: String,
/// Target specific options
- pub options: HashMap<String, Value>,
+ pub options: Vec<(String, Value)>,
}
impl fmt::Display for CopyToStatement {
@@ -88,10 +117,8 @@ impl fmt::Display for CopyToStatement {
write!(f, "COPY {source} TO {target}")?;
if !options.is_empty() {
- let mut opts: Vec<_> =
- options.iter().map(|(k, v)| format!("{k} {v}")).collect();
+ let opts: Vec<_> = options.iter().map(|(k, v)| format!("{k}
{v}")).collect();
// print them in sorted order
- opts.sort_unstable();
write!(f, " ({})", opts.join(", "))?;
}
@@ -208,6 +235,8 @@ pub enum Statement {
DescribeTableStmt(DescribeTableStmt),
/// Extension: `COPY TO`
CopyTo(CopyToStatement),
+ /// EXPLAIN for extensions
+ Explain(ExplainStatement),
}
impl fmt::Display for Statement {
@@ -217,11 +246,12 @@ impl fmt::Display for Statement {
Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"),
Statement::DescribeTableStmt(_) => write!(f, "DESCRIBE TABLE ..."),
Statement::CopyTo(stmt) => write!(f, "{stmt}"),
+ Statement::Explain(stmt) => write!(f, "{stmt}"),
}
}
}
-/// DataFusion SQL Parser based on [`sqlparser`]
+/// Datafusion SQL Parser based on [`sqlparser`]
///
/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s
/// [`Parser`](sqlparser::parser::Parser).
@@ -307,24 +337,24 @@ impl<'a> DFParser<'a> {
Token::Word(w) => {
match w.keyword {
Keyword::CREATE => {
- // move one token forward
- self.parser.next_token();
- // use custom parsing
+ self.parser.next_token(); // CREATE
self.parse_create()
}
Keyword::COPY => {
- // move one token forward
- self.parser.next_token();
+ self.parser.next_token(); // COPY
self.parse_copy()
}
Keyword::DESCRIBE => {
- // move one token forward
- self.parser.next_token();
- // use custom parsing
+ self.parser.next_token(); // DESCRIBE
self.parse_describe()
}
+ Keyword::EXPLAIN => {
+ // (TODO parse all supported statements)
+ self.parser.next_token(); // EXPLAIN
+ self.parse_explain()
+ }
_ => {
- // use the native parser
+ // use sqlparser-rs parser
Ok(Statement::Statement(Box::from(
self.parser.parse_statement()?,
)))
@@ -369,7 +399,7 @@ impl<'a> DFParser<'a> {
let options = if self.parser.peek_token().token == Token::LParen {
self.parse_value_options()?
} else {
- HashMap::new()
+ vec![]
};
Ok(Statement::CopyTo(CopyToStatement {
@@ -421,6 +451,19 @@ impl<'a> DFParser<'a> {
}
}
+ /// Parse a SQL `EXPLAIN`
+ pub fn parse_explain(&mut self) -> Result<Statement, ParserError> {
+ let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
+ let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
+ let statement = self.parse_statement()?;
+
+ Ok(Statement::Explain(ExplainStatement {
+ statement: Box::new(statement),
+ analyze,
+ verbose,
+ }))
+ }
+
/// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
if self.parser.parse_keyword(Keyword::EXTERNAL) {
@@ -758,14 +801,14 @@ impl<'a> DFParser<'a> {
/// Unlike [`Self::parse_string_options`], this method supports
/// keywords as key names as well as multiple value types such as
/// Numbers as well as Strings.
- fn parse_value_options(&mut self) -> Result<HashMap<String, Value>,
ParserError> {
- let mut options = HashMap::new();
+ fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>,
ParserError> {
+ let mut options = vec![];
self.parser.expect_token(&Token::LParen)?;
loop {
let key = self.parse_option_key()?;
let value = self.parse_option_value()?;
- options.insert(key, value);
+ options.push((key, value));
let comma = self.parser.consume_token(&Token::Comma);
if self.parser.consume_token(&Token::RParen) {
// allow a trailing comma, even though it's not in standard
@@ -1285,13 +1328,39 @@ mod tests {
let expected = Statement::CopyTo(CopyToStatement {
source: object_name("foo"),
target: "bar".to_string(),
- options: HashMap::new(),
+ options: vec![],
});
assert_eq!(verified_stmt(sql), expected);
Ok(())
}
+ #[test]
+ fn explain_copy_to_table_to_table() -> Result<(), ParserError> {
+ let cases = vec![
+ ("EXPLAIN COPY foo TO bar", false, false),
+ ("EXPLAIN ANALYZE COPY foo TO bar", true, false),
+ ("EXPLAIN VERBOSE COPY foo TO bar", false, true),
+ ("EXPLAIN ANALYZE VERBOSE COPY foo TO bar", true, true),
+ ];
+ for (sql, analyze, verbose) in cases {
+ println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}");
+
+ let expected_copy = Statement::CopyTo(CopyToStatement {
+ source: object_name("foo"),
+ target: "bar".to_string(),
+ options: vec![],
+ });
+ let expected = Statement::Explain(ExplainStatement {
+ analyze,
+ verbose,
+ statement: Box::new(expected_copy),
+ });
+ assert_eq!(verified_stmt(sql), expected);
+ }
+ Ok(())
+ }
+
#[test]
fn copy_to_query_to_table() -> Result<(), ParserError> {
let statement = verified_stmt("SELECT 1");
@@ -1313,7 +1382,7 @@ mod tests {
let expected = Statement::CopyTo(CopyToStatement {
source: CopyToSource::Query(query),
target: "bar".to_string(),
- options: HashMap::new(),
+ options: vec![],
});
assert_eq!(verified_stmt(sql), expected);
Ok(())
@@ -1325,10 +1394,10 @@ mod tests {
let expected = Statement::CopyTo(CopyToStatement {
source: object_name("foo"),
target: "bar".to_string(),
- options: HashMap::from([(
+ options: vec![(
"row_group_size".to_string(),
Value::Number("55".to_string(), false),
- )]),
+ )],
});
assert_eq!(verified_stmt(sql), expected);
Ok(())
@@ -1336,17 +1405,11 @@ mod tests {
#[test]
fn copy_to_multi_options() -> Result<(), ParserError> {
+ // order of options is preserved
let sql =
"COPY foo TO bar (format parquet, row_group_size 55, compression
snappy)";
- // canonical order is alphabetical
- let canonical =
- "COPY foo TO bar (compression snappy, format parquet,
row_group_size 55)";
- let expected_options = HashMap::from([
- (
- "compression".to_string(),
- Value::UnQuotedString("snappy".to_string()),
- ),
+ let expected_options = vec![
(
"format".to_string(),
Value::UnQuotedString("parquet".to_string()),
@@ -1355,14 +1418,17 @@ mod tests {
"row_group_size".to_string(),
Value::Number("55".to_string(), false),
),
- ]);
+ (
+ "compression".to_string(),
+ Value::UnQuotedString("snappy".to_string()),
+ ),
+ ];
- let options =
- if let Statement::CopyTo(copy_to) = one_statement_parses_to(sql,
canonical) {
- copy_to.options
- } else {
- panic!("Expected copy");
- };
+ let options = if let Statement::CopyTo(copy_to) = verified_stmt(sql) {
+ copy_to.options
+ } else {
+ panic!("Expected copy");
+ };
assert_eq!(options, expected_options);
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index c684cfaa97..b5bb8e3d05 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -17,7 +17,7 @@
use crate::parser::{
CopyToSource, CopyToStatement, CreateExternalTable, DFParser,
DescribeTableStmt,
- LexOrdering, Statement as DFStatement,
+ ExplainStatement, LexOrdering, Statement as DFStatement,
};
use crate::planner::{
object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel,
@@ -93,6 +93,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
DFStatement::Statement(s) => self.sql_statement_to_plan(*s),
DFStatement::DescribeTableStmt(s) =>
self.describe_table_to_plan(s),
DFStatement::CopyTo(s) => self.copy_to_plan(s),
+ DFStatement::Explain(ExplainStatement {
+ verbose,
+ analyze,
+ statement,
+ }) => self.explain_to_plan(verbose, analyze, *statement),
}
}
@@ -127,7 +132,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
format: _,
describe_alias: _,
..
- } => self.explain_statement_to_plan(verbose, analyze, *statement),
+ } => {
+ self.explain_to_plan(verbose, analyze,
DFStatement::Statement(statement))
+ }
Statement::Query(query) => self.query_to_plan(*query,
planner_context),
Statement::ShowVariable { variable } =>
self.show_variable_to_plan(&variable),
Statement::SetVariable {
@@ -712,13 +719,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
/// Generate a plan for EXPLAIN ... that will print out a plan
///
- fn explain_statement_to_plan(
+ /// Note this is the sqlparser explain statement, not the
+ /// datafusion `EXPLAIN` statement.
+ fn explain_to_plan(
&self,
verbose: bool,
analyze: bool,
- statement: Statement,
+ statement: DFStatement,
) -> Result<LogicalPlan> {
- let plan = self.sql_statement_to_plan(statement)?;
+ let plan = self.statement_to_plan(statement)?;
+ if matches!(plan, LogicalPlan::Explain(_)) {
+ return plan_err!("Nested EXPLAINs are not supported");
+ }
let plan = Arc::new(plan);
let schema = LogicalPlan::explain_schema();
let schema = schema.to_dfschema_ref()?;
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index d46fbd65d8..c315266b3c 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -336,6 +336,18 @@ CopyTo: format=csv output_url=output.csv
per_thread_output=false options: ()
quick_test(sql, plan);
}
+#[test]
+fn plan_explain_copy_to() {
+ let sql = "EXPLAIN COPY test_decimal to 'output.csv'";
+ let plan = r#"
+Explain
+ CopyTo: format=csv output_url=output.csv per_thread_output=false options: ()
+ TableScan: test_decimal
+ "#
+ .trim();
+ quick_test(sql, plan);
+}
+
#[test]
fn plan_copy_to_query() {
let sql = "COPY (select * from test_decimal limit 10) to 'output.csv'";
diff --git a/datafusion/sqllogictest/test_files/copy.slt
b/datafusion/sqllogictest/test_files/copy.slt
index 364459fa2d..a44a662ada 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -25,12 +25,19 @@ COPY source_table TO 'test_files/scratch/table' (format
parquet, per_thread_outp
----
2
-#Explain copy queries not currently working
-query error DataFusion error: This feature is not implemented: Unsupported SQL
statement: Some\("COPY source_table TO 'test_files/scratch/table'"\)
+# Error case
+query error DataFusion error: Error during planning: Copy To format not
explicitly set and unable to get file extension!
EXPLAIN COPY source_table to 'test_files/scratch/table'
-query error DataFusion error: SQL error: ParserError\("Expected end of
statement, found: source_table"\)
+query TT
EXPLAIN COPY source_table to 'test_files/scratch/table' (format parquet,
per_thread_output true)
+----
+logical_plan
+CopyTo: format=parquet output_url=test_files/scratch/table
per_thread_output=true options: (format parquet, per_thread_output true)
+--TableScan: source_table projection=[col1, col2]
+physical_plan
+InsertExec: sink=ParquetSink(writer_mode=PutMultipart, file_groups=[])
+--MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
# Copy more files to directory via query
query IT
diff --git a/datafusion/sqllogictest/test_files/explain.slt
b/datafusion/sqllogictest/test_files/explain.slt
index 7aa7870630..ad9b2be40e 100644
--- a/datafusion/sqllogictest/test_files/explain.slt
+++ b/datafusion/sqllogictest/test_files/explain.slt
@@ -101,13 +101,17 @@ set datafusion.explain.physical_plan_only = false
## explain nested
-statement error Explain must be root of the plan
+query error DataFusion error: Error during planning: Nested EXPLAINs are not
supported
EXPLAIN explain select 1
+## explain nested
+statement error DataFusion error: Error during planning: Nested EXPLAINs are
not supported
+EXPLAIN EXPLAIN explain select 1
+
statement ok
set datafusion.explain.physical_plan_only = true
-statement error Explain must be root of the plan
+statement error DataFusion error: Error during planning: Nested EXPLAINs are
not supported
EXPLAIN explain select 1
statement ok