alamb commented on code in PR #4721:
URL: https://github.com/apache/arrow-datafusion/pull/4721#discussion_r1056643369


##########
datafusion/core/src/execution/context.rs:
##########
@@ -492,6 +497,15 @@ impl SessionContext {
         }
     }
 
+    /// Creates a [`DataFrame`] that will execute the specified
+    /// LogicalPlan, but will error if the plans represent DDL such as
+    /// `CREATE TABLE`
+    ///
+    /// Use [`Self::dataframe`] to run plans with DDL
+    pub fn dataframe_without_ddl(&self, plan: LogicalPlan) -> 
Result<DataFrame> {

Review Comment:
   This is the API to support 
https://github.com/apache/arrow-datafusion/issues/4720



##########
datafusion/core/src/physical_plan/planner.rs:
##########
@@ -1097,15 +1097,15 @@ impl DefaultPhysicalPlanner {
                     // TABLE" -- it must be handled at a higher level (so
                     // that the appropriate table can be registered with
                     // the context)
-                    Err(DataFusionError::Internal(
+                    Err(DataFusionError::Plan(

Review Comment:
   These are not really "internal errors" as they can be triggered by trying to 
run a sql query that contains DDL



##########
datafusion/core/src/execution/context.rs:
##########
@@ -245,26 +245,31 @@ impl SessionContext {
         self.state.read().config.clone()
     }
 
-    /// Creates a [`DataFrame`] that will execute a SQL query.
+    /// Creates a [`DataFrame`] that executes a SQL query supported by
+    /// DataFusion, including DDL such as (such as `CREATE TABLE`).
     ///
-    /// This method is `async` because queries of type `CREATE EXTERNAL TABLE`
-    /// might require the schema to be inferred.
+    /// You can use [`Self::plan_sql`] and
+    /// [`DataFrame::create_physical_plan`] directly if you need read only
+    /// query support (no way to create external tables, for example)
+    ///
+    /// This method is `async` because queries of type `CREATE
+    /// EXTERNAL TABLE` might require the schema to be inferred.
     pub async fn sql(&self, sql: &str) -> Result<DataFrame> {
-        let mut statements = DFParser::parse_sql(sql)?;
-        if statements.len() != 1 {
-            return Err(DataFusionError::NotImplemented(
-                "The context currently only supports a single SQL 
statement".to_string(),
-            ));
-        }
-
-        // create a query planner
-        let plan = {
-            // TODO: Move catalog off SessionState onto SessionContext
-            let state = self.state.read();
-            let query_planner = SqlToRel::new(&*state);
-            query_planner.statement_to_plan(statements.pop_front().unwrap())?
-        };
+        let plan = self.plan_sql(sql)?;
+        self.dataframe(plan).await
+    }
 
+    /// Creates a [`DataFrame`] that will execute the specified
+    /// LogicalPlan, including DDL such as (such as `CREATE TABLE`).

Review Comment:
   ```suggestion
       /// LogicalPlan, including DDL (such as `CREATE TABLE`).
   ```



##########
datafusion/core/src/execution/context.rs:
##########
@@ -245,26 +245,31 @@ impl SessionContext {
         self.state.read().config.clone()
     }
 
-    /// Creates a [`DataFrame`] that will execute a SQL query.
+    /// Creates a [`DataFrame`] that executes a SQL query supported by
+    /// DataFusion, including DDL such as (such as `CREATE TABLE`).
     ///
-    /// This method is `async` because queries of type `CREATE EXTERNAL TABLE`
-    /// might require the schema to be inferred.
+    /// You can use [`Self::plan_sql`] and
+    /// [`DataFrame::create_physical_plan`] directly if you need read only
+    /// query support (no way to create external tables, for example)
+    ///
+    /// This method is `async` because queries of type `CREATE
+    /// EXTERNAL TABLE` might require the schema to be inferred.
     pub async fn sql(&self, sql: &str) -> Result<DataFrame> {

Review Comment:
   the core SessionContext::sql API does not change



##########
benchmarks/src/bin/tpch.rs:
##########
@@ -324,18 +324,17 @@ async fn execute_query(
     debug: bool,
     enable_scheduler: bool,
 ) -> Result<Vec<RecordBatch>> {
-    let plan = ctx.sql(sql).await?;
-    let plan = plan.into_unoptimized_plan();
+    let plan = ctx.sql(sql).await?.into_unoptimized_plan();
 
     if debug {
         println!("=== Logical plan ===\n{:?}\n", plan);
     }
 
-    let plan = ctx.optimize(&plan)?;
+    let plan = ctx.dataframe(plan).await?.into_optimized_plan()?;

Review Comment:
   this is the new pattern to get an optimized plan (rather than calling 
`ctx.optimize` directly



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to