alamb commented on a change in pull request #2029:
URL: https://github.com/apache/arrow-datafusion/pull/2029#discussion_r830581887
##########
File path: datafusion/src/physical_optimizer/aggregate_statistics.rs
##########
@@ -297,7 +297,7 @@ mod tests {
) -> Result<()> {
let session_ctx = SessionContext::new();
let task_ctx = session_ctx.task_ctx();
- let conf = session_ctx.state.lock().clone().config;
+ let conf = session_ctx.copied_config();
Review comment:
As mentioned elsewhere, I think most rust programmers would expect this
code to look like
```suggestion
let conf = session_ctx.config().clone();
```
##########
File path: ballista/rust/scheduler/src/scheduler_server/mod.rs
##########
@@ -160,9 +160,19 @@ impl<T: 'static + AsLogicalPlan, U: 'static +
AsExecutionPlan> SchedulerServer<T
}
}
-/// Create a DataFusion context that is compatible with Ballista
+/// Create a DataFusion session context that is compatible with Ballista
Configuration
pub fn create_datafusion_context(config: &BallistaConfig) -> SessionContext {
let config =
SessionConfig::new().with_target_partitions(config.default_shuffle_partitions());
SessionContext::with_config(config)
}
+
+/// Update the existing DataFusion session context with Ballista Configuration
+pub fn update_datafusion_context(
+ session_ctx: Arc<SessionContext>,
+ config: &BallistaConfig,
+) -> Arc<SessionContext> {
+ session_ctx.state.lock().config.target_partitions =
Review comment:
Is the plan over time to copy more fields from Ballista config into
`SessionState`?
##########
File path: datafusion/src/execution/context.rs
##########
@@ -667,34 +635,15 @@ impl SessionContext {
/// Optimizes the logical plan by applying optimizer rules.
pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
- if let LogicalPlan::Explain(e) = plan {
- let mut stringified_plans = e.stringified_plans.clone();
-
- // optimize the child plan, capturing the output of each optimizer
- let plan =
- self.optimize_internal(e.plan.as_ref(), |optimized_plan,
optimizer| {
- let optimizer_name = optimizer.name().to_string();
- let plan_type = PlanType::OptimizedLogicalPlan {
optimizer_name };
-
stringified_plans.push(optimized_plan.to_stringified(plan_type));
- })?;
-
- Ok(LogicalPlan::Explain(Explain {
- verbose: e.verbose,
- plan: Arc::new(plan),
- stringified_plans,
- schema: e.schema.clone(),
- }))
- } else {
- self.optimize_internal(plan, |_, _| {})
- }
+ self.state.lock().optimize(plan)
Review comment:
This lock I think will effectively serialize all optimizer runs (so the
optimizer can not run in multiple tasks / threads at once)
##########
File path: datafusion/src/execution/runtime_env.rs
##########
@@ -26,19 +26,21 @@ use crate::{
},
};
+use crate::datasource::object_store::{ObjectStore, ObjectStoreRegistry};
+use datafusion_common::DataFusionError;
use std::fmt::{Debug, Formatter};
+use std::path::PathBuf;
use std::sync::Arc;
#[derive(Clone)]
-/// Execution runtime environment. This structure is passed to the
-/// physical plans when they are run.
+/// Execution runtime environment.
pub struct RuntimeEnv {
- /// Default batch size while creating new batches
- pub batch_size: usize,
/// Runtime memory management
pub memory_manager: Arc<MemoryManager>,
/// Manage temporary files during query execution
pub disk_manager: Arc<DiskManager>,
+ /// Object Store Registry
+ pub object_store_registry: Arc<ObjectStoreRegistry>,
Review comment:
I think this is a good change 👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]