andygrove commented on code in PR #414:
URL: https://github.com/apache/arrow-ballista/pull/414#discussion_r1001835013
##########
ballista/scheduler/src/state/mod.rs:
##########
@@ -256,11 +260,46 @@ impl<T: 'static + AsLogicalPlan, U: 'static +
AsExecutionPlan> SchedulerState<T,
plan: &LogicalPlan,
) -> Result<()> {
let start = Instant::now();
+
+ // optimizing the plan here is redundant because the physical planner
will do this again
+ // but it is helpful to see what the optimized plan will be
let optimized_plan = session_ctx.optimize(plan)?;
+ debug!("Optimized plan: {}", optimized_plan.display_indent());
+
+ struct VerifyPathsExist {}
+ impl PlanVisitor for VerifyPathsExist {
+ type Error = BallistaError;
+
+ fn pre_visit(
+ &mut self,
+ plan: &LogicalPlan,
+ ) -> std::result::Result<bool, Self::Error> {
+ if let LogicalPlan::TableScan(scan) = plan {
+ let provider = source_as_provider(&scan.source)?;
+ if let Some(table) =
provider.as_any().downcast_ref::<ListingTable>()
+ {
+ for url in table.table_paths() {
+ // remove file:/// prefix and verify that the file
is accessible
+ let url = url.as_str();
+ let url =
url.strip_prefix("file:///").unwrap_or(url);
Review Comment:
I pushed a change so that this check is only performed for files on the
local file system (paths starting with `file:///`), and it now checks only
the first file.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]