thinkharderdev commented on code in PR #153:
URL: https://github.com/apache/arrow-ballista/pull/153#discussion_r954924242


##########
ballista/rust/scheduler/src/state/task_manager.rs:
##########
@@ -37,16 +37,19 @@ use ballista_core::serde::{AsExecutionPlan, BallistaCodec};
 use datafusion::physical_plan::ExecutionPlan;
 use datafusion::prelude::SessionContext;
 use datafusion_proto::logical_plan::AsLogicalPlan;
-use log::{debug, info, warn};
+#[cfg(not(test))]
+use log::info;
+use log::{debug, error, warn};
 use rand::distributions::Alphanumeric;
 use rand::{thread_rng, Rng};
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 use std::default::Default;
 use std::sync::Arc;
 use tokio::sync::RwLock;
 use tonic::transport::Channel;
 
 type ExecutorClients = Arc<RwLock<HashMap<String, 
ExecutorGrpcClient<Channel>>>>;
+type ExecutionGraphCache = Arc<RwLock<HashMap<String, 
Arc<RwLock<ExecutionGraph>>>>>;
 
 #[derive(Clone)]
 pub struct TaskManager<T: 'static + AsLogicalPlan, U: 'static + 
AsExecutionPlan> {

Review Comment:
   The executor only has one scheduler connection but the scheduler's can 
(currently) assign tasks to any executor since the executor pool is a shared 
global state. So they would currently report status to only one scheduler 
instance (unless the scheduler endpoint was pointing to a load balancer or 
something) but there is no guarantee that they would have received the task 
assignment from that scheduler. 
   
   I think it should be relatively straightforward though. The `TaskAssignment` 
would just need a scheduler URL in it and the executor would report status back 
to that URL (instead of maintaining a single global connection). 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to