[GitHub] [airflow] nuclearpinguin commented on a change in pull request #6725: [AIRFLOW-6157] Support for multiple executors

2019-12-04 Thread GitBox
nuclearpinguin commented on a change in pull request #6725: [AIRFLOW-6157] 
Support for multiple executors
URL: https://github.com/apache/airflow/pull/6725#discussion_r353689795
 
 

 ##
 File path: airflow/executors/executor_loader.py
 ##
 @@ -31,47 +32,108 @@ class ExecutorLoader:
 DASK_EXECUTOR = "DaskExecutor"
 KUBERNETES_EXECUTOR = "KubernetesExecutor"
 
-_default_executor: Optional[BaseExecutor] = None
+_default_executor: Optional[BaseExecutorProtocol] = None
+
+_all_executors: Dict[str, BaseExecutor] = dict()
 
 @classmethod
-def get_default_executor(cls) -> BaseExecutor:
+def get_default_executor(cls) -> BaseExecutorProtocol:
 """Creates a new instance of the configured executor if none exists 
and returns it"""
 if cls._default_executor is not None:
 return cls._default_executor
 
 from airflow.configuration import conf
-executor_name = conf.get('core', 'EXECUTOR')
+main_executor_name = conf.get('core', 'EXECUTOR')
 
-cls._default_executor = ExecutorLoader._get_executor(executor_name)
+main_executor = cls._get_executor(main_executor_name)
 
+additional_executor_specs = conf.get('core', "ADDITIONAL_EXECUTORS")
+if additional_executor_specs:
+executor_array = additional_executor_specs.split(' ')
+additional_executors_dict: Dict[str, BaseExecutor] = {}
+for executor_spec in executor_array:
+executor_name, queue_array_string = executor_spec.split(':')
+executor = cls._get_executor(executor_name)
+queue_array = queue_array_string.split(",")
+for queue_name in queue_array:
+additional_executors_dict[queue_name] = executor
+cls._default_executor = MultipleExecutors(
+main_executor=main_executor,
+additional_executors_dict=additional_executors_dict)
+else:
+cls._default_executor = main_executor
 from airflow import LoggingMixin
 log = LoggingMixin().log
-log.info("Using executor %s", executor_name)
+if additional_executor_specs:
+log.info("Using MultipleExecutor with main executor %s and 
additional executors %s",
+ main_executor_name, additional_executor_specs)
+else:
+log.info("Using executor %s", main_executor_name)
 
 return cls._default_executor
 
+@classmethod
+def get_or_create_executor(cls, executor_name: str,
+   create_executor: Callable[[], BaseExecutor]) -> 
BaseExecutor:
+"""Retrieves (and creates if needed) an executor with the name 
specified"""
+if executor_name in cls._all_executors:
+return cls._all_executors[executor_name]
+executor = create_executor()
+cls._all_executors[executor_name] = executor
+return executor
+
+@staticmethod
+def create_local_executor() -> BaseExecutor:
+"""Creates LocalExecutor"""
+from airflow.executors.local_executor import LocalExecutor
+return LocalExecutor()
+
 @staticmethod
-def _get_executor(executor_name: str) -> BaseExecutor:
+def create_sequential_executor() -> BaseExecutor:
+"""Creates SequentialExecutor"""
+from airflow.executors.sequential_executor import SequentialExecutor
+return SequentialExecutor()
+
+@staticmethod
+def create_celery_executor() -> BaseExecutor:
+"""Creates CeleryExecutor"""
+from airflow.executors.celery_executor import CeleryExecutor
+return CeleryExecutor()
+
+@staticmethod
+def create_dask_executor() -> BaseExecutor:
+"""Creates DaskExecutor"""
+from airflow.executors.dask_executor import DaskExecutor
+return DaskExecutor()
+
+@staticmethod
+def create_kubernetes_executor() -> BaseExecutor:
+"""Creates KubernetesExecutor"""
+from airflow.executors.kubernetes_executor import KubernetesExecutor
+return KubernetesExecutor()
+
+@classmethod
+def _get_executor(cls, executor_name: str) -> BaseExecutor:
 """
 Creates a new instance of the named executor.
 In case the executor name is unknown in airflow,
-look for it in the plugins
+look for it in the plugins.
 """
 if executor_name == ExecutorLoader.LOCAL_EXECUTOR:
-from airflow.executors.local_executor import LocalExecutor
-return LocalExecutor()
+return cls.get_or_create_executor(executor_name=executor_name,
+  
create_executor=ExecutorLoader.create_local_executor)
 elif executor_name == ExecutorLoader.SEQUENTIAL_EXECUTOR:
-from airflow.executors.sequential_executor import 
SequentialExecutor
-return SequentialExecutor()
+return cls.get_or_create_exe

[GitHub] [airflow] nuclearpinguin commented on a change in pull request #6725: [AIRFLOW-6157] Support for multiple executors

2019-12-04 Thread GitBox
nuclearpinguin commented on a change in pull request #6725: [AIRFLOW-6157] 
Support for multiple executors
URL: https://github.com/apache/airflow/pull/6725#discussion_r353691352
 
 

 ##
 File path: airflow/executors/multiple_executors.py
 ##
 @@ -0,0 +1,117 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Multiple  executor."""
+from itertools import chain
+from typing import Any, Dict, Iterable, Optional, Set
+
+from airflow.executors.base_executor import BaseExecutor, 
BaseExecutorProtocol, CommandType
+from airflow.models import TaskInstance
+from airflow.models.taskinstance import SimpleTaskInstance, TaskInstanceKeyType
+
+
+class MultipleExecutors(BaseExecutorProtocol):
+"""
+This executor can run multiple executors under the hood.
+"""
+def __init__(self, main_executor: BaseExecutor, additional_executors_dict: 
Dict[str, BaseExecutor]):
+super().__init__()
+self.main_executor = main_executor
+self.additional_executors_dict = additional_executors_dict
+self.executor_set: Set[BaseExecutor] = 
set(additional_executors_dict.values())
+self.executor_set.add(main_executor)
+self.log.info("Multiple executor")
+self.log.info("Main executor %s", str(main_executor))
+self.log.info("Additional executor dict %s", 
str(self.additional_executors_dict))
+self.log.info("Executor set %s", str(self.executor_set))
+
+def _get_executor_for_queue(self, queue: Optional[str]) -> BaseExecutor:
+executor = None
+if queue:
+executor = self.additional_executors_dict.get(queue)
+return self.main_executor if not executor else executor
+
+def start(self):
+for executor in self.executor_set:
+executor.start()
+
+def has_task(self, task_instance: TaskInstance) -> bool:
+executor = self._get_executor_for_queue(task_instance.queue)
+return executor.has_task(task_instance=task_instance)
+
+def sync(self) -> None:
+for executor in self.executor_set:
+executor.sync()
 
 Review comment:
  I just wonder if we should use try / except in such loops? Because now, when the 
first executor fails, everything fails. But with exception handling we could 
"execute as much as possible" and then throw an exception. WDYT?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services