This is an automated email from the ASF dual-hosted git repository.

jbonofre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-liminal.git

commit 6da38b82f53a20a91bdbf3f4a4afd56c709fb206
Author: assapin <[email protected]>
AuthorDate: Thu Jun 11 16:33:22 2020 +0300

    Rainbow local mode
---
 README.md                                          |  44 +++++++++--
 images/airflow.png                                 | Bin 0 -> 70049 bytes
 rainbow/build/build_rainbows.py                    |  25 ++++--
 rainbow-cli => rainbow/core/environment.py         |  32 ++++----
 rainbow/core/util/class_util.py                    |  62 +++++++++------
 rainbow/core/util/files_util.py                    |   2 +-
 .../util/files_util.py => runners/__init__.py}     |  13 ---
 rainbow/runners/airflow/config/__init__.py         |   0
 .../airflow/config/standalone_variable_backend.py  |  27 +++----
 rainbow/runners/airflow/dag/rainbow_dags.py        |  31 +++++---
 .../airflow/operators/job_status_operator.py       |   9 ++-
 .../runners/airflow/tasks/defaults/job_end.py~HEAD |  44 -----------
 .../airflow/tasks/defaults/job_start.py~HEAD       |  43 ----------
 rainbow/runners/airflow/tasks/python.py            |  11 +--
 rainbow/runners/airflow/tasks/spark.py             |   1 -
 requirements-airflow.txt                           |   5 ++
 requirements.txt                                   |   6 +-
 scripts/docker-compose.yml                         |  40 ++++++++++
 scripts/package.sh                                 |  69 ++++++++++++++++
 scripts/rainbow                                    |  87 +++++++++++++++++++++
 setup.py                                           |  48 ++++++++++++
 tests/runners/airflow/rainbow/requirements.txt     |   1 +
 tests/util/test_class_utils.py                     |  32 ++++++++
 .../util/test_pkg_1/__init__.py                    |  13 ---
 .../util/test_pkg_1/test_clazz_base.py             |  15 ++--
 .../util/test_pkg_1/test_pkg_1_1/__init__.py       |  13 ---
 .../test_pkg_1/test_pkg_1_1/test_clazz_child_1.py  |  18 ++---
 .../test_pkg_1/test_pkg_1_1/test_clazz_child_2.py  |  14 +---
 .../test_pkg_1_1/test_pkg_1_1_1/__init__.py        |  13 ---
 .../test_pkg_1_1_1/test_clazz_leaf_1.py            |  21 ++---
 .../test_pkg_1_1/test_pkg_1_1_2/__init__.py        |   0
 .../test_pkg_1_1_2/test_clazz_leaf_2.py            |   8 ++
 32 files changed, 469 insertions(+), 278 deletions(-)

diff --git a/README.md b/README.md
index 467edf2..ee2f961 100644
--- a/README.md
+++ b/README.md
@@ -76,12 +76,44 @@ services:
         function: myendpoint1func
 ```
 
-## Example repository structure
-
-[Example repository structure](
-https://github.com/Natural-Intelligence/rainbow/tree/master/tests/runners/airflow/rainbow]
-)
 
 # Installation
+1. Install this package
+```bash
+   pip install 
git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode
+```
+2. Optional: set RAINBOW_HOME to a path of your choice (if not set, it defaults 
to ~/rainbow_home)
+```bash
+echo 'export RAINBOW_HOME=</path/to/some/folder>' >> ~/.bash_profile && source 
~/.bash_profile
+```
+
+# Authoring pipelines
+
+This involves at minimum creating a single file called rainbow.yml as in the 
example above.
+
+If your pipeline requires custom python code to implement tasks, they should 
be organized 
+[like 
this](https://github.com/Natural-Intelligence/rainbow/tree/master/tests/runners/airflow/rainbow)
+
+If your pipeline introduces imports of external packages which are not 
already a part 
+of the rainbow framework (i.e. you had to pip install them yourself), you need 
to also provide 
+a requirements.txt in the root of your project.
+
+# Testing the pipeline locally
+
+When your pipeline code is ready, you can test it by running it locally on 
your machine.
+
+1. Deploy the pipeline:
+```bash
+cd </path/to/your/rainbow/code> 
+rainbow deploy
+```
+2. Make sure you have docker running
+3. Start the Server
+```bash
+rainbow start
+```
+4. Navigate to <http://localhost:8080/admin>
+5. You should see your 
![pipeline](https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/images/airflow.png)
 
-TODO: installation.
+### Running Tests (for contributors)
+When doing local development and running Rainbow unit-tests, make sure to set 
RAINBOW_STAND_ALONE_MODE=True
diff --git a/images/airflow.png b/images/airflow.png
new file mode 100644
index 0000000..229f8fa
Binary files /dev/null and b/images/airflow.png differ
diff --git a/rainbow/build/build_rainbows.py b/rainbow/build/build_rainbows.py
index b7ea6eb..66d27cb 100644
--- a/rainbow/build/build_rainbows.py
+++ b/rainbow/build/build_rainbows.py
@@ -74,33 +74,42 @@ def __build_image(base_path, builder_config, builder):
 
 
 def __get_task_build_class(task_type):
-    return task_build_classes[task_type] if task_type in task_build_classes 
else None
+    return task_build_types.get(task_type, None)
 
 
 def __get_service_build_class(service_type):
-    return service_build_classes[service_type] if service_type in 
service_build_classes else None
+    return service_build_types.get(service_type, None)
 
 
 print(f'Loading image builder implementations..')
 
 # TODO: add configuration for user image builders package
-image_builders_package = 'rainbow/build/image'
-user_image_builders_package = 'TODO: user_image_builders_package'
+image_builders_package = 'rainbow.build.image'
+# user_image_builders_package = 'TODO: user_image_builders_package'
 
 task_build_classes = class_util.find_subclasses_in_packages(
-    [image_builders_package, user_image_builders_package],
+    [image_builders_package],
     ImageBuilder)
 
+
+def get_types_dict(task_build_classes):
+    # take module name from class name
+    return {x.split(".")[-2]: c for x, c in task_build_classes.items()}
+
+
+task_build_types = get_types_dict(task_build_classes)
+
 print(f'Finished loading image builder implementations: {task_build_classes}')
 
 print(f'Loading service image builder implementations..')
 
 # TODO: add configuration for user service image builders package
-service_builders_package = 'rainbow/build/service'
-user_service_builders_package = 'TODO: user_service_builders_package'
+service_builders_package = 'rainbow.build.service'
+# user_service_builders_package = 'TODO: user_service_builders_package'
 
 service_build_classes = class_util.find_subclasses_in_packages(
-    [service_builders_package, user_service_builders_package],
+    [service_builders_package],
     ServiceImageBuilderMixin)
 
+service_build_types = get_types_dict(service_build_classes)
 print(f'Finished loading service image builder implementations: 
{service_build_classes}')
diff --git a/rainbow-cli b/rainbow/core/environment.py
old mode 100755
new mode 100644
similarity index 50%
copy from rainbow-cli
copy to rainbow/core/environment.py
index 4f16b4e..27f4d41
--- a/rainbow-cli
+++ b/rainbow/core/environment.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -17,24 +15,24 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import os
-
-import click
-
-from rainbow.build import build_rainbows
 
+import os
 
-@click.group()
-def cli():
-    pass
+DEFAULT_DAGS_ZIP_NAME = 'rainbows.zip'
+DEFAULT_RAINBOW_HOME = os.path.expanduser('~/rainbow_home')
+DEFAULT_RAINBOWS_SUBDIR = "rainbows"
+RAINBOW_HOME_PARAM_NAME = "RAINBOW_HOME"
 
 
-@cli.command()
-@click.option('--path', default=os.getcwd(), help='Build within this path.')
-def build(path):
-    click.echo(f'Building rainbows in {path}')
-    build_rainbows.build_rainbows(path)
+def get_rainbow_home():
+    if not os.environ.get(RAINBOW_HOME_PARAM_NAME):
+        print("no environment parameter called RAINBOW_HOME detected")
+        print(f"registering {DEFAULT_RAINBOW_HOME} as the RAINBOW_HOME 
directory")
+        os.environ[RAINBOW_HOME_PARAM_NAME] = DEFAULT_RAINBOW_HOME
+    return os.environ.get(RAINBOW_HOME_PARAM_NAME, DEFAULT_RAINBOW_HOME)
 
 
-if __name__ == '__main__':
-    cli()
+def get_dags_dir():
+    # if we are inside airflow, we will take it from the configured dags folder
+    base_dir = os.environ.get("AIRFLOW__CORE__DAGS_FOLDER", get_rainbow_home())
+    return os.path.join(base_dir, DEFAULT_RAINBOWS_SUBDIR)
diff --git a/rainbow/core/util/class_util.py b/rainbow/core/util/class_util.py
index e083477..129c61a 100644
--- a/rainbow/core/util/class_util.py
+++ b/rainbow/core/util/class_util.py
@@ -17,9 +17,7 @@
 # under the License.
 
 import importlib.util
-import inspect
-import os
-import sys
+import pkgutil
 
 
 def find_subclasses_in_packages(packages, parent_class):
@@ -27,28 +25,42 @@ def find_subclasses_in_packages(packages, parent_class):
     Finds all subclasses of given parent class within given packages
     :return: map of module ref -> class
     """
-    classes = {}
-
-    for py_path in [a for a in sys.path]:
-        for root, directories, files in os.walk(py_path):
-            if any(package in root for package in packages):
-                for file in files:
-                    file_path = os.path.join(root, file)
-                    if file.endswith('.py') and '__pycache__' not in file_path:
-                        spec = 
importlib.util.spec_from_file_location(file[:-3], file_path)
-                        mod = importlib.util.module_from_spec(spec)
-                        spec.loader.exec_module(mod)
-                        for name, obj in inspect.getmembers(mod):
-                            if inspect.isclass(obj) and not 
obj.__name__.endswith('Mixin'):
-                                module_name = mod.__name__
-                                class_name = obj.__name__
-                                parent_module = root[len(py_path) + 
1:].replace('/', '.')
-                                module = 
parent_module.replace('airflow.dags.', '') + \
-                                         '.' + module_name
-                                clazz = __get_class(module, class_name)
-                                if issubclass(clazz, parent_class):
-                                    classes.update({module_name: clazz})
-    return classes
+    module_content = {}
+    for p in packages:
+        module_content.update(import_module(p))
+
+    subclasses = set()
+    work = [parent_class]
+    while work:
+        parent = work.pop()
+        for child in parent.__subclasses__():
+            if child not in subclasses:
+                work.append(child)
+                # verify that the found class is in the relevant module
+                for p in packages:
+                    if p in child.__module__:
+                        subclasses.add(child)
+                        break
+
+    result = {sc.__module__ + "." + sc.__name__: sc for sc in subclasses}
+    return result
+
+
+def import_module(package, recrsive=True):
+    """ Import all submodules of a module, recursively, including subpackages
+    :param package: package (name or actual module)
+    :type package: str | module
+    :rtype: dict[str, types.ModuleType]
+    """
+    if isinstance(package, str):
+        package = importlib.import_module(package)
+    results = {}
+    for loader, name, is_pkg in pkgutil.walk_packages(package.__path__):
+        full_name = package.__name__ + '.' + name
+        results[full_name] = importlib.import_module(full_name)
+        if recrsive and is_pkg:
+            results.update(import_module(full_name))
+    return results
 
 
 def __get_class(the_module, the_class):
diff --git a/rainbow/core/util/files_util.py b/rainbow/core/util/files_util.py
index b1d1daf..4a03020 100644
--- a/rainbow/core/util/files_util.py
+++ b/rainbow/core/util/files_util.py
@@ -24,7 +24,7 @@ def find_config_files(path):
     print(path)
     for r, d, f in os.walk(path):
         for file in f:
-            print(os.path.basename(file))
             if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
+                print(os.path.join(r, file))
                 files.append(os.path.join(r, file))
     return files
diff --git a/rainbow/core/util/files_util.py b/rainbow/runners/__init__.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to rainbow/runners/__init__.py
index b1d1daf..217e5db 100644
--- a/rainbow/core/util/files_util.py
+++ b/rainbow/runners/__init__.py
@@ -15,16 +15,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import os
-
-
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
diff --git a/rainbow/runners/airflow/config/__init__.py 
b/rainbow/runners/airflow/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rainbow-cli 
b/rainbow/runners/airflow/config/standalone_variable_backend.py
old mode 100755
new mode 100644
similarity index 64%
copy from rainbow-cli
copy to rainbow/runners/airflow/config/standalone_variable_backend.py
index 4f16b4e..d7df06c
--- a/rainbow-cli
+++ b/rainbow/runners/airflow/config/standalone_variable_backend.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -18,23 +16,20 @@
 # specific language governing permissions and limitations
 # under the License.
 import os
+from os import environ
 
-import click
-
-from rainbow.build import build_rainbows
-
+from airflow.models import Variable
 
-@click.group()
-def cli():
-    pass
+RAINBOW_STAND_ALONE_MODE_KEY = "RAINBOW_STAND_ALONE_MODE"
 
 
-@cli.command()
-@click.option('--path', default=os.getcwd(), help='Build within this path.')
-def build(path):
-    click.echo(f'Building rainbows in {path}')
-    build_rainbows.build_rainbows(path)
+def get_variable(key, default_val):
+    if rainbow_local_mode():
+        return os.environ.get(key, default_val)
+    else:
+        return Variable.get(key, default_var=default_val)
 
 
-if __name__ == '__main__':
-    cli()
+def rainbow_local_mode():
+    stand_alone = environ.get(RAINBOW_STAND_ALONE_MODE_KEY, "False")
+    return stand_alone.strip().lower() == "true"
diff --git a/rainbow/runners/airflow/dag/rainbow_dags.py 
b/rainbow/runners/airflow/dag/rainbow_dags.py
index d5e3be1..730fd03 100644
--- a/rainbow/runners/airflow/dag/rainbow_dags.py
+++ b/rainbow/runners/airflow/dag/rainbow_dags.py
@@ -17,11 +17,13 @@
 # under the License.
 
 from datetime import datetime, timedelta
+from os import environ
 
 import yaml
 from airflow import DAG
 from airflow.models import Variable
 
+from rainbow.core import environment
 from rainbow.core.util import class_util
 from rainbow.core.util import files_util
 from rainbow.runners.airflow.model.task import Task
@@ -33,13 +35,13 @@ __DEPENDS_ON_PAST = 'depends_on_past'
 
 def register_dags(configs_path):
     """
-    Registers pipelines in rainbow yml files found in given path (recursively) 
as airflow DAGs.
+    TODO: doc for register_dags
     """
-
+    print(f'Registering DAG from path: {configs_path}')
     config_files = files_util.find_config_files(configs_path)
 
     dags = []
-
+    print(f'found {len(config_files)} in path: {configs_path}')
     for config_file in config_files:
         print(f'Registering DAG for file: {config_file}')
 
@@ -83,28 +85,35 @@ def register_dags(configs_path):
                 job_end_task = JobEndTask(dag, pipeline_name, parent, 
pipeline, 'all_done')
                 job_end_task.apply_task_to_dag()
 
-                print(f'{pipeline_name}: {dag.tasks}')
+                print(f'registered DAG {dag.dag_id}: {dag.tasks}')
 
                 globals()[pipeline_name] = dag
-
                 dags.append(dag)
 
-            return dags
+    return dags
 
 
 print(f'Loading task implementations..')
 
 # TODO: add configuration for user tasks package
-task_package = 'rainbow/runners/airflow/tasks'
+impl_packages = 'rainbow.runners.airflow.tasks'
 user_task_package = 'TODO: user_tasks_package'
 
-task_classes = class_util.find_subclasses_in_packages([task_package, 
user_task_package], Task)
+task_classes = class_util.find_subclasses_in_packages([impl_packages], Task)
+
+
+def tasks_by_rainbow_name(task_classes):
+    return {full_name.replace(impl_packages, '').replace(clzz.__name__, 
'')[1:-1]: clzz
+            for (full_name, clzz) in task_classes.items()}
+
+
+tasks_by_rainbow_name = tasks_by_rainbow_name(task_classes)
 
-print(f'Finished loading task implementations: {task_classes}')
+print(f'Finished loading task implementations: {tasks_by_rainbow_name}')
 
 
 def get_task_class(task_type):
-    return task_classes[task_type]
+    return tasks_by_rainbow_name[task_type]
 
 
-register_dags(Variable.get('rainbows_dir'))
+register_dags(environment.get_dags_dir())
diff --git a/rainbow/runners/airflow/operators/job_status_operator.py 
b/rainbow/runners/airflow/operators/job_status_operator.py
index dc318e5..ae9382a 100644
--- a/rainbow/runners/airflow/operators/job_status_operator.py
+++ b/rainbow/runners/airflow/operators/job_status_operator.py
@@ -38,7 +38,7 @@ class JobStatusOperator(BaseOperator):
             *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.backends = backends
-        self.cloudwatch = CloudWatchHook()
+        self.cloudwatch = None
 
     def execute(self, context):
         for backend in self.backends:
@@ -52,12 +52,17 @@ class JobStatusOperator(BaseOperator):
         raise NotImplementedError
 
     def send_metric_to_cloudwatch(self, metric):
-        self.cloudwatch.put_metric_data(metric)
+        self.get_cloudwatch().put_metric_data(metric)
 
     report_functions = {
         'cloudwatch': send_metric_to_cloudwatch
     }
 
+    def get_cloudwatch(self):
+        if not self.cloudwatch:
+            self.cloudwatch = CloudWatchHook()
+        return self.cloudwatch
+
 
 class JobStartOperator(JobStatusOperator):
     ui_color = '#c5e5e8'
diff --git a/rainbow/runners/airflow/tasks/defaults/job_end.py~HEAD 
b/rainbow/runners/airflow/tasks/defaults/job_end.py~HEAD
deleted file mode 100644
index e177ccc..0000000
--- a/rainbow/runners/airflow/tasks/defaults/job_end.py~HEAD
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from rainbow.runners.airflow.operators.job_status_operator import 
JobEndOperator
-from rainbow.runners.airflow.tasks.defaults.default_task import DefaultTask
-
-
-class JobEndTask(DefaultTask):
-    """
-      Job end task. Reports job end metrics.
-    """
-
-    def __init__(self, dag, pipeline_name, parent, config, trigger_rule):
-        super().__init__(dag, pipeline_name, parent, config, trigger_rule)
-
-    def apply_task_to_dag(self):
-        job_end_task = JobEndOperator(
-            task_id='end',
-            namespace=self.metrics_namespace,
-            application_name=self.pipeline_name,
-            backends=self.metrics_backends,
-            dag=self.dag,
-            trigger_rule=self.trigger_rule
-        )
-
-        if self.parent:
-            self.parent.set_downstream(job_end_task)
-
-        return job_end_task
diff --git a/rainbow/runners/airflow/tasks/defaults/job_start.py~HEAD 
b/rainbow/runners/airflow/tasks/defaults/job_start.py~HEAD
deleted file mode 100644
index e196919..0000000
--- a/rainbow/runners/airflow/tasks/defaults/job_start.py~HEAD
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-from rainbow.runners.airflow.operators.job_status_operator import 
JobStartOperator
-from rainbow.runners.airflow.tasks.defaults.default_task import DefaultTask
-
-
-class JobStartTask(DefaultTask):
-    """
-    Job start task. Reports job start metrics.
-    """
-
-    def __init__(self, dag, pipeline_name, parent, config, trigger_rule):
-        super().__init__(dag, pipeline_name, parent, config, trigger_rule)
-
-    def apply_task_to_dag(self):
-        job_start_task = JobStartOperator(
-            task_id='start',
-            namespace=self.metrics_namespace,
-            application_name=self.pipeline_name,
-            backends=self.metrics_backends,
-            dag=self.dag,
-            trigger_rule=self.trigger_rule
-        )
-
-        if self.parent:
-            self.parent.set_downstream(job_start_task)
-
-        return job_start_task
diff --git a/rainbow/runners/airflow/tasks/python.py 
b/rainbow/runners/airflow/tasks/python.py
index 8bd11cf..d5d4e00 100644
--- a/rainbow/runners/airflow/tasks/python.py
+++ b/rainbow/runners/airflow/tasks/python.py
@@ -20,6 +20,7 @@ import json
 from airflow.models import Variable
 from airflow.operators.dummy_operator import DummyOperator
 
+from rainbow.runners.airflow.config.standalone_variable_backend import 
get_variable
 from rainbow.runners.airflow.model import task
 from 
rainbow.runners.airflow.operators.kubernetes_pod_operator_with_input_output 
import \
     KubernetesPodOperatorWithInputAndOutput, \
@@ -143,10 +144,10 @@ class PythonTask(task.Task):
 
     def __kubernetes_kwargs(self):
         kubernetes_kwargs = {
-            'namespace': Variable.get('kubernetes_namespace', 
default_var='default'),
+            'namespace': get_variable('kubernetes_namespace', 
default_val='default'),
             'name': self.task_name.replace('_', '-'),
-            'in_cluster': Variable.get('in_kubernetes_cluster', 
default_var=False),
-            'image_pull_policy': Variable.get('image_pull_policy', 
default_var='IfNotPresent'),
+            'in_cluster': get_variable('in_kubernetes_cluster', 
default_val=False),
+            'image_pull_policy': get_variable('image_pull_policy', 
default_val='IfNotPresent'),
             'get_logs': True,
             'env_vars': self.env_vars,
             'do_xcom_push': True,
@@ -162,9 +163,9 @@ class PythonTask(task.Task):
         env_vars = {}
         if 'env_vars' in self.config:
             env_vars = self.config['env_vars']
-        airflow_configuration_variable = Variable.get(
+        airflow_configuration_variable = get_variable(
             f'''{self.pipeline_name}_dag_configuration''',
-            default_var=None)
+            default_val=None)
         if airflow_configuration_variable:
             airflow_configs = json.loads(airflow_configuration_variable)
             environment_variables_key = 
f'''{self.pipeline_name}_environment_variables'''
diff --git a/rainbow/runners/airflow/tasks/spark.py 
b/rainbow/runners/airflow/tasks/spark.py
index 9a46dd4..68cfac0 100644
--- a/rainbow/runners/airflow/tasks/spark.py
+++ b/rainbow/runners/airflow/tasks/spark.py
@@ -18,7 +18,6 @@
 
 from rainbow.runners.airflow.model import task
 
-
 class SparkTask(task.Task):
     """
     Executes a Spark application.
diff --git a/requirements-airflow.txt b/requirements-airflow.txt
new file mode 100644
index 0000000..5191d2a
--- /dev/null
+++ b/requirements-airflow.txt
@@ -0,0 +1,5 @@
+click==7.1.1
+pyyaml
+boto3==1.12.10
+botocore==1.15.21
+kubernetes
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d7eec03..3fef3a5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ docker-pycreds==0.4.0
 click==7.1.1
 Flask==1.1.1
 pyyaml
-statsd
-botocore
-boto3
+boto3==1.12.10
+botocore==1.15.21
 kubernetes
+
diff --git a/scripts/docker-compose.yml b/scripts/docker-compose.yml
new file mode 100644
index 0000000..b6a2dc3
--- /dev/null
+++ b/scripts/docker-compose.yml
@@ -0,0 +1,40 @@
+    version: '3.7'
+    services:
+        postgres:
+            image: postgres:9.6
+            environment:
+                - POSTGRES_USER=airflow
+                - POSTGRES_PASSWORD=airflow
+                - POSTGRES_DB=
+
+            ports:
+                - "5432:5432"
+            logging:
+                options:
+                    max-size: 10m
+                    max-file: "3"
+
+        webserver:
+            image: puckel/docker-airflow:1.10.9
+            restart: always
+            depends_on:
+                - postgres
+            environment:
+                - LOAD_EX=n
+                - EXECUTOR=Local
+                - AIRFLOW__CORE__DAGS_FOLDER=/usr/local/airflow/dags
+                - AIRFLOW__WEBSERVER__WORKERS=1
+            logging:
+                options:
+                    max-size: 10m
+                    max-file: "3"
+            volumes:
+                - ${RAINBOW_HOME}:/usr/local/airflow/dags
+            ports:
+                - "8080:8080"
+            command: webserver
+            healthcheck:
+                test: ["CMD-SHELL", "[ -f 
/usr/local/airflow/airflow-webserver.pid ]"]
+                interval: 30s
+                timeout: 30s
+                retries: 3
diff --git a/scripts/package.sh b/scripts/package.sh
new file mode 100755
index 0000000..f4083e4
--- /dev/null
+++ b/scripts/package.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+echo $1
+target_path="$1"
+
+echo "running from " $(PWD)
+echo "target path for rainbow zip file is " $target_path
+
+echo "cleaning up the temp dirs $TMPDIR/rainbow_build"
+rm -rf $TMPDIR/rainbow_build-*/
+
+tmp_dir=$(mktemp -d -t rainbow_build-)
+echo "creating temp directory $tmp_dir"
+
+docker_build_dir=$tmp_dir/docker_build
+mkdir -p $docker_build_dir
+echo "docker build directory :"$docker_build_dir
+
+mkdir $docker_build_dir/"zip_content"
+mkdir $docker_build_dir/"dags"
+
+#copy the content of the user project into the build folder
+rsync -a --exclude 'venv' $(PWD)/ $docker_build_dir/zip_content/
+
+# perform installation of external packages (framework-requirements and 
user-requirements)
+# this is done inside a docker to 1) avoid requiring the user to install 
stuff, and 2) to create a platform-compatible
+# package (install the native libraries in a flavour suitable for the docker 
in which airflow runs, and not user machine)
+docker stop rainbow_build
+docker rm rainbow_build
+docker run --name rainbow_build -v 
/private/"$docker_build_dir":/home/rainbow/tmp --entrypoint="" -u 0 \
+       puckel/docker-airflow:1.10.9 /bin/bash -c "apt-get update && apt-get 
install -y wget && apt-get install -y git &&
+       cd /home/rainbow/tmp/zip_content &&
+       wget 
https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/rainbow/runners/airflow/dag/rainbow_dags.py
 &&
+       wget 
https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/requirements-airflow.txt
 &&
+       wget 
https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/scripts/docker-compose.yml
 &&
+       pip install --no-deps --target=\"/home/rainbow/tmp/zip_content\" 
git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode &&
+       pip install --target=\"/home/rainbow/tmp/zip_content\" -r 
/home/rainbow/tmp/zip_content/requirements-airflow.txt &&
+       pip install --target=\"/home/rainbow/tmp/zip_content\" -r 
/home/rainbow/tmp/zip_content/requirements.txt"
+
+docker stop rainbow_build
+docker rm rainbow_build
+
+# zip the content per 
https://airflow.apache.org/docs/stable/concepts.html#packaged-dags
+cd $docker_build_dir/zip_content
+mv docker-compose.yml $target_path
+rm __init__.py
+
+zip -r ../dags/rainbows.zip .
+cp ../dags/rainbows.zip $target_path
+
+
+
diff --git a/scripts/rainbow b/scripts/rainbow
new file mode 100755
index 0000000..1d5f65e
--- /dev/null
+++ b/scripts/rainbow
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import shutil
+import site
+import sys
+
+import click
+from rainbow.build import build_rainbows
+import subprocess
+from rainbow.core import environment
+from rainbow.core.util import files_util
+
+
+@click.group()
+def cli():
+    pass
+
+
+def docker_is_running():
+    try:
+        return not subprocess.check_output("docker info >/dev/null 2>&1", 
shell=True)
+    except subprocess.CalledProcessError as e:
+        msg = "Docker is not running. Please start docker service on your 
machine\n"
+        sys.stderr.write(f"ERROR: {msg}")
+        raise RuntimeError(msg)
+
+
+@cli.command("build", short_help="builds dockers from your business logic")
+@click.option('--path', default=os.getcwd(), help='Build within this path.')
+def build(path):
+    click.echo(f'Building rainbows in {path}')
+    if docker_is_running():
+        build_rainbows.build_rainbows(path)
+
+
+def deploy_rainbow_core_internal():
+    click.echo("WARN: refreshing rainbow core package")
+    rainbow_home = environment.get_rainbow_home()
+    subprocess.call([f'package.sh {rainbow_home}'], shell=True)
+
+
+@cli.command("deploy", short_help="deploys your rainbow.yaml files to 
$RAINBOW_HOME folder")
+@click.option('--path', default=os.getcwd(), help="folder containing 
rainbow.yaml files")
+def deploy_rainbows(path):
+    click.echo("deploying rainbow yaml files")
+    rainbow_home = environment.get_rainbow_home()
+    os.makedirs(rainbow_home, exist_ok=True)
+    os.makedirs(environment.get_dags_dir(), exist_ok=True)
+    deploy_rainbow_core_internal()
+    config_files = files_util.find_config_files(path)
+    for config_file in config_files:
+        click.echo(f"deploying rainbow file: {config_file}")
+        yml_name = os.path.basename(config_file)
+        target_yml_name = os.path.join(environment.get_dags_dir(), yml_name)
+        shutil.copyfile(config_file, target_yml_name)
+
+
+@cli.command("start", short_help="starts a local airflow in docker compose. 
should be run after deploy. " +
+                                 "Make sure docker is running on your machine")
+def start():
+    if docker_is_running():
+        # initialize rainbow home by default
+        environment.get_rainbow_home()
+        result = subprocess.call([f'docker-compose -f 
"{environment.get_rainbow_home()}/docker-compose.yml" up'],
+                                 env=os.environ, shell=True)
+
+
+if __name__ == '__main__':
+    cli()
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c102ae3
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import setuptools
+from setuptools import setup
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+with open('requirements.txt') as f:
+    requirements = f.read().splitlines()
+    print(requirements)
+
+setuptools.setup(
+    name="rainbow",
+    version="0.0.1",
+    author="Rainbow team",
+    description="A package for authoring and deploying machine learning 
workflows",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/Natural-Intelligence/rainbow";,
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: Apache 2.0",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.6',
+    install_requires=requirements,
+    scripts=['scripts/rainbow', 'scripts/package.sh']
+)
diff --git a/tests/runners/airflow/rainbow/requirements.txt 
b/tests/runners/airflow/rainbow/requirements.txt
new file mode 100644
index 0000000..037103e
--- /dev/null
+++ b/tests/runners/airflow/rainbow/requirements.txt
@@ -0,0 +1 @@
+pillow
\ No newline at end of file
diff --git a/tests/util/test_class_utils.py b/tests/util/test_class_utils.py
new file mode 100644
index 0000000..0deeff6
--- /dev/null
+++ b/tests/util/test_class_utils.py
@@ -0,0 +1,32 @@
+from unittest import TestCase
+
+from rainbow.core.util import class_util
+from tests.util.test_pkg_1.test_clazz_base import A, Z
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_2 import C
+from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import F, D, E
+from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_2.test_clazz_leaf_2 import G, H
+
+
+class Test(TestCase):
+    def test_find_full_hierarchy_from_root(self):
+        expected_set = set([B, C, D, E, H, Z])
+        self.hierarchy_check(A, expected_set)
+
+    def test_find_full_hierarchy_mid_tree_in_package(self):
+        expected_set = set([G])
+        self.hierarchy_check(F, expected_set)
+
+    def test_leaf_class(self):
+        expected_set = set()
+        self.hierarchy_check(G, expected_set)
+
+    def hierarchy_check(self, clazz, expected_set):
+        pkg_root = 'tests.util.test_pkg_1'
+        full_tree = class_util.find_subclasses_in_packages(
+            [pkg_root],
+            clazz)
+
+        res_set = set()
+        res_set.update(full_tree.values())
+        self.assertEqual(res_set, expected_set)
diff --git a/rainbow/core/util/files_util.py b/tests/util/test_pkg_1/__init__.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/__init__.py
index b1d1daf..217e5db 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/__init__.py
@@ -15,16 +15,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import os
-
-
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
diff --git a/rainbow/core/util/files_util.py 
b/tests/util/test_pkg_1/test_clazz_base.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_clazz_base.py
index b1d1daf..3e7c523 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_clazz_base.py
@@ -16,15 +16,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import os
 
+class A:
+    """Root fixture class; Z, B and C in this test package derive from it."""
+    pass
 
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
+
+class Z(A):
+    """Fixture subclass of A declared in the same module as A."""
+    pass
diff --git a/rainbow/core/util/files_util.py 
b/tests/util/test_pkg_1/test_pkg_1_1/__init__.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/__init__.py
index b1d1daf..217e5db 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/__init__.py
@@ -15,16 +15,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import os
-
-
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
diff --git a/rainbow/core/util/files_util.py 
b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py
index b1d1daf..6fe2e9a 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py
@@ -16,15 +16,13 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import os
 
+from tests.util.test_pkg_1.test_clazz_base import A
 
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
+
+class B(A):
+    """Fixture subclass of A declared in the test_pkg_1_1 sub-package."""
+    pass
+
+
+class M:
+    """Fixture class with no base class; it does not derive from A."""
+    pass
diff --git a/rainbow/core/util/files_util.py 
b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py
index b1d1daf..e279c7a 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py
@@ -16,15 +16,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import os
 
+from tests.util.test_pkg_1.test_clazz_base import A
 
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
+
+class C(A):
+    """Fixture subclass of A declared in the test_pkg_1_1 sub-package."""
+    pass
diff --git a/rainbow/core/util/files_util.py 
b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py
index b1d1daf..217e5db 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py
@@ -15,16 +15,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import os
-
-
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
diff --git a/rainbow-cli 
b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py
old mode 100755
new mode 100644
similarity index 69%
rename from rainbow-cli
rename to tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py
index 4f16b4e..2aba50e
--- a/rainbow-cli
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -17,24 +15,19 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import os
 
-import click
 
-from rainbow.build import build_rainbows
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_2 import C
 
 
-@click.group()
-def cli():
+class D(B):
+    """Fixture subclass of B, i.e. an indirect subclass of A."""
     pass
 
 
-@cli.command()
-@click.option('--path', default=os.getcwd(), help='Build within this path.')
-def build(path):
-    click.echo(f'Building rainbows in {path}')
-    build_rainbows.build_rainbows(path)
+class E(C):
+    """Fixture subclass of C, i.e. an indirect subclass of A."""
+    pass
 
 
-if __name__ == '__main__':
-    cli()
+class F:
+    """Fixture class with no base class; G in test_clazz_leaf_2 extends it."""
+    pass
diff --git a/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/__init__.py 
b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git 
a/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py 
b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py
new file mode 100644
index 0000000..a328f33
--- /dev/null
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py
@@ -0,0 +1,8 @@
+from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import F, E
+
+
+class G(F):
+    """Fixture subclass of F; the tests expect it to have no subclasses."""
+    pass
+
+class H(E):
+    """Fixture subclass of E, which derives from A through C."""
+    pass

Reply via email to