This is an automated email from the ASF dual-hosted git repository. jbonofre pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-liminal.git
commit 6da38b82f53a20a91bdbf3f4a4afd56c709fb206 Author: assapin <[email protected]> AuthorDate: Thu Jun 11 16:33:22 2020 +0300 Rainbow local mode --- README.md | 44 +++++++++-- images/airflow.png | Bin 0 -> 70049 bytes rainbow/build/build_rainbows.py | 25 ++++-- rainbow-cli => rainbow/core/environment.py | 32 ++++---- rainbow/core/util/class_util.py | 62 +++++++++------ rainbow/core/util/files_util.py | 2 +- .../util/files_util.py => runners/__init__.py} | 13 --- rainbow/runners/airflow/config/__init__.py | 0 .../airflow/config/standalone_variable_backend.py | 27 +++---- rainbow/runners/airflow/dag/rainbow_dags.py | 31 +++++--- .../airflow/operators/job_status_operator.py | 9 ++- .../runners/airflow/tasks/defaults/job_end.py~HEAD | 44 ----------- .../airflow/tasks/defaults/job_start.py~HEAD | 43 ---------- rainbow/runners/airflow/tasks/python.py | 11 +-- rainbow/runners/airflow/tasks/spark.py | 1 - requirements-airflow.txt | 5 ++ requirements.txt | 6 +- scripts/docker-compose.yml | 40 ++++++++++ scripts/package.sh | 69 ++++++++++++++++ scripts/rainbow | 87 +++++++++++++++++++++ setup.py | 48 ++++++++++++ tests/runners/airflow/rainbow/requirements.txt | 1 + tests/util/test_class_utils.py | 32 ++++++++ .../util/test_pkg_1/__init__.py | 13 --- .../util/test_pkg_1/test_clazz_base.py | 15 ++-- .../util/test_pkg_1/test_pkg_1_1/__init__.py | 13 --- .../test_pkg_1/test_pkg_1_1/test_clazz_child_1.py | 18 ++--- .../test_pkg_1/test_pkg_1_1/test_clazz_child_2.py | 14 +--- .../test_pkg_1_1/test_pkg_1_1_1/__init__.py | 13 --- .../test_pkg_1_1_1/test_clazz_leaf_1.py | 21 ++--- .../test_pkg_1_1/test_pkg_1_1_2/__init__.py | 0 .../test_pkg_1_1_2/test_clazz_leaf_2.py | 8 ++ 32 files changed, 469 insertions(+), 278 deletions(-) diff --git a/README.md b/README.md index 467edf2..ee2f961 100644 --- a/README.md +++ b/README.md @@ -76,12 +76,44 @@ services: function: myendpoint1func ``` -## Example repository structure - -[Example repository structure]( 
-https://github.com/Natural-Intelligence/rainbow/tree/master/tests/runners/airflow/rainbow] -) # Installation +1. Install this package +```bash + pip install git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode +``` +2. Optional: set RAINBOW_HOME to path of your choice (if not set, will default to ~/rainbow_home) +```bash +echo 'export RAINBOW_HOME=</path/to/some/folder>' >> ~/.bash_profile && source ~/.bash_profile +``` + +# Authoring pipelines + +This involves at minimum creating a single file called rainbow.yml as in the example above. + +If your pipeline requires custom python code to implement tasks, they should be organized +[like this](https://github.com/Natural-Intelligence/rainbow/tree/master/tests/runners/airflow/rainbow) + +If your pipeline introduces imports of external packages which are not already a part +of the rainbow framework (i.e. you had to pip install them yourself), you need to also provide +a requirements.txt in the root of your project. + +# Testing the pipeline locally + +When your pipeline code is ready, you can test it by running it locally on your machine. + +1. Deploy the pipeline: +```bash +cd </path/to/your/rainbow/code> +rainbow deploy +``` +2. Make sure you have docker running +3. Start the Server +```bash +rainbow start +``` +4. Navigate to [http://localhost:8080/admin] +5. You should see your  -TODO: installation. 
+### Running Tests (for contributors) +When doing local development and running Rainbow unit-tests, make sure to set RAINBOW_STAND_ALONE_MODE=True diff --git a/images/airflow.png b/images/airflow.png new file mode 100644 index 0000000..229f8fa Binary files /dev/null and b/images/airflow.png differ diff --git a/rainbow/build/build_rainbows.py b/rainbow/build/build_rainbows.py index b7ea6eb..66d27cb 100644 --- a/rainbow/build/build_rainbows.py +++ b/rainbow/build/build_rainbows.py @@ -74,33 +74,42 @@ def __build_image(base_path, builder_config, builder): def __get_task_build_class(task_type): - return task_build_classes[task_type] if task_type in task_build_classes else None + return task_build_types.get(task_type, None) def __get_service_build_class(service_type): - return service_build_classes[service_type] if service_type in service_build_classes else None + return service_build_types.get(service_type, None) print(f'Loading image builder implementations..') # TODO: add configuration for user image builders package -image_builders_package = 'rainbow/build/image' -user_image_builders_package = 'TODO: user_image_builders_package' +image_builders_package = 'rainbow.build.image' +# user_image_builders_package = 'TODO: user_image_builders_package' task_build_classes = class_util.find_subclasses_in_packages( - [image_builders_package, user_image_builders_package], + [image_builders_package], ImageBuilder) + +def get_types_dict(task_build_classes): + # take module name from class name + return {x.split(".")[-2]: c for x, c in task_build_classes.items()} + + +task_build_types = get_types_dict(task_build_classes) + print(f'Finished loading image builder implementations: {task_build_classes}') print(f'Loading service image builder implementations..') # TODO: add configuration for user service image builders package -service_builders_package = 'rainbow/build/service' -user_service_builders_package = 'TODO: user_service_builders_package' +service_builders_package = 
'rainbow.build.service' +# user_service_builders_package = 'TODO: user_service_builders_package' service_build_classes = class_util.find_subclasses_in_packages( - [service_builders_package, user_service_builders_package], + [service_builders_package], ServiceImageBuilderMixin) +service_build_types = get_types_dict(service_build_classes) print(f'Finished loading service image builder implementations: {service_build_classes}') diff --git a/rainbow-cli b/rainbow/core/environment.py old mode 100755 new mode 100644 similarity index 50% copy from rainbow-cli copy to rainbow/core/environment.py index 4f16b4e..27f4d41 --- a/rainbow-cli +++ b/rainbow/core/environment.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -17,24 +15,24 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import os - -import click - -from rainbow.build import build_rainbows +import os [email protected]() -def cli(): - pass +DEFAULT_DAGS_ZIP_NAME = 'rainbows.zip' +DEFAULT_RAINBOW_HOME = os.path.expanduser('~/rainbow_home') +DEFAULT_RAINBOWS_SUBDIR = "rainbows" +RAINBOW_HOME_PARAM_NAME = "RAINBOW_HOME" [email protected]() [email protected]('--path', default=os.getcwd(), help='Build within this path.') -def build(path): - click.echo(f'Building rainbows in {path}') - build_rainbows.build_rainbows(path) +def get_rainbow_home(): + if not os.environ.get(RAINBOW_HOME_PARAM_NAME): + print("no environment parameter called RAINBOW_HOME detected") + print(f"registering {DEFAULT_RAINBOW_HOME} as the RAINBOW_HOME directory") + os.environ[RAINBOW_HOME_PARAM_NAME] = DEFAULT_RAINBOW_HOME + return os.environ.get(RAINBOW_HOME_PARAM_NAME, DEFAULT_RAINBOW_HOME) -if __name__ == '__main__': - cli() +def get_dags_dir(): + # if we are inside airflow, we will take it from the configured dags folder + base_dir = 
os.environ.get("AIRFLOW__CORE__DAGS_FOLDER", get_rainbow_home()) + return os.path.join(base_dir, DEFAULT_RAINBOWS_SUBDIR) diff --git a/rainbow/core/util/class_util.py b/rainbow/core/util/class_util.py index e083477..129c61a 100644 --- a/rainbow/core/util/class_util.py +++ b/rainbow/core/util/class_util.py @@ -17,9 +17,7 @@ # under the License. import importlib.util -import inspect -import os -import sys +import pkgutil def find_subclasses_in_packages(packages, parent_class): @@ -27,28 +25,42 @@ def find_subclasses_in_packages(packages, parent_class): Finds all subclasses of given parent class within given packages :return: map of module ref -> class """ - classes = {} - - for py_path in [a for a in sys.path]: - for root, directories, files in os.walk(py_path): - if any(package in root for package in packages): - for file in files: - file_path = os.path.join(root, file) - if file.endswith('.py') and '__pycache__' not in file_path: - spec = importlib.util.spec_from_file_location(file[:-3], file_path) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - for name, obj in inspect.getmembers(mod): - if inspect.isclass(obj) and not obj.__name__.endswith('Mixin'): - module_name = mod.__name__ - class_name = obj.__name__ - parent_module = root[len(py_path) + 1:].replace('/', '.') - module = parent_module.replace('airflow.dags.', '') + \ - '.' + module_name - clazz = __get_class(module, class_name) - if issubclass(clazz, parent_class): - classes.update({module_name: clazz}) - return classes + module_content = {} + for p in packages: + module_content.update(import_module(p)) + + subclasses = set() + work = [parent_class] + while work: + parent = work.pop() + for child in parent.__subclasses__(): + if child not in subclasses: + work.append(child) + # verify that the found class is in the relevant module + for p in packages: + if p in child.__module__: + subclasses.add(child) + break + + result = {sc.__module__ + "." 
+ sc.__name__: sc for sc in subclasses} + return result + + +def import_module(package, recrsive=True): + """ Import all submodules of a module, recursively, including subpackages + :param package: package (name or actual module) + :type package: str | module + :rtype: dict[str, types.ModuleType] + """ + if isinstance(package, str): + package = importlib.import_module(package) + results = {} + for loader, name, is_pkg in pkgutil.walk_packages(package.__path__): + full_name = package.__name__ + '.' + name + results[full_name] = importlib.import_module(full_name) + if recrsive and is_pkg: + results.update(import_module(full_name)) + return results def __get_class(the_module, the_class): diff --git a/rainbow/core/util/files_util.py b/rainbow/core/util/files_util.py index b1d1daf..4a03020 100644 --- a/rainbow/core/util/files_util.py +++ b/rainbow/core/util/files_util.py @@ -24,7 +24,7 @@ def find_config_files(path): print(path) for r, d, f in os.walk(path): for file in f: - print(os.path.basename(file)) if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']: + print(os.path.join(r, file)) files.append(os.path.join(r, file)) return files diff --git a/rainbow/core/util/files_util.py b/rainbow/runners/__init__.py similarity index 71% copy from rainbow/core/util/files_util.py copy to rainbow/runners/__init__.py index b1d1daf..217e5db 100644 --- a/rainbow/core/util/files_util.py +++ b/rainbow/runners/__init__.py @@ -15,16 +15,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- -import os - - -def find_config_files(path): - files = [] - print(path) - for r, d, f in os.walk(path): - for file in f: - print(os.path.basename(file)) - if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']: - files.append(os.path.join(r, file)) - return files diff --git a/rainbow/runners/airflow/config/__init__.py b/rainbow/runners/airflow/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rainbow-cli b/rainbow/runners/airflow/config/standalone_variable_backend.py old mode 100755 new mode 100644 similarity index 64% copy from rainbow-cli copy to rainbow/runners/airflow/config/standalone_variable_backend.py index 4f16b4e..d7df06c --- a/rainbow-cli +++ b/rainbow/runners/airflow/config/standalone_variable_backend.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -18,23 +16,20 @@ # specific language governing permissions and limitations # under the License. 
import os +from os import environ -import click - -from rainbow.build import build_rainbows - +from airflow.models import Variable [email protected]() -def cli(): - pass +RAINBOW_STAND_ALONE_MODE_KEY = "RAINBOW_STAND_ALONE_MODE" [email protected]() [email protected]('--path', default=os.getcwd(), help='Build within this path.') -def build(path): - click.echo(f'Building rainbows in {path}') - build_rainbows.build_rainbows(path) +def get_variable(key, default_val): + if rainbow_local_mode(): + return os.environ.get(key, default_val) + else: + return Variable.get(key, default_var=default_val) -if __name__ == '__main__': - cli() +def rainbow_local_mode(): + stand_alone = environ.get(RAINBOW_STAND_ALONE_MODE_KEY, "False") + return stand_alone.strip().lower() == "true" diff --git a/rainbow/runners/airflow/dag/rainbow_dags.py b/rainbow/runners/airflow/dag/rainbow_dags.py index d5e3be1..730fd03 100644 --- a/rainbow/runners/airflow/dag/rainbow_dags.py +++ b/rainbow/runners/airflow/dag/rainbow_dags.py @@ -17,11 +17,13 @@ # under the License. from datetime import datetime, timedelta +from os import environ import yaml from airflow import DAG from airflow.models import Variable +from rainbow.core import environment from rainbow.core.util import class_util from rainbow.core.util import files_util from rainbow.runners.airflow.model.task import Task @@ -33,13 +35,13 @@ __DEPENDS_ON_PAST = 'depends_on_past' def register_dags(configs_path): """ - Registers pipelines in rainbow yml files found in given path (recursively) as airflow DAGs. 
+ TODO: doc for register_dags """ - + print(f'Registering DAG from path: {configs_path}') config_files = files_util.find_config_files(configs_path) dags = [] - + print(f'found {len(config_files)} in path: {configs_path}') for config_file in config_files: print(f'Registering DAG for file: {config_file}') @@ -83,28 +85,35 @@ def register_dags(configs_path): job_end_task = JobEndTask(dag, pipeline_name, parent, pipeline, 'all_done') job_end_task.apply_task_to_dag() - print(f'{pipeline_name}: {dag.tasks}') + print(f'registered DAG {dag.dag_id}: {dag.tasks}') globals()[pipeline_name] = dag - dags.append(dag) - return dags + return dags print(f'Loading task implementations..') # TODO: add configuration for user tasks package -task_package = 'rainbow/runners/airflow/tasks' +impl_packages = 'rainbow.runners.airflow.tasks' user_task_package = 'TODO: user_tasks_package' -task_classes = class_util.find_subclasses_in_packages([task_package, user_task_package], Task) +task_classes = class_util.find_subclasses_in_packages([impl_packages], Task) + + +def tasks_by_rainbow_name(task_classes): + return {full_name.replace(impl_packages, '').replace(clzz.__name__, '')[1:-1]: clzz + for (full_name, clzz) in task_classes.items()} + + +tasks_by_rainbow_name = tasks_by_rainbow_name(task_classes) -print(f'Finished loading task implementations: {task_classes}') +print(f'Finished loading task implementations: {tasks_by_rainbow_name}') def get_task_class(task_type): - return task_classes[task_type] + return tasks_by_rainbow_name[task_type] -register_dags(Variable.get('rainbows_dir')) +register_dags(environment.get_dags_dir()) diff --git a/rainbow/runners/airflow/operators/job_status_operator.py b/rainbow/runners/airflow/operators/job_status_operator.py index dc318e5..ae9382a 100644 --- a/rainbow/runners/airflow/operators/job_status_operator.py +++ b/rainbow/runners/airflow/operators/job_status_operator.py @@ -38,7 +38,7 @@ class JobStatusOperator(BaseOperator): *args, **kwargs): 
super().__init__(*args, **kwargs) self.backends = backends - self.cloudwatch = CloudWatchHook() + self.cloudwatch = None def execute(self, context): for backend in self.backends: @@ -52,12 +52,17 @@ class JobStatusOperator(BaseOperator): raise NotImplementedError def send_metric_to_cloudwatch(self, metric): - self.cloudwatch.put_metric_data(metric) + self.get_cloudwatch().put_metric_data(metric) report_functions = { 'cloudwatch': send_metric_to_cloudwatch } + def get_cloudwatch(self): + if not self.cloudwatch: + self.cloudwatch = CloudWatchHook() + return self.cloudwatch + class JobStartOperator(JobStatusOperator): ui_color = '#c5e5e8' diff --git a/rainbow/runners/airflow/tasks/defaults/job_end.py~HEAD b/rainbow/runners/airflow/tasks/defaults/job_end.py~HEAD deleted file mode 100644 index e177ccc..0000000 --- a/rainbow/runners/airflow/tasks/defaults/job_end.py~HEAD +++ /dev/null @@ -1,44 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from rainbow.runners.airflow.operators.job_status_operator import JobEndOperator -from rainbow.runners.airflow.tasks.defaults.default_task import DefaultTask - - -class JobEndTask(DefaultTask): - """ - Job end task. Reports job end metrics. 
- """ - - def __init__(self, dag, pipeline_name, parent, config, trigger_rule): - super().__init__(dag, pipeline_name, parent, config, trigger_rule) - - def apply_task_to_dag(self): - job_end_task = JobEndOperator( - task_id='end', - namespace=self.metrics_namespace, - application_name=self.pipeline_name, - backends=self.metrics_backends, - dag=self.dag, - trigger_rule=self.trigger_rule - ) - - if self.parent: - self.parent.set_downstream(job_end_task) - - return job_end_task diff --git a/rainbow/runners/airflow/tasks/defaults/job_start.py~HEAD b/rainbow/runners/airflow/tasks/defaults/job_start.py~HEAD deleted file mode 100644 index e196919..0000000 --- a/rainbow/runners/airflow/tasks/defaults/job_start.py~HEAD +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from rainbow.runners.airflow.operators.job_status_operator import JobStartOperator -from rainbow.runners.airflow.tasks.defaults.default_task import DefaultTask - - -class JobStartTask(DefaultTask): - """ - Job start task. Reports job start metrics. 
- """ - - def __init__(self, dag, pipeline_name, parent, config, trigger_rule): - super().__init__(dag, pipeline_name, parent, config, trigger_rule) - - def apply_task_to_dag(self): - job_start_task = JobStartOperator( - task_id='start', - namespace=self.metrics_namespace, - application_name=self.pipeline_name, - backends=self.metrics_backends, - dag=self.dag, - trigger_rule=self.trigger_rule - ) - - if self.parent: - self.parent.set_downstream(job_start_task) - - return job_start_task diff --git a/rainbow/runners/airflow/tasks/python.py b/rainbow/runners/airflow/tasks/python.py index 8bd11cf..d5d4e00 100644 --- a/rainbow/runners/airflow/tasks/python.py +++ b/rainbow/runners/airflow/tasks/python.py @@ -20,6 +20,7 @@ import json from airflow.models import Variable from airflow.operators.dummy_operator import DummyOperator +from rainbow.runners.airflow.config.standalone_variable_backend import get_variable from rainbow.runners.airflow.model import task from rainbow.runners.airflow.operators.kubernetes_pod_operator_with_input_output import \ KubernetesPodOperatorWithInputAndOutput, \ @@ -143,10 +144,10 @@ class PythonTask(task.Task): def __kubernetes_kwargs(self): kubernetes_kwargs = { - 'namespace': Variable.get('kubernetes_namespace', default_var='default'), + 'namespace': get_variable('kubernetes_namespace', default_val='default'), 'name': self.task_name.replace('_', '-'), - 'in_cluster': Variable.get('in_kubernetes_cluster', default_var=False), - 'image_pull_policy': Variable.get('image_pull_policy', default_var='IfNotPresent'), + 'in_cluster': get_variable('in_kubernetes_cluster', default_val=False), + 'image_pull_policy': get_variable('image_pull_policy', default_val='IfNotPresent'), 'get_logs': True, 'env_vars': self.env_vars, 'do_xcom_push': True, @@ -162,9 +163,9 @@ class PythonTask(task.Task): env_vars = {} if 'env_vars' in self.config: env_vars = self.config['env_vars'] - airflow_configuration_variable = Variable.get( + airflow_configuration_variable = 
get_variable( f'''{self.pipeline_name}_dag_configuration''', - default_var=None) + default_val=None) if airflow_configuration_variable: airflow_configs = json.loads(airflow_configuration_variable) environment_variables_key = f'''{self.pipeline_name}_environment_variables''' diff --git a/rainbow/runners/airflow/tasks/spark.py b/rainbow/runners/airflow/tasks/spark.py index 9a46dd4..68cfac0 100644 --- a/rainbow/runners/airflow/tasks/spark.py +++ b/rainbow/runners/airflow/tasks/spark.py @@ -18,7 +18,6 @@ from rainbow.runners.airflow.model import task - class SparkTask(task.Task): """ Executes a Spark application. diff --git a/requirements-airflow.txt b/requirements-airflow.txt new file mode 100644 index 0000000..5191d2a --- /dev/null +++ b/requirements-airflow.txt @@ -0,0 +1,5 @@ +click==7.1.1 +pyyaml +boto3==1.12.10 +botocore==1.15.21 +kubernetes \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d7eec03..3fef3a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ docker-pycreds==0.4.0 click==7.1.1 Flask==1.1.1 pyyaml -statsd -botocore -boto3 +boto3==1.12.10 +botocore==1.15.21 kubernetes + diff --git a/scripts/docker-compose.yml b/scripts/docker-compose.yml new file mode 100644 index 0000000..b6a2dc3 --- /dev/null +++ b/scripts/docker-compose.yml @@ -0,0 +1,40 @@ + version: '3.7' + services: + postgres: + image: postgres:9.6 + environment: + - POSTGRES_USER=airflow + - POSTGRES_PASSWORD=airflow + - POSTGRES_DB= + + ports: + - "5432:5432" + logging: + options: + max-size: 10m + max-file: "3" + + webserver: + image: puckel/docker-airflow:1.10.9 + restart: always + depends_on: + - postgres + environment: + - LOAD_EX=n + - EXECUTOR=Local + - AIRFLOW__CORE__DAGS_FOLDER=/usr/local/airflow/dags + - AIRFLOW__WEBSERVER__WORKERS=1 + logging: + options: + max-size: 10m + max-file: "3" + volumes: + - ${RAINBOW_HOME}:/usr/local/airflow/dags + ports: + - "8080:8080" + command: webserver + healthcheck: + test: ["CMD-SHELL", "[ -f 
/usr/local/airflow/airflow-webserver.pid ]"] + interval: 30s + timeout: 30s + retries: 3 diff --git a/scripts/package.sh b/scripts/package.sh new file mode 100755 index 0000000..f4083e4 --- /dev/null +++ b/scripts/package.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required bgit y applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +echo $1 +target_path="$1" + +echo "running from " $(PWD) +echo "target path for rainbow zip file is " $target_path + +echo "cleaning up the temp dirs $TMPDIR/rainbow_build" +rm -rf $TMPDIR/rainbow_build-*/ + +tmp_dir=$(mktemp -d -t rainbow_build-) +echo "creating temp directory $tmp_dir" + +docker_build_dir=$tmp_dir/docker_build +mkdir -p $docker_build_dir +echo "docker build directory :"$docker_build_dir + +mkdir $docker_build_dir/"zip_content" +mkdir $docker_build_dir/"dags" + +#copy the content of the user project into the build folder +rsync -a --exclude 'venv' $(PWD)/ $docker_build_dir/zip_content/ + +# perform installation of external pacakges (framework-requirements and user-requirements) +# this is done inside a docker to 1) avoid requiring the user to install stuff, and 2) to create a platform-compatible +# package (install the native libraries in a flavour suitable for the docker in which airflow runs, and not user machine) +docker stop rainbow_build +docker rm rainbow_build +docker run --name rainbow_build -v /private/"$docker_build_dir":/home/rainbow/tmp --entrypoint="" -u 0 \ + puckel/docker-airflow:1.10.9 /bin/bash -c "apt-get update && apt-get install -y wget && apt-get install -y git && + cd /home/rainbow/tmp/zip_content && + wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/rainbow/runners/airflow/dag/rainbow_dags.py && + wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/requirements-airflow.txt && + wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/scripts/docker-compose.yml && + pip install --no-deps --target=\"/home/rainbow/tmp/zip_content\" git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode && + pip install --target=\"/home/rainbow/tmp/zip_content\" -r /home/rainbow/tmp/zip_content/requirements-airflow.txt && + pip install --target=\"/home/rainbow/tmp/zip_content\" -r 
/home/rainbow/tmp/zip_content/requirements.txt" + +docker stop rainbow_build +docker rm rainbow_build + +# zip the content per https://airflow.apache.org/docs/stable/concepts.html#packaged-dags +cd $docker_build_dir/zip_content +mv docker-compose.yml $target_path +rm __init__.py + +zip -r ../dags/rainbows.zip . +cp ../dags/rainbows.zip $target_path + + + diff --git a/scripts/rainbow b/scripts/rainbow new file mode 100755 index 0000000..1d5f65e --- /dev/null +++ b/scripts/rainbow @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required bgit y applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os +import shutil +import site +import sys + +import click +from rainbow.build import build_rainbows +import subprocess +from rainbow.core import environment +from rainbow.core.util import files_util + + [email protected]() +def cli(): + pass + + +def docker_is_running(): + try: + return not subprocess.check_output("docker info >/dev/null 2>&1", shell=True) + except subprocess.CalledProcessError as e: + msg = "Docker is not running. 
Please start docker service on your machine\n" + sys.stderr.write(f"ERROR: {msg}") + raise RuntimeError(msg) + + [email protected]("build", short_help="builds dockers from your business logic") [email protected]('--path', default=os.getcwd(), help='Build within this path.') +def build(path): + click.echo(f'Building rainbows in {path}') + if docker_is_running(): + build_rainbows.build_rainbows(path) + + +def deploy_rainbow_core_internal(): + click.echo("WARN: refreshing rainbow core package") + rainbow_home = environment.get_rainbow_home() + subprocess.call([f'package.sh {rainbow_home}'], shell=True) + + [email protected]("deploy", short_help="deploys your rainbow.yaml files to $RAINBOW_HOME folder") [email protected]('--path', default=os.getcwd(), help="folder containing rainbow.yaml files") +def deploy_rainbows(path): + click.echo("deploying rainbow yaml files") + rainbow_home = environment.get_rainbow_home() + os.makedirs(rainbow_home, exist_ok=True) + os.makedirs(environment.get_dags_dir(), exist_ok=True) + deploy_rainbow_core_internal() + config_files = files_util.find_config_files(path) + for config_file in config_files: + click.echo(f"deploying rainbow file: {config_file}") + yml_name = os.path.basename(config_file) + target_yml_name = os.path.join(environment.get_dags_dir(), yml_name) + shutil.copyfile(config_file, target_yml_name) + + [email protected]("start", short_help="starts a local airflow in docker compose. should be run after deploy. 
" + + "Make sure docker is running on your machine") +def start(): + if docker_is_running(): + # initialize rainbow home by default + environment.get_rainbow_home() + result = subprocess.call([f'docker-compose -f "{environment.get_rainbow_home()}/docker-compose.yml" up'], + env=os.environ, shell=True) + + +if __name__ == '__main__': + cli() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c102ae3 --- /dev/null +++ b/setup.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required bgit y applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+import setuptools
+
+# The README is passed to setup() below as the package's long description.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+# Each line of requirements.txt becomes one install_requires entry.
+with open('requirements.txt', encoding="utf-8") as f:
+    requirements = f.read().splitlines()
+
+setuptools.setup(
+    name="rainbow",
+    version="0.0.1",
+    author="Rainbow team",
+    description="A package for authoring and deploying machine learning workflows",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/Natural-Intelligence/rainbow",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.6',
+    install_requires=requirements,
+    scripts=['scripts/rainbow', 'scripts/package.sh']
+)
diff --git a/tests/runners/airflow/rainbow/requirements.txt b/tests/runners/airflow/rainbow/requirements.txt
new file mode 100644
index 0000000..037103e
--- /dev/null
+++ b/tests/runners/airflow/rainbow/requirements.txt
@@ -0,0 +1 @@
+pillow
\ No newline at end of file
diff --git a/tests/util/test_class_utils.py b/tests/util/test_class_utils.py
new file mode 100644
index 0000000..0deeff6
--- /dev/null
+++ b/tests/util/test_class_utils.py
@@ -0,0 +1,32 @@
+from unittest import TestCase
+
+from rainbow.core.util import class_util
+from tests.util.test_pkg_1.test_clazz_base import A, Z
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_2 import C
+from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import F, D, E
+from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_2.test_clazz_leaf_2 import G, H
+
+
+class Test(TestCase):
+    def test_find_full_hierarchy_from_root(self):
+        expected_set = {B, C, D, E, H, Z}
+        self.hierarchy_check(A, expected_set)
+
+    def test_find_full_hierarchy_mid_tree_in_package(self):
+        expected_set = {G}
+        self.hierarchy_check(F, expected_set)
+
+    def test_leaf_class(self):
+        expected_set = set()
+        self.hierarchy_check(G, expected_set)
+
+    def hierarchy_check(self, clazz, expected_set):
+        """Assert find_subclasses_in_packages(test_pkg_1, clazz) yields exactly expected_set."""
+        pkg_root = 'tests.util.test_pkg_1'
+        full_tree = class_util.find_subclasses_in_packages(
+            [pkg_root],
+            clazz)
+
+        res_set = set(full_tree.values())
+        self.assertEqual(res_set, expected_set)
diff --git a/rainbow/core/util/files_util.py b/tests/util/test_pkg_1/__init__.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/__init__.py
index b1d1daf..217e5db 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/__init__.py
@@ -15,16 +15,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import os
-
-
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
diff --git a/rainbow/core/util/files_util.py b/tests/util/test_pkg_1/test_clazz_base.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_clazz_base.py
index b1d1daf..3e7c523 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_clazz_base.py
@@ -16,15 +16,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import os
+class A:
+    """Base class with no explicit parent; Z derives from it."""
 
 
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
+
+class Z(A):
+    """Subclass of A defined in the same module."""
diff --git a/rainbow/core/util/files_util.py b/tests/util/test_pkg_1/test_pkg_1_1/__init__.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/__init__.py
index b1d1daf..217e5db 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/__init__.py
@@ -15,16 +15,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import os
-
-
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
diff --git a/rainbow/core/util/files_util.py b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py
index b1d1daf..6fe2e9a 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py
@@ -16,15 +16,13 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import os
+from tests.util.test_pkg_1.test_clazz_base import A
 
 
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
+
+class B(A):
+    """Subclass of A."""
+
+
+class M:
+    """Class with no explicit base; it does not derive from A."""
diff --git a/rainbow/core/util/files_util.py b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py
index b1d1daf..e279c7a 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py
@@ -16,15 +16,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import os
+from tests.util.test_pkg_1.test_clazz_base import A
 
 
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
+
+class C(A):
+    """Subclass of A."""
diff --git a/rainbow/core/util/files_util.py b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py
similarity index 71%
copy from rainbow/core/util/files_util.py
copy to tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py
index b1d1daf..217e5db 100644
--- a/rainbow/core/util/files_util.py
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py
@@ -15,16 +15,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-import os
-
-
-def find_config_files(path):
-    files = []
-    print(path)
-    for r, d, f in os.walk(path):
-        for file in f:
-            print(os.path.basename(file))
-            if os.path.basename(file) in ['rainbow.yml', 'rainbow.yaml']:
-                files.append(os.path.join(r, file))
-    return files
diff --git a/rainbow-cli b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py
old mode 100755
new mode 100644
similarity index 69%
rename from rainbow-cli
rename to tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py
index 4f16b4e..2aba50e
--- a/rainbow-cli
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -17,24 +15,19 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 
-import os
 
-import click
-from rainbow.build import build_rainbows
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B
+from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_2 import C
 
 
-@click.group()
-def cli():
-    pass
+class D(B):
+    """Subclass of B."""
 
 
-@cli.command()
-@click.option('--path', default=os.getcwd(), help='Build within this path.')
-def build(path):
-    click.echo(f'Building rainbows in {path}')
-    build_rainbows.build_rainbows(path)
+class E(C):
+    """Subclass of C."""
 
 
-if __name__ == '__main__':
-    cli()
+class F:
+    """Class with no explicit base."""
diff --git a/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/__init__.py b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py
new file mode 100644
index 0000000..a328f33
--- /dev/null
+++ b/tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py
@@ -0,0 +1,8 @@
+from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import F, E
+
+
+class G(F):
+    """Subclass of F."""
+
+class H(E):
+    """Subclass of E."""
