This is an automated email from the ASF dual-hosted git repository.

jbonofre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-liminal.git
commit 07aad66928fb260d2dbb2ce821c2a5b1f864b55c
Author: assapin <[email protected]>
AuthorDate: Mon Jun 22 15:03:53 2020 +0300

    Local mode improvements
---
 MANIFEST.in                                 |  21 +++++++++
 README.md                                   |  67 +++++++++++++++++++++++++---
 images/airflow_trigger.png                  | Bin 0 -> 148427 bytes
 images/k8s_running.png                      | Bin 0 -> 223001 bytes
 rainbow/runners/airflow/dag/rainbow_dags.py |   2 +-
 scripts/docker-compose.yml                  |   1 +
 scripts/package.sh                          |  19 +++-----
 setup.py                                    |  17 ++++---
 tests/runners/airflow/rainbow/rainbow.yml   |   2 +-
 9 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..04cdb6d
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include scripts/*
+include requirements-airflow.txt
+recursive-include rainbow/build/ *
\ No newline at end of file
diff --git a/README.md b/README.md
index ee2f961..078343a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,22 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
 # Rainbow
 
 Rainbow is an end-to-end platform for data engineers & scientists, allowing them to build,
@@ -80,7 +99,7 @@ services:
 # Installation
 1. Install this package
 ```bash
-   pip install git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode
+   pip install liminal
 ```
 2. Optional: set RAINBOW_HOME to path of your choice (if not set, will default to ~/rainbow_home)
 ```bash
@@ -102,18 +121,54 @@ a requirements.txt in the root of your project.
 
 When your pipeline code is ready, you can test it by running it locally on your machine.
 
-1. Deploy the pipeline:
+1. Ensure you have the Docker engine running locally, and enable a local Kubernetes cluster:
+
+
+If you want to execute your pipeline on a remote kubernetes cluster, make sure the cluster is configured
+using:
+```bash
+kubectl config set-context <your remote kubernetes cluster>
+```
+2. Build the docker images used by your pipeline.
+
+In the example pipeline above, you can see that tasks and services have an "image" field - such as
+"my_static_input_task_image". This means that the task is executed inside a docker container, and the docker container
+is created from a docker image where various code and libraries are installed.
+
+You can take a look at what the build process looks like, e.g.
+[here](https://github.com/Natural-Intelligence/rainbow/tree/master/rainbow/build/image/python)
+
+In order for the images to be available for your pipeline, you'll need to build them locally:
+
+```bash
+cd </path/to/your/rainbow/code>
+rainbow build
+```
+
+You'll see a number of outputs indicating the various docker images being built.
+
+3. Deploy the pipeline:
 ```bash
 cd </path/to/your/rainbow/code>
 rainbow deploy
 ```
-2. Make sure you have docker running
-3. Start the Server
+
+4. Start the server
 ```bash
 rainbow start
 ```
-4. Navigate to [http://localhost:8080/admin]
-5. You should see your
+
+5. Navigate to [http://localhost:8080/admin](http://localhost:8080/admin)
+
+6. You should see your
+The pipeline is scheduled to run according to the ```yaml schedule: 0 * 1 * *``` field in the .yml file you provided.
+
+7. To manually activate your pipeline:
+Click your pipeline and then click "trigger DAG"
+Click "Graph view"
+You should see the steps in your pipeline getting executed in "real time" by clicking "Refresh" periodically.
+
+
 
 ### Running Tests (for contributors)
 When doing local development and running Rainbow unit-tests, make sure to set RAINBOW_STAND_ALONE_MODE=True
diff --git a/images/airflow_trigger.png b/images/airflow_trigger.png
new file mode 100644
index 0000000..22168e8
Binary files /dev/null and b/images/airflow_trigger.png differ
diff --git a/images/k8s_running.png b/images/k8s_running.png
new file mode 100644
index 0000000..8bf8f3b
Binary files /dev/null and b/images/k8s_running.png differ
diff --git a/rainbow/runners/airflow/dag/rainbow_dags.py b/rainbow/runners/airflow/dag/rainbow_dags.py
index 730fd03..9deef8e 100644
--- a/rainbow/runners/airflow/dag/rainbow_dags.py
+++ b/rainbow/runners/airflow/dag/rainbow_dags.py
@@ -35,7 +35,7 @@ __DEPENDS_ON_PAST = 'depends_on_past'
 
 def register_dags(configs_path):
     """
-    TODO: doc for register_dags
+    Registers pipelines in rainbow yml files found in given path (recursively) as airflow DAGs.
""" print(f'Registering DAG from path: {configs_path}') config_files = files_util.find_config_files(configs_path) diff --git a/scripts/docker-compose.yml b/scripts/docker-compose.yml index b6a2dc3..d0304e5 100644 --- a/scripts/docker-compose.yml +++ b/scripts/docker-compose.yml @@ -30,6 +30,7 @@ max-file: "3" volumes: - ${RAINBOW_HOME}:/usr/local/airflow/dags + - ${HOME}/.kube:/usr/local/airflow/.kube ports: - "8080:8080" command: webserver diff --git a/scripts/package.sh b/scripts/package.sh index f4083e4..7824fd5 100755 --- a/scripts/package.sh +++ b/scripts/package.sh @@ -42,20 +42,14 @@ rsync -a --exclude 'venv' $(PWD)/ $docker_build_dir/zip_content/ # perform installation of external pacakges (framework-requirements and user-requirements) # this is done inside a docker to 1) avoid requiring the user to install stuff, and 2) to create a platform-compatible # package (install the native libraries in a flavour suitable for the docker in which airflow runs, and not user machine) -docker stop rainbow_build -docker rm rainbow_build -docker run --name rainbow_build -v /private/"$docker_build_dir":/home/rainbow/tmp --entrypoint="" -u 0 \ - puckel/docker-airflow:1.10.9 /bin/bash -c "apt-get update && apt-get install -y wget && apt-get install -y git && - cd /home/rainbow/tmp/zip_content && - wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/rainbow/runners/airflow/dag/rainbow_dags.py && - wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/requirements-airflow.txt && - wget https://raw.githubusercontent.com/Natural-Intelligence/rainbow/rainbow_local_mode/scripts/docker-compose.yml && - pip install --no-deps --target=\"/home/rainbow/tmp/zip_content\" git+https://github.com/Natural-Intelligence/rainbow.git@rainbow_local_mode && + +docker run --rm --name rainbow_build -v /private/"$docker_build_dir":/home/rainbow/tmp --entrypoint="" -u 0 \ + puckel/docker-airflow:1.10.9 /bin/bash -c "cd /home/rainbow/tmp/zip_content && + pip install --no-deps --target=\"/home/rainbow/tmp/zip_content\" liminal==0.0.2dev5 && + rsync -avzh --ignore-errors /home/rainbow/tmp/zip_content/liminal-resources/* /home/rainbow/tmp/zip_content/ pip install --target=\"/home/rainbow/tmp/zip_content\" -r /home/rainbow/tmp/zip_content/requirements-airflow.txt && pip install --target=\"/home/rainbow/tmp/zip_content\" -r /home/rainbow/tmp/zip_content/requirements.txt" -docker stop rainbow_build -docker rm rainbow_build # zip the content per https://airflow.apache.org/docs/stable/concepts.html#packaged-dags cd $docker_build_dir/zip_content @@ -64,6 +58,3 @@ rm __init__.py zip -r ../dags/rainbows.zip . cp ../dags/rainbows.zip $target_path - - - diff --git a/setup.py b/setup.py index c102ae3..2a0fdbb 100644 --- a/setup.py +++ b/setup.py @@ -17,9 +17,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import os
 
 import setuptools
-from setuptools import setup
 
 with open("README.md", "r") as fh:
     long_description = fh.read()
@@ -29,9 +29,9 @@ with open('requirements.txt') as f:
     print(requirements)
 
 setuptools.setup(
-    name="rainbow",
-    version="0.0.1",
-    author="Rainbow team",
+    name="liminal",
+    version=os.environ["LIMINAL_BUILD_VERSION"],
+    author="liminal team",
     description="A package for authoring and deploying machine learning workflows",
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -39,10 +39,15 @@ setuptools.setup(
     packages=setuptools.find_packages(),
     classifiers=[
         "Programming Language :: Python :: 3",
-        "License :: Apache 2.0",
+        "License :: OSI Approved :: Apache Software License",
         "Operating System :: OS Independent",
     ],
+    license='Apache License, Version 2.0',
     python_requires='>=3.6',
     install_requires=requirements,
-    scripts=['scripts/rainbow', 'scripts/package.sh']
+    scripts=['scripts/rainbow', 'scripts/package.sh'],
+    include_package_data=True,
+    data_files=[('liminal-resources', ['scripts/docker-compose.yml',
+                                       'requirements-airflow.txt',
+                                       'rainbow/runners/airflow/dag/rainbow_dags.py'])]
 )
diff --git a/tests/runners/airflow/rainbow/rainbow.yml b/tests/runners/airflow/rainbow/rainbow.yml
index 77af37b..1d5da13 100644
--- a/tests/runners/airflow/rainbow/rainbow.yml
+++ b/tests/runners/airflow/rainbow/rainbow.yml
@@ -30,7 +30,7 @@ pipelines:
       key2: val2
     metrics:
       namespace: TestNamespace
-      backends: [ 'cloudwatch' ]
+      backends: [ ]
     tasks:
       - task: my_static_input_task
        type: python
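For orientation, the pipeline .yml that the updated README walks through has roughly the shape sketched below. This is a minimal sketch assembled only from fields visible in this commit (the `schedule` value quoted in the README, the task and metrics fields in tests/runners/airflow/rainbow/rainbow.yml, and the `image` field the README describes); the `pipeline` entry name and the exact nesting are assumptions, not the authoritative schema.

```yaml
# Illustrative sketch of a rainbow pipeline .yml; names outside this commit's diff are assumed.
pipelines:
  - pipeline: my_pipeline                  # assumed pipeline name/nesting
    schedule: 0 * 1 * *                    # cron-style schedule referenced in the README
    metrics:
      namespace: TestNamespace
      backends: [ ]
    tasks:
      - task: my_static_input_task
        type: python
        image: my_static_input_task_image  # docker image built locally by `rainbow build`
```

With such a file in place, the local-mode flow described in the README is `rainbow build`, `rainbow deploy`, `rainbow start`, then opening http://localhost:8080/admin.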
