This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/main by this push:
     new 1b26d3e5de indentation fix, minor updates #3967 (#3977)
1b26d3e5de is described below

commit 1b26d3e5de504e65b62eed25e8df963d0eb0a6c4
Author: Bart Maertens <[email protected]>
AuthorDate: Mon May 27 08:42:59 2024 +0200

    indentation fix, minor updates #3967 (#3977)
---
 .../apache-airflow/docker-compose.yaml             | 35 +++++--------
 .../how-to-guides/run-hop-in-apache-airflow.adoc   | 60 +++++++++++-----------
 2 files changed, 43 insertions(+), 52 deletions(-)

diff --git 
a/docs/hop-user-manual/modules/ROOT/assets/files/how-to-guides/apache-airflow/docker-compose.yaml
 
b/docs/hop-user-manual/modules/ROOT/assets/files/how-to-guides/apache-airflow/docker-compose.yaml
index e8b8884833..a2984185eb 100644
--- 
a/docs/hop-user-manual/modules/ROOT/assets/files/how-to-guides/apache-airflow/docker-compose.yaml
+++ 
b/docs/hop-user-manual/modules/ROOT/assets/files/how-to-guides/apache-airflow/docker-compose.yaml
@@ -24,7 +24,7 @@
 # The following variables are supported:
 #
 # AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
-#                                Default: apache/airflow:2.6.0
+#                                Default: apache/airflow:2.9.1
 # AIRFLOW_UID                  - User ID in Airflow containers
 #                                Default: 50000
 # AIRFLOW_PROJ_DIR             - Base path to which all the files will be 
volumed.
@@ -44,20 +44,17 @@
 #
 # Feel free to modify this file to suit your needs.
 ---
-version: '3.8'
 x-airflow-common:
   &airflow-common
   # In order to add custom dependencies or upgrade provider packages you can 
use your extended image.
   # Comment the image line, place your Dockerfile in the directory where you 
placed the docker-compose.yaml
   # and uncomment the "build" line below, Then run `docker-compose build` to 
build the images.
-  image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.6.0}
+  image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.9.1}
   # build: .
   environment:
     &airflow-common-env
     AIRFLOW__CORE__EXECUTOR: CeleryExecutor
     AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: 
postgresql+psycopg2://airflow:airflow@postgres/airflow
-    # For backward compatibility, with Airflow <2.3
-    AIRFLOW__CORE__SQL_ALCHEMY_CONN: 
postgresql+psycopg2://airflow:airflow@postgres/airflow
     AIRFLOW__CELERY__RESULT_BACKEND: 
db+postgresql://airflow:airflow@postgres/airflow
     AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
     AIRFLOW__CORE__FERNET_KEY: ''
@@ -72,11 +69,14 @@ x-airflow-common:
     # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks
     # for other purpose (development, test and especially production usage) 
build/extend Airflow image.
     _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
+    # The following line can be used to set a custom config file, stored in 
the local config folder
# If you want to use it, uncomment it and replace airflow.cfg with the 
name of your config file
+    # AIRFLOW_CONFIG: '/opt/airflow/config/airflow.cfg'
   volumes:
     - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
     - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
+    - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
     - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
-    - /var/run/docker.sock:/var/run/docker.sock
   user: "${AIRFLOW_UID:-50000}:0"
   depends_on:
     &airflow-common-depends-on
@@ -102,7 +102,9 @@ services:
     restart: always
 
   redis:
-    image: redis:latest
+    # Redis is limited to 7.2-bookworm due to licensing change
+    # https://redis.io/blog/redis-adopts-dual-source-available-licensing/
+    image: redis:7.2-bookworm
     expose:
       - 6379
     healthcheck:
@@ -149,9 +151,10 @@ services:
     <<: *airflow-common
     command: celery worker
     healthcheck:
+      # yamllint disable rule:line-length
       test:
         - "CMD-SHELL"
-        - 'celery --app airflow.executors.celery_executor.app inspect ping -d 
"celery@$${HOSTNAME}"'
+        - 'celery --app airflow.providers.celery.executors.celery_executor.app 
inspect ping -d "celery@$${HOSTNAME}" || celery --app 
airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
       interval: 30s
       timeout: 10s
       retries: 5
@@ -189,20 +192,6 @@ services:
     command:
       - -c
       - |
-        function ver() {
-          printf "%04d%04d%04d%04d" $${1//./ }
-        }
-        airflow_version=$$(AIRFLOW__LOGGING__LOGGING_LEVEL=INFO && gosu 
airflow airflow version)
-        airflow_version_comparable=$$(ver $${airflow_version})
-        min_airflow_version=2.2.0
-        min_airflow_version_comparable=$$(ver $${min_airflow_version})
-        if (( airflow_version_comparable < min_airflow_version_comparable )); 
then
-          echo
-          echo -e "\033[1;31mERROR!!!: Too old Airflow version 
$${airflow_version}!\e[0m"
-          echo "The minimum Airflow version supported: 
$${min_airflow_version}. Only use this or higher!"
-          echo
-          exit 1
-        fi
         if [[ -z "${AIRFLOW_UID}" ]]; then
           echo
           echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
@@ -251,7 +240,7 @@ services:
     # yamllint enable rule:line-length
     environment:
       <<: *airflow-common-env
-      _AIRFLOW_DB_UPGRADE: 'true'
+      _AIRFLOW_DB_MIGRATE: 'true'
       _AIRFLOW_WWW_USER_CREATE: 'true'
       _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
       _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
diff --git 
a/docs/hop-user-manual/modules/ROOT/pages/how-to-guides/run-hop-in-apache-airflow.adoc
 
b/docs/hop-user-manual/modules/ROOT/pages/how-to-guides/run-hop-in-apache-airflow.adoc
index b8767de582..6da47397d2 100644
--- 
a/docs/hop-user-manual/modules/ROOT/pages/how-to-guides/run-hop-in-apache-airflow.adoc
+++ 
b/docs/hop-user-manual/modules/ROOT/pages/how-to-guides/run-hop-in-apache-airflow.adoc
@@ -39,7 +39,7 @@ The goal of this page is to get a basic Airflow setup running 
to demonstrate how
 
 To keep things simple, we'll use Docker Compose to get Apache Airflow up and 
running in a matter of minutes. Even though 
https://docs.docker.com/compose/[Docker Compose^] has been said to be on the 
verge of extinction for quite a while now, it still is a quick and convenient 
way to experiment with data platforms that would otherwise be time-consuming 
and difficult to set up.
 
-Apache Airflow provides a 
https://airflow.apache.org/docs/apache-airflow/2.6.0/docker-compose.yaml[docker-compose.yaml^]
 file. Our goal is to run Apache Hop workflows and pipelines in Apache Airflow, 
so we're not interested in the Airflow sample DAGs that come with this 
docker-compose file.
+Apache Airflow provides a 
https://airflow.apache.org/docs/apache-airflow/2.9.1/docker-compose.yaml[docker-compose.yaml^]
 file. Our goal is to run Apache Hop workflows and pipelines in Apache Airflow, 
so we're not interested in the Airflow sample DAGs that come with this 
docker-compose file.
 
 Change the **AIRFLOW\__CORE__LOAD_EXAMPLES** variable to "false" in the 
default file, and add an additional line 
**/var/run/docker.sock:/var/run/docker.sock** in the volumes section.
 All of this has already been done if you use the 
https://github.com/apache/hop/tree/master/docs/hop-user-manual/modules/ROOT/assets/files/how-to-guides/apache-airflow/docker-compose.yaml[the
 file] in our github repository.
@@ -55,19 +55,19 @@ The various Apache Airflow need a couple of moments to 
start. Once you see a cou
 
 [source, bash]
 ----
-apache-airflow-airflow-triggerer-1  | [2023-05-07 07:50:08 +0000] [24] [INFO] 
Booting worker with pid: 24
-apache-airflow-airflow-triggerer-1  | [2023-05-07 07:50:08 +0000] [25] [INFO] 
Booting worker with pid: 25
+apache-airflow-airflow-triggerer-1  | [2024-05-07 07:50:08 +0000] [24] [INFO] 
Booting worker with pid: 24
+apache-airflow-airflow-triggerer-1  | [2024-05-07 07:50:08 +0000] [25] [INFO] 
Booting worker with pid: 25
 apache-airflow-airflow-scheduler-1  |   ____________       _____________
 apache-airflow-airflow-scheduler-1  |  ____    |__( )_________  __/__  
/________      __
 apache-airflow-airflow-scheduler-1  | ____  /| |_  /__  ___/_  /_ __  /_  __ 
\_ | /| / /
 apache-airflow-airflow-scheduler-1  | ___  ___ |  / _  /   _  __/ _  / / /_/ 
/_ |/ |/ /
 apache-airflow-airflow-scheduler-1  |  _/_/  |_/_/  /_/    /_/    /_/  
\____/____/|__/
-apache-airflow-airflow-scheduler-1  | [2023-05-07T07:50:08.601+0000] 
{executor_loader.py:114} INFO - Loaded executor: CeleryExecutor
-apache-airflow-airflow-scheduler-1  | [2023-05-07T07:50:08.652+0000] 
{scheduler_job_runner.py:823} INFO - Starting the scheduler
-apache-airflow-airflow-scheduler-1  | [2023-05-07T07:50:08.653+0000] 
{scheduler_job_runner.py:830} INFO - Processing each file at most -1 times
-apache-airflow-airflow-scheduler-1  | [2023-05-07T07:50:08.657+0000] 
{manager.py:165} INFO - Launched DagFileProcessorManager with pid: 34
-apache-airflow-airflow-scheduler-1  | [2023-05-07T07:50:08.658+0000] 
{scheduler_job_runner.py:1576} INFO - Resetting orphaned tasks for active dag 
runs
-apache-airflow-airflow-scheduler-1  | [2023-05-07T07:50:08.660+0000] 
{settings.py:60} INFO - Configured default timezone Timezone('UTC')
+apache-airflow-airflow-scheduler-1  | [2024-05-07T07:50:08.601+0000] 
{executor_loader.py:114} INFO - Loaded executor: CeleryExecutor
+apache-airflow-airflow-scheduler-1  | [2024-05-07T07:50:08.652+0000] 
{scheduler_job_runner.py:823} INFO - Starting the scheduler
+apache-airflow-airflow-scheduler-1  | [2024-05-07T07:50:08.653+0000] 
{scheduler_job_runner.py:830} INFO - Processing each file at most -1 times
+apache-airflow-airflow-scheduler-1  | [2024-05-07T07:50:08.657+0000] 
{manager.py:165} INFO - Launched DagFileProcessorManager with pid: 34
+apache-airflow-airflow-scheduler-1  | [2024-05-07T07:50:08.658+0000] 
{scheduler_job_runner.py:1576} INFO - Resetting orphaned tasks for active dag 
runs
+apache-airflow-airflow-scheduler-1  | [2024-05-07T07:50:08.660+0000] 
{settings.py:60} INFO - Configured default timezone Timezone('UTC')
 ----
 
 Go to http://localhost:8080/home in your browser and log on with username 
"airflow" and password "airflow".
@@ -171,7 +171,7 @@ with DAG('sample-pipeline', default_args=default_args, 
schedule_interval=None, c
     end_dag = DummyOperator(
         task_id='end_dag'
         )
-        hop = DockerOperator(
+    hop = DockerOperator(
         task_id='sample-pipeline',
         # use the Apache Hop Docker image. Add your tags here in the default 
apache/hop: syntax
         image='apache/hop',
@@ -201,10 +201,12 @@ All it takes to deploy your dag is to put it in Airflow's 
dags folder. Our docke
 
 Save the DAG we just created in your dags folder as apache-hop-dag-simple.py. 
After a short wait, your DAG will show up in the list of dags.
 
-If there are any syntax errors in your DAG, Airflow will let you know. Expand 
the error dialog for more details about the error.
-
+If there are any syntax errors in your DAG, Airflow will let you know. Expand 
the error dialog for more details about the error, as shown in the image below. 
Don't worry, you shouldn't have any errors with the DAG we just created. 
+ 
 
image:how-to-guides/run-hop-in-apache-airflow/apache-airflow-dag-error.png[Apache
 Airflow - DAG error, width="45%"]
 
+If your DAG is deployed correctly (it should), you'll see it show up in the 
list of available DAGs. 
+
 
image:how-to-guides/run-hop-in-apache-airflow/apache-airflow-dag-available.png[Apache
 Airflow - DAG available, width="75%"]
 
 Click on the **sample-pipeline** DAG to see more details about it. From the 
tab list at the top of the page, select "Code" to review the DAG you just 
deployed, or "Graph" to see the graph representation of the DAG. This graph is 
extremely simple, but we're exploring Apache Airflow, so that's intentional.
@@ -221,12 +223,12 @@ 
image:how-to-guides/run-hop-in-apache-airflow/apache-airflow-dag-logs.png[Apache
 
 [source, bash]
 ----
-2023-05-07, 13:54:39 UTC] {docker.py:391} INFO - 2023/05/07 13:54:39 - Ouput.0 
- Finished processing (I=0, O=0, R=5, W=5, U=0, E=0)
-[2023-05-07, 13:54:39 UTC] {docker.py:391} INFO - 2023/05/07 13:54:39 - 
null-if-basic - Pipeline duration : 0.45 seconds [  0.450 ]
-[2023-05-07, 13:54:39 UTC] {docker.py:391} INFO - HopRun exit.
-[2023-05-07, 13:54:39 UTC] {docker.py:391} INFO - 2023/05/07 13:54:39 - 
null-if-basic - Execution finished on a local pipeline engine with run 
configuration 'local'
-[2023-05-07, 13:54:40 UTC] {taskinstance.py:1373} INFO - Marking task as 
SUCCESS. dag_id=sample-pipeline, task_id=sample-pipeline, 
execution_date=20230507T135409, start_date=20230507T135411, 
end_date=20230507T135440
-[2023-05-07, 13:54:40 UTC] {local_task_job_runner.py:232} INFO - Task exited 
with return code 0
+2024-05-07, 13:54:39 UTC] {docker.py:391} INFO - 2023/05/07 13:54:39 - Ouput.0 
- Finished processing (I=0, O=0, R=5, W=5, U=0, E=0)
+[2024-05-07, 13:54:39 UTC] {docker.py:391} INFO - 2023/05/07 13:54:39 - 
null-if-basic - Pipeline duration : 0.45 seconds [  0.450 ]
+[2024-05-07, 13:54:39 UTC] {docker.py:391} INFO - HopRun exit.
+[2024-05-07, 13:54:39 UTC] {docker.py:391} INFO - 2023/05/07 13:54:39 - 
null-if-basic - Execution finished on a local pipeline engine with run 
configuration 'local'
+[2024-05-07, 13:54:40 UTC] {taskinstance.py:1373} INFO - Marking task as 
SUCCESS. dag_id=sample-pipeline, task_id=sample-pipeline, 
execution_date=20230507T135409, start_date=20230507T135411, 
end_date=20230507T135440
+[2024-05-07, 13:54:40 UTC] {local_task_job_runner.py:232} INFO - Task exited 
with return code 0
 ----
 
 When you return to the Airflow home screen, your DAG will now show green 
circles for successful runs.
@@ -310,17 +312,17 @@ Your DAG logs will now show the environment variable and 
the parameter we used i
 
 [source, bash]
 ----
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - 
pipeline-with-parameter - Pipeline has allocated 5 threads and 4 rowsets.
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - 
generate 1 row.0 - Starting to run...
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - 
generate 1 row.0 - Finished processing (I=0, O=0, R=0, W=1, U=0, E=0)
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${PRM_EXAMPLE}.0 - field [example] has value [EXAMPLE VALUE]
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${PRM_EXAMPLE}.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
parameter to log.0 -
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${ENV_VARIABLE}.0 - field [env_variable] has value [variable value]
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
env_variable to log.0 -
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
parameter to log.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${ENV_VARIABLE}.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
-[2023-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
env_variable to log.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - 
pipeline-with-parameter - Pipeline has allocated 5 threads and 4 rowsets.
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - 
generate 1 row.0 - Starting to run...
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - 
generate 1 row.0 - Finished processing (I=0, O=0, R=0, W=1, U=0, E=0)
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${PRM_EXAMPLE}.0 - field [example] has value [EXAMPLE VALUE]
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${PRM_EXAMPLE}.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
parameter to log.0 -
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${ENV_VARIABLE}.0 - field [env_variable] has value [variable value]
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
env_variable to log.0 -
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
parameter to log.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - get 
${ENV_VARIABLE}.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
+[2024-05-08, 08:21:34 UTC] {docker.py:391} INFO - 2023/05/08 08:21:34 - write 
env_variable to log.0 - Finished processing (I=0, O=0, R=1, W=1, U=0, E=0)
 ----
 
 == Scheduling a DAG in Apache Airflow

Reply via email to