This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch gh-pages
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/gh-pages by this push:
     new 7f75838  Airflow blog post and tutorial typo fixes.
7f75838 is described below

commit 7f75838902b98f3b378d81af475c6bdd216171b5
Author: James Turton <[email protected]>
AuthorDate: Mon Aug 23 10:19:48 2021 +0200

    Airflow blog post and tutorial typo fixes.
---
 _data/docs.json                                    | 142 ++++++++++++++++++---
 .../080-orchestrating-queries-with-airflow.md      |  26 ++--
 .../080-orchestrating-queries-with-airflow.md      |   2 +-
 _tools/createdatadocs.py                           |   3 +-
 .../en/2021-08-05-drill-provider-for-airflow.md    |   2 +-
 5 files changed, 138 insertions(+), 37 deletions(-)

diff --git a/_data/docs.json b/_data/docs.json
index 6544170..ce67611 100644
--- a/_data/docs.json
+++ b/_data/docs.json
@@ -279,8 +279,8 @@
                     }
                 ],
                 "children": [],
-                "next_title": "Drill-on-YARN",
-                "next_url": "/docs/drill-on-yarn/",
+                "next_title": "Orchestrating queries with Airflow",
+                "next_url": "/docs/orchestrating-queries-with-airflow/",
                 "parent": "Tutorials",
                 "previous_title": "Analyzing Social Media",
                 "previous_url": "/docs/analyzing-social-media/",
@@ -5209,8 +5209,8 @@
                 "next_title": "Drill-on-YARN Introduction",
                 "next_url": "/docs/drill-on-yarn-introduction/",
                 "parent": "",
-                "previous_title": "Analyzing Data Using Window Functions",
-                "previous_url": "/docs/analyzing-data-using-window-functions/",
+                "previous_title": "Orchestrating queries with Airflow",
+                "previous_url": "/docs/orchestrating-queries-with-airflow/",
                 "relative_path": "_docs/en/031-drill-on-yarn.md",
                 "title": "Drill-on-YARN",
                 "url": "/docs/drill-on-yarn/"
@@ -8451,6 +8451,23 @@
                 "title": "Optimizing Parquet Metadata Reading",
                 "url": "/docs/optimizing-parquet-metadata-reading/"
             },
+            "Orchestrating queries with Airflow": {
+                "breadcrumbs": [
+                    {
+                        "title": "Tutorials",
+                        "url": "/docs/tutorials/"
+                    }
+                ],
+                "children": [],
+                "next_title": "Drill-on-YARN",
+                "next_url": "/docs/drill-on-yarn/",
+                "parent": "Tutorials",
+                "previous_title": "Analyzing Data Using Window Functions",
+                "previous_url": "/docs/analyzing-data-using-window-functions/",
+                "relative_path": 
"_docs/en/tutorials/080-orchestrating-queries-with-airflow.md",
+                "title": "Orchestrating queries with Airflow",
+                "url": "/docs/orchestrating-queries-with-airflow/"
+            },
             "PARTITION BY Clause": {
                 "breadcrumbs": [
                     {
@@ -16253,14 +16270,31 @@
                             }
                         ],
                         "children": [],
-                        "next_title": "Drill-on-YARN",
-                        "next_url": "/docs/drill-on-yarn/",
+                        "next_title": "Orchestrating queries with Airflow",
+                        "next_url": 
"/docs/orchestrating-queries-with-airflow/",
                         "parent": "Tutorials",
                         "previous_title": "Analyzing Social Media",
                         "previous_url": "/docs/analyzing-social-media/",
                         "relative_path": 
"_docs/en/tutorials/070-analyzing-data-using-window-functions.md",
                         "title": "Analyzing Data Using Window Functions",
                         "url": "/docs/analyzing-data-using-window-functions/"
+                    },
+                    {
+                        "breadcrumbs": [
+                            {
+                                "title": "Tutorials",
+                                "url": "/docs/tutorials/"
+                            }
+                        ],
+                        "children": [],
+                        "next_title": "Drill-on-YARN",
+                        "next_url": "/docs/drill-on-yarn/",
+                        "parent": "Tutorials",
+                        "previous_title": "Analyzing Data Using Window 
Functions",
+                        "previous_url": 
"/docs/analyzing-data-using-window-functions/",
+                        "relative_path": 
"_docs/en/tutorials/080-orchestrating-queries-with-airflow.md",
+                        "title": "Orchestrating queries with Airflow",
+                        "url": "/docs/orchestrating-queries-with-airflow/"
                     }
                 ],
                 "next_title": "Tutorials Introduction",
@@ -17650,14 +17684,31 @@
                             }
                         ],
                         "children": [],
-                        "next_title": "Drill-on-YARN",
-                        "next_url": "/docs/drill-on-yarn/",
+                        "next_title": "Orchestrating queries with Airflow",
+                        "next_url": 
"/docs/orchestrating-queries-with-airflow/",
                         "parent": "Tutorials",
                         "previous_title": "Analyzing Social Media",
                         "previous_url": "/docs/analyzing-social-media/",
                         "relative_path": 
"_docs/en/tutorials/070-analyzing-data-using-window-functions.md",
                         "title": "Analyzing Data Using Window Functions",
                         "url": "/docs/analyzing-data-using-window-functions/"
+                    },
+                    {
+                        "breadcrumbs": [
+                            {
+                                "title": "Tutorials",
+                                "url": "/docs/tutorials/"
+                            }
+                        ],
+                        "children": [],
+                        "next_title": "Drill-on-YARN",
+                        "next_url": "/docs/drill-on-yarn/",
+                        "parent": "Tutorials",
+                        "previous_title": "Analyzing Data Using Window 
Functions",
+                        "previous_url": 
"/docs/analyzing-data-using-window-functions/",
+                        "relative_path": 
"_docs/en/tutorials/080-orchestrating-queries-with-airflow.md",
+                        "title": "Orchestrating queries with Airflow",
+                        "url": "/docs/orchestrating-queries-with-airflow/"
                     }
                 ],
                 "next_title": "Tutorials Introduction",
@@ -17880,8 +17931,8 @@
                 "next_title": "Drill-on-YARN Introduction",
                 "next_url": "/docs/drill-on-yarn-introduction/",
                 "parent": "",
-                "previous_title": "Analyzing Data Using Window Functions",
-                "previous_url": "/docs/analyzing-data-using-window-functions/",
+                "previous_title": "Orchestrating queries with Airflow",
+                "previous_url": "/docs/orchestrating-queries-with-airflow/",
                 "relative_path": "_docs/en/031-drill-on-yarn.md",
                 "title": "Drill-on-YARN",
                 "url": "/docs/drill-on-yarn/"
@@ -29214,8 +29265,8 @@
                 "next_title": "Drill-on-YARN Introduction",
                 "next_url": "/docs/drill-on-yarn-introduction/",
                 "parent": "",
-                "previous_title": 
"\u901a\u8fc7\u7a97\u53e3\u51fd\u6570\u5206\u6790\u6570\u636e\u96c6",
-                "previous_url": "/docs/analyzing-data-using-window-functions/",
+                "previous_title": "Orchestrating queries with Airflow",
+                "previous_url": "/docs/orchestrating-queries-with-airflow/",
                 "relative_path": "_docs/en/031-drill-on-yarn.md",
                 "title": "Drill-on-YARN",
                 "url": "/docs/drill-on-yarn/"
@@ -32139,6 +32190,23 @@
                 "title": "Optimizing Parquet Metadata Reading",
                 "url": "/docs/optimizing-parquet-metadata-reading/"
             },
+            "Orchestrating queries with Airflow": {
+                "breadcrumbs": [
+                    {
+                        "title": "\u6559\u7a0b",
+                        "url": "/docs/tutorials/"
+                    }
+                ],
+                "children": [],
+                "next_title": "Drill-on-YARN",
+                "next_url": "/docs/drill-on-yarn/",
+                "parent": "\u6559\u7a0b",
+                "previous_title": 
"\u901a\u8fc7\u7a97\u53e3\u51fd\u6570\u5206\u6790\u6570\u636e\u96c6",
+                "previous_url": "/docs/analyzing-data-using-window-functions/",
+                "relative_path": 
"_docs/zh/tutorials/080-orchestrating-queries-with-airflow.md",
+                "title": "Orchestrating queries with Airflow",
+                "url": "/docs/orchestrating-queries-with-airflow/"
+            },
             "PARTITION BY Clause": {
                 "breadcrumbs": [
                     {
@@ -41207,14 +41275,31 @@
                             }
                         ],
                         "children": [],
-                        "next_title": "Drill-on-YARN",
-                        "next_url": "/docs/drill-on-yarn/",
+                        "next_title": "Orchestrating queries with Airflow",
+                        "next_url": 
"/docs/orchestrating-queries-with-airflow/",
                         "parent": "\u6559\u7a0b",
                         "previous_title": 
"\u5206\u6790\u793e\u4ea4\u5a92\u4f53",
                         "previous_url": "/docs/analyzing-social-media/",
                         "relative_path": 
"_docs/zh/tutorials/070-analyzing-data-using-window-functions.md",
                         "title": 
"\u901a\u8fc7\u7a97\u53e3\u51fd\u6570\u5206\u6790\u6570\u636e\u96c6",
                         "url": "/docs/analyzing-data-using-window-functions/"
+                    },
+                    {
+                        "breadcrumbs": [
+                            {
+                                "title": "\u6559\u7a0b",
+                                "url": "/docs/tutorials/"
+                            }
+                        ],
+                        "children": [],
+                        "next_title": "Drill-on-YARN",
+                        "next_url": "/docs/drill-on-yarn/",
+                        "parent": "\u6559\u7a0b",
+                        "previous_title": 
"\u901a\u8fc7\u7a97\u53e3\u51fd\u6570\u5206\u6790\u6570\u636e\u96c6",
+                        "previous_url": 
"/docs/analyzing-data-using-window-functions/",
+                        "relative_path": 
"_docs/zh/tutorials/080-orchestrating-queries-with-airflow.md",
+                        "title": "Orchestrating queries with Airflow",
+                        "url": "/docs/orchestrating-queries-with-airflow/"
                     }
                 ],
                 "next_title": "\u6559\u7a0b\u4ecb\u7ecd",
@@ -41361,8 +41446,8 @@
                     }
                 ],
                 "children": [],
-                "next_title": "Drill-on-YARN",
-                "next_url": "/docs/drill-on-yarn/",
+                "next_title": "Orchestrating queries with Airflow",
+                "next_url": "/docs/orchestrating-queries-with-airflow/",
                 "parent": "\u6559\u7a0b",
                 "previous_title": "\u5206\u6790\u793e\u4ea4\u5a92\u4f53",
                 "previous_url": "/docs/analyzing-social-media/",
@@ -41761,14 +41846,31 @@
                             }
                         ],
                         "children": [],
-                        "next_title": "Drill-on-YARN",
-                        "next_url": "/docs/drill-on-yarn/",
+                        "next_title": "Orchestrating queries with Airflow",
+                        "next_url": 
"/docs/orchestrating-queries-with-airflow/",
                         "parent": "\u6559\u7a0b",
                         "previous_title": 
"\u5206\u6790\u793e\u4ea4\u5a92\u4f53",
                         "previous_url": "/docs/analyzing-social-media/",
                         "relative_path": 
"_docs/zh/tutorials/070-analyzing-data-using-window-functions.md",
                         "title": 
"\u901a\u8fc7\u7a97\u53e3\u51fd\u6570\u5206\u6790\u6570\u636e\u96c6",
                         "url": "/docs/analyzing-data-using-window-functions/"
+                    },
+                    {
+                        "breadcrumbs": [
+                            {
+                                "title": "\u6559\u7a0b",
+                                "url": "/docs/tutorials/"
+                            }
+                        ],
+                        "children": [],
+                        "next_title": "Drill-on-YARN",
+                        "next_url": "/docs/drill-on-yarn/",
+                        "parent": "\u6559\u7a0b",
+                        "previous_title": 
"\u901a\u8fc7\u7a97\u53e3\u51fd\u6570\u5206\u6790\u6570\u636e\u96c6",
+                        "previous_url": 
"/docs/analyzing-data-using-window-functions/",
+                        "relative_path": 
"_docs/zh/tutorials/080-orchestrating-queries-with-airflow.md",
+                        "title": "Orchestrating queries with Airflow",
+                        "url": "/docs/orchestrating-queries-with-airflow/"
                     }
                 ],
                 "next_title": "\u6559\u7a0b\u4ecb\u7ecd",
@@ -41991,8 +42093,8 @@
                 "next_title": "Drill-on-YARN Introduction",
                 "next_url": "/docs/drill-on-yarn-introduction/",
                 "parent": "",
-                "previous_title": 
"\u901a\u8fc7\u7a97\u53e3\u51fd\u6570\u5206\u6790\u6570\u636e\u96c6",
-                "previous_url": "/docs/analyzing-data-using-window-functions/",
+                "previous_title": "Orchestrating queries with Airflow",
+                "previous_url": "/docs/orchestrating-queries-with-airflow/",
                 "relative_path": "_docs/en/031-drill-on-yarn.md",
                 "title": "Drill-on-YARN",
                 "url": "/docs/drill-on-yarn/"
diff --git a/_docs/en/tutorials/080-orchestrating-queries-with-airflow.md 
b/_docs/en/tutorials/080-orchestrating-queries-with-airflow.md
index 2dda77a..e9b8fef 100644
--- a/_docs/en/tutorials/080-orchestrating-queries-with-airflow.md
+++ b/_docs/en/tutorials/080-orchestrating-queries-with-airflow.md
@@ -4,7 +4,7 @@ slug: "Orchestrating queries with Airflow"
 parent: "Tutorials"
 ---
 
-This tutorial walks through the development of Apache Airflow DAG that 
implements a basic ETL process using Apache Drill.  We'll install Airflow into 
a Python virtualenv using pip before writing and testing our new DAG.  Consult 
the [Airflow installation 
documentation](https://airflow.apache.org/docs/apache-airflow/stable/installation.html)
 for more information about installing Airflow.
+This tutorial walks through the development of an Apache Airflow DAG that 
implements a basic ETL process using Apache Drill.  We'll install Airflow into 
a Python virtualenv using pip before writing and testing our new DAG.  Consult 
the [Airflow installation 
documentation](https://airflow.apache.org/docs/apache-airflow/stable/installation.html)
 for more information about installing Airflow.
 
 I'll be issuing commands using a shell on a Debian Linux machine in this 
tutorial but it should be possible with a little translation to follow along on 
other platforms.
 
@@ -24,12 +24,12 @@ virtualenv -p /usr/bin/python3 $VIRT_ENV_HOME/airflow
 
 ## Install Airflow
 
-If you've read their installation guide you'll have seen that the Airflow 
project provides constraints files the pin the versions of its Python package 
dependencies to known-good versions.  In many cases things work fine without 
constraints but, for the sake of reproducibility, we'll apply the constraints 
file applicable to our Python version using the script 0they provide for the 
purpose.
+If you've read their installation guide, you'll have seen that the Airflow 
project provides constraints files that pin its Python package dependencies to 
known-good versions.  In many cases things work fine without constraints but, 
for the sake of reproducibility, we'll apply the constraints file applicable to 
our Python version using the script they provide for the purpose.
 ```sh
 AIRFLOW_VERSION=2.1.2
 PYTHON_VERSION="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)"
 
CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt";
-pip install "apache-0airflow==${AIRFLOW_VERSION}" --constraint 
"${CONSTRAINT_URL}"
+pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint 
"${CONSTRAINT_URL}"
 pip install apache-airflow-providers-apache-drill
 ```
 
@@ -38,8 +38,8 @@ pip install apache-airflow-providers-apache-drill
 We're just experimenting here so we'll have Airflow set up a local SQLite 
database and add an admin user for ourselves.
 ```sh
 # Optional: change Airflow's data dir from the default of ~/airflow
-export0 AIRFLOW_HOME=~/Development/airflow
-mkdir -p ~/Development/airflow/
+export AIRFLOW_HOME=~/Development/airflow
+mkdir -p ~/Development/airflow
 
 # Create a new SQLite database for Airflow
 airflow db init
@@ -56,7 +56,7 @@ airflow users create \
 
 ## Configure a Drill connection
 
-At this point we should have a working Airflow installation. Fire up the web 
UI with `airflow webserver` and browse to http://localhost:8080.  Click on 
Admin -> Connections.  Add a new Drill connection called `drill_tutorial`, 
setting configuration according to your Drill environment.  If you're using 
embedded mode Drill locally like I am then you'll want the following config.
+At this point we should have a working Airflow installation. Fire up the web 
UI with `airflow webserver` and browse to http://localhost:8080.  Click on 
Admin -> Connections and add a new Drill connection called `drill_tutorial`, 
setting configuration according to your Drill environment.  If you're using 
embedded mode Drill locally like I am, then you'll want the following config.
 
 | Setting   | Value                                                        |
 | --------- | ------------------------------------------------------------ |
@@ -72,17 +72,17 @@ After you've saved the new connection you can shut the 
Airflow web UI down with
 
 ## Explore the source data
 
-If you've built ETLs before you know that you can't build anything until 
you've come to grips with the source data.  Let's obtain a sample of the first 
1m rows from the source and take a look.
+If you've developed ETLs before you know that you can't build anything until 
you've come to grips with the source data.  Let's obtain a sample of the first 
1m rows from the source and take a look.
 
 ```sh
 curl -s 
https://data.cdc.gov/api/views/vbim-akqf/rows.csv\?accessType\=DOWNLOAD | pv 
-lSs 1000000 > /tmp/cdc_covid_cases.csvh
 ```
 
-You can replace `pv -lSs 1000000` above with `head -n1000000` or just drop it 
if you don't mind fetching the whole file.  Downloading it with a web browser 
will also work fine.  Note that for a default Drill installation, saving with 
the file extension `.csvh` does matter for what follows because it will set 
`extractHeader = true` when this CSV file is queried, and this file does 
include a header.
+You can replace `pv -lSs 1000000` above with `head -n1000000`, or just drop it 
if you don't mind fetching the whole file.  Downloading the CSV file with a web 
browser will also get the job done.  Note that for a default Drill 
installation, saving with the file extension `.csvh` does matter for what 
follows because it will set the option `extractHeader = true` when this CSV 
file is queried, something that the SQL code to come relies on.
 
-It's time to break out Drill.  Instead of dumping my entire interactive SQL 
session here, I'll just list queries that I ran and the corresponding 
observations that I made.
+It's time to break out Drill.  Instead of dumping my entire interactive SQL 
session here, I'll just list relevant queries that I ran and the corresponding 
observations that I made.
 ```sql
-select * from dfs.tmp.`cdc_covid_case.csvh`
+select * from dfs.tmp.`cdc_covid_case.csvh`;
 -- 1. In date fields, the empty string '' can be converted to SQL NULL
 -- 2. Age groups can be split into two numerical fields, with the final
 --    group being unbounded above.
@@ -101,7 +101,7 @@ select hosp_yn, count() from dfs.tmp.`cdc_covid_case.csvh` 
group by hosp_yn;
 --    so they cannot be transformed to nullable booleans
 ```
 
-So... this is what it feels like to be a data scientist 😆.  Jokes aside, we 
learned a lot of necessary stuff pretty quickly there and it's easy to see that 
we could have carried on for a long way, testing ranges, casts and regexps and 
even creating reports if we didn't rein ourselves in.  Let's skip forward to 
the ETL statement I ended up creating after exploring.
+So... this is what it feels like to be a data scientist 😆!  Jokes aside, we 
learned a lot of necessary stuff pretty quickly there and it's easy to see that 
we could have carried on for a long way, testing ranges, casts and regexps and 
even creating reports if we didn't rein ourselves in.  Let's skip forward to 
the ETL statement I ended up creating after exploring.
 
 ## Develop a CTAS (Create Table As Select) ETL
 
@@ -163,14 +163,14 @@ To complete this step, save the CTAS script above into a 
new file at `$AIRFLOW_H
 
 ## Develop an Airflow DAG
 
-The definition of our DAG will reside in a single Python script.  The complete 
listing of that script follows immediately, with my commentary continuing as 
inline source code comments.  You should save this script to a new file at 
`$AIRFLOW_HOME/dags/drill_tutorial.py`.
+The definition of our DAG will reside in a single Python script.  The complete 
listing of that script follows immediately, with my commentary continuing as 
inline source code comments.  You should save this script to a new file at 
`$AIRFLOW_HOME/dags/drill-tutorial.py`.
 
 ```python
 '''
 Uses the Apache Drill provider to transform, load and report from COVID case
 data downloaded from the website of the CDC.
 
-Data source citatation.
+Data source citation.
 
 Centers for Disease Control and Prevention, COVID-19 Response. COVID-19 Case
 Surveillance Public Data Access, Summary, and Limitations.
diff --git a/_docs/en/tutorials/080-orchestrating-queries-with-airflow.md 
b/_docs/zh/tutorials/080-orchestrating-queries-with-airflow.md
similarity index 99%
copy from _docs/en/tutorials/080-orchestrating-queries-with-airflow.md
copy to _docs/zh/tutorials/080-orchestrating-queries-with-airflow.md
index 2dda77a..1aff5b7 100644
--- a/_docs/en/tutorials/080-orchestrating-queries-with-airflow.md
+++ b/_docs/zh/tutorials/080-orchestrating-queries-with-airflow.md
@@ -1,7 +1,7 @@
 ---
 title: "Orchestrating queries with Airflow"
 slug: "Orchestrating queries with Airflow"
-parent: "Tutorials"
+parent: "教程"
 ---
 
 This tutorial walks through the development of Apache Airflow DAG that 
implements a basic ETL process using Apache Drill.  We'll install Airflow into 
a Python virtualenv using pip before writing and testing our new DAG.  Consult 
the [Airflow installation 
documentation](https://airflow.apache.org/docs/apache-airflow/stable/installation.html)
 for more information about installing Airflow.
diff --git a/_tools/createdatadocs.py b/_tools/createdatadocs.py
index 9b22a33..894ad48 100755
--- a/_tools/createdatadocs.py
+++ b/_tools/createdatadocs.py
@@ -94,8 +94,7 @@ if __name__ == '__main__':
                             ' (which may be empty or contain the original 
English text)' \
                             ' and set their _parent_ to the new _title_ on the 
parent page'
 
-                    logging.error(msg)
-                    raise e
+                    logging.warn(msg)
 
         for doc in top_level_docs:
             add_docs(docs_in_order, doc)
diff --git a/blog/_posts/en/2021-08-05-drill-provider-for-airflow.md 
b/blog/_posts/en/2021-08-05-drill-provider-for-airflow.md
index 3e37284..4de64c7 100644
--- a/blog/_posts/en/2021-08-05-drill-provider-for-airflow.md
+++ b/blog/_posts/en/2021-08-05-drill-provider-for-airflow.md
@@ -25,4 +25,4 @@ In contrast Airflow is, unless you're doing it wrong, used 
only to instruct othe
 
 The exciting news for Drill users is that [a new provider package adding 
support for 
Drill](https://pypi.org/project/apache-airflow-providers-apache-drill/) was 
added to Airflow this month.  This provider is based on the [sqlalchemy-drill 
package](https://pypi.org/project/sqlalchemy-drill/) which provides Drill 
connectivity for Python programs.  This means that you can add tasks which 
execute queries on Drill to your Airflow DAGs without any hacky intermediate 
shell scripts, or build new [...]
 
-A new tutorial that walks through the development of a simple Airflow DAG that 
uses the Drill provider [is available 
here]({{site.baseurl}}/docs/tutorials/orchestrating-queries-with-airflow).
+A new tutorial that walks through the development of a simple Airflow DAG that 
uses the Drill provider [is available 
here]({{site.baseurl}}/docs/orchestrating-queries-with-airflow/).

Reply via email to