This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 96f3d74 Travis CI build asf-site
96f3d74 is described below
commit 96f3d746d491c06e67c494985452fd95d0b831ee
Author: CI <[email protected]>
AuthorDate: Sun Mar 29 03:30:47 2020 +0000
Travis CI build asf-site
---
test-content/assets/js/lunr/lunr-store.js | 4 +-
test-content/cn/docs/0.5.2-querying_data.html | 102 +++++++++++++++++++++++++-
test-content/cn/docs/querying_data.html | 102 +++++++++++++++++++++++++-
test-content/docs/0.5.2-querying_data.html | 3 +-
test-content/docs/querying_data.html | 3 +-
5 files changed, 204 insertions(+), 10 deletions(-)
diff --git a/test-content/assets/js/lunr/lunr-store.js
b/test-content/assets/js/lunr/lunr-store.js
index 1077cf3..9351ab6 100644
--- a/test-content/assets/js/lunr/lunr-store.js
+++ b/test-content/assets/js/lunr/lunr-store.js
@@ -435,7 +435,7 @@ var store = [{
"url": "https://hudi.apache.org/docs/0.5.2-writing_data.html",
"teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
"title": "查询 Hudi 数据集",
- "excerpt":"从概念上讲,Hudi物理存储一次数据到DFS上,同时在其上提供三个逻辑视图,如之前所述。 数据集同步到Hive
Metastore后,它将提供由Hudi的自定义输入格式支持的Hive外部表。一旦提供了适当的Hudi捆绑包,
就可以通过Hive、Spark和Presto之类的常用查询引擎来查询数据集。 具体来说,在写入过程中传递了两个由table name命名的Hive表。
例如,如果table name = hudi_tbl,我们得到 hudi_tbl 实现了由 HoodieParquetInputFormat
支持的数据集的读优化视图,从而提供了纯列式数据。 hudi_tbl_rt 实现了由 HoodieParquetRealtimeInputFormat
支持的数据集的实时视图,从而提供了基础数据和日志数据的合并视图。 如概念部分所述,增量处理所需要的
一个关键原语是增量拉取(以从数据集中获取更改流/日志)。您可以增量提取Hudi数据集,这意味着自指定的即时时间起, 您可以
只获得全部更新和新行。 这与插入更新一起使用,对于构建某 [...]
+ "excerpt":"从概念上讲,Hudi物理存储一次数据到DFS上,同时在其上提供三个逻辑视图,如之前所述。 数据集同步到Hive
Metastore后,它将提供由Hudi的自定义输入格式支持的Hive外部表。一旦提供了适当的Hudi捆绑包,
就可以通过Hive、Spark和Presto之类的常用查询引擎来查询数据集。 具体来说,在写入过程中传递了两个由table name命名的Hive表。
例如,如果table name = hudi_tbl,我们得到 hudi_tbl 实现了由 HoodieParquetInputFormat
支持的数据集的读优化视图,从而提供了纯列式数据。 hudi_tbl_rt 实现了由 HoodieParquetRealtimeInputFormat
支持的数据集的实时视图,从而提供了基础数据和日志数据的合并视图。 如概念部分所述,增量处理所需要的
一个关键原语是增量拉取(以从数据集中获取更改流/日志)。您可以增量提取Hudi数据集,这意味着自指定的即时时间起, 您可以
只获得全部更新和新行。 这与插入更新一起使用,对于构建某 [...]
"tags": [],
"url": "https://hudi.apache.org/cn/docs/0.5.2-querying_data.html",
"teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
@@ -600,7 +600,7 @@ var store = [{
"url": "https://hudi.apache.org/docs/writing_data.html",
"teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
"title": "查询 Hudi 数据集",
- "excerpt":"从概念上讲,Hudi物理存储一次数据到DFS上,同时在其上提供三个逻辑视图,如之前所述。 数据集同步到Hive
Metastore后,它将提供由Hudi的自定义输入格式支持的Hive外部表。一旦提供了适当的Hudi捆绑包,
就可以通过Hive、Spark和Presto之类的常用查询引擎来查询数据集。 具体来说,在写入过程中传递了两个由table name命名的Hive表。
例如,如果table name = hudi_tbl,我们得到 hudi_tbl 实现了由 HoodieParquetInputFormat
支持的数据集的读优化视图,从而提供了纯列式数据。 hudi_tbl_rt 实现了由 HoodieParquetRealtimeInputFormat
支持的数据集的实时视图,从而提供了基础数据和日志数据的合并视图。 如概念部分所述,增量处理所需要的
一个关键原语是增量拉取(以从数据集中获取更改流/日志)。您可以增量提取Hudi数据集,这意味着自指定的即时时间起, 您可以
只获得全部更新和新行。 这与插入更新一起使用,对于构建某 [...]
+ "excerpt":"从概念上讲,Hudi物理存储一次数据到DFS上,同时在其上提供三个逻辑视图,如之前所述。 数据集同步到Hive
Metastore后,它将提供由Hudi的自定义输入格式支持的Hive外部表。一旦提供了适当的Hudi捆绑包,
就可以通过Hive、Spark和Presto之类的常用查询引擎来查询数据集。 具体来说,在写入过程中传递了两个由table name命名的Hive表。
例如,如果table name = hudi_tbl,我们得到 hudi_tbl 实现了由 HoodieParquetInputFormat
支持的数据集的读优化视图,从而提供了纯列式数据。 hudi_tbl_rt 实现了由 HoodieParquetRealtimeInputFormat
支持的数据集的实时视图,从而提供了基础数据和日志数据的合并视图。 如概念部分所述,增量处理所需要的
一个关键原语是增量拉取(以从数据集中获取更改流/日志)。您可以增量提取Hudi数据集,这意味着自指定的即时时间起, 您可以
只获得全部更新和新行。 这与插入更新一起使用,对于构建某 [...]
"tags": [],
"url": "https://hudi.apache.org/cn/docs/querying_data.html",
"teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
diff --git a/test-content/cn/docs/0.5.2-querying_data.html
b/test-content/cn/docs/0.5.2-querying_data.html
index 0f4a441..5d337d6 100644
--- a/test-content/cn/docs/0.5.2-querying_data.html
+++ b/test-content/cn/docs/0.5.2-querying_data.html
@@ -335,6 +335,12 @@
<nav class="toc">
<header><h4 class="nav__title"><i class="fas fa-file-alt"></i> IN
THIS PAGE</h4></header>
<ul class="toc__menu">
+ <li><a href="#查询引擎支持列表">查询引擎支持列表</a>
+ <ul>
+ <li><a href="#读优化表">读优化表</a></li>
+ <li><a href="#实时表">实时表</a></li>
+ </ul>
+ </li>
<li><a href="#hive">Hive</a>
<ul>
<li><a href="#hive-ro-view">读优化表</a></li>
@@ -352,7 +358,7 @@
<li><a href="#presto">Presto</a></li>
<li><a href="#impala此功能还未正式发布">Impala(此功能还未正式发布)</a>
<ul>
- <li><a href="#读优化表">读优化表</a></li>
+ <li><a href="#读优化表-1">读优化表</a></li>
</ul>
</li>
</ul>
@@ -377,6 +383,94 @@
并与其他表(数据集/维度)结合以<a
href="/cn/docs/0.5.2-writing_data.html">写出增量</a>到目标Hudi数据集。增量视图是通过查询上表之一实现的,并具有特殊配置,
该特殊配置指示查询计划仅需要从数据集中获取增量数据。</p>
+<h2 id="查询引擎支持列表">查询引擎支持列表</h2>
+
+<p>下面的表格展示了各查询引擎是否支持Hudi格式</p>
+
+<h3 id="读优化表">读优化表</h3>
+
+<table>
+ <thead>
+ <tr>
+ <th>查询引擎</th>
+ <th>实时视图</th>
+ <th>增量拉取</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><strong>Hive</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark SQL</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark Datasource</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Presto</strong></td>
+ <td>Y</td>
+ <td>N</td>
+ </tr>
+ <tr>
+ <td><strong>Impala</strong></td>
+ <td>Y</td>
+ <td>N</td>
+ </tr>
+ </tbody>
+</table>
+
+<h3 id="实时表">实时表</h3>
+
+<table>
+ <thead>
+ <tr>
+ <th>查询引擎</th>
+ <th>实时视图</th>
+ <th>增量拉取</th>
+ <th>读优化表</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><strong>Hive</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark SQL</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark Datasource</strong></td>
+ <td>N</td>
+ <td>N</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Presto</strong></td>
+ <td>N</td>
+ <td>N</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Impala</strong></td>
+ <td>N</td>
+ <td>N</td>
+ <td>Y</td>
+ </tr>
+ </tbody>
+</table>
+
<p>接下来,我们将详细讨论在每个查询引擎上如何访问所有三个视图。</p>
<h2 id="hive">Hive</h2>
@@ -542,7 +636,9 @@ Upsert实用程序(<code
class="highlighter-rouge">HoodieDeltaStreamer</code>
<span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">VIEW_TYPE_INCREMENTAL_OPT_VAL</span><span
class="o">())</span>
<span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">BEGIN_INSTANTTIME_OPT_KEY</span><span
class="o">(),</span>
<span class="o"><</span><span
class="n">beginInstantTime</span><span class="o">>)</span>
- <span class="o">.</span><span class="na">load</span><span
class="o">(</span><span class="n">tablePath</span><span class="o">);</span>
<span class="c1">// For incremental view, pass in the root/base path of
dataset</span>
+ <span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">INCR_PATH_GLOB_OPT_KEY</span><span
class="o">(),</span>
+ <span class="s">"/year=2020/month=*/day=*"</span><span
class="o">)</span> <span class="c1">// 可选,从指定的分区增量拉取</span>
+ <span class="o">.</span><span class="na">load</span><span
class="o">(</span><span class="n">tablePath</span><span class="o">);</span>
<span class="c1">// 用数据集的最底层路径</span>
</code></pre></div></div>
<p>请参阅<a
href="/cn/docs/0.5.2-configurations.html#spark-datasource">设置</a>部分,以查看所有数据源选项。</p>
@@ -579,7 +675,7 @@ Upsert实用程序(<code
class="highlighter-rouge">HoodieDeltaStreamer</code>
<h2 id="impala此功能还未正式发布">Impala(此功能还未正式发布)</h2>
-<h3 id="读优化表">读优化表</h3>
+<h3 id="读优化表-1">读优化表</h3>
<p>Impala可以在HDFS上查询Hudi读优化表,作为一种 <a
href="https://docs.cloudera.com/documentation/enterprise/6/6.3/topics/impala_tables.html#external_tables">EXTERNAL
TABLE</a> 的形式。<br />
可以通过以下方式在Impala上建立Hudi读优化表:</p>
diff --git a/test-content/cn/docs/querying_data.html
b/test-content/cn/docs/querying_data.html
index 835ab9e..290ef18 100644
--- a/test-content/cn/docs/querying_data.html
+++ b/test-content/cn/docs/querying_data.html
@@ -335,6 +335,12 @@
<nav class="toc">
<header><h4 class="nav__title"><i class="fas fa-file-alt"></i> IN
THIS PAGE</h4></header>
<ul class="toc__menu">
+ <li><a href="#查询引擎支持列表">查询引擎支持列表</a>
+ <ul>
+ <li><a href="#读优化表">读优化表</a></li>
+ <li><a href="#实时表">实时表</a></li>
+ </ul>
+ </li>
<li><a href="#hive">Hive</a>
<ul>
<li><a href="#hive-ro-view">读优化表</a></li>
@@ -352,7 +358,7 @@
<li><a href="#presto">Presto</a></li>
<li><a href="#impala此功能还未正式发布">Impala(此功能还未正式发布)</a>
<ul>
- <li><a href="#读优化表">读优化表</a></li>
+ <li><a href="#读优化表-1">读优化表</a></li>
</ul>
</li>
</ul>
@@ -377,6 +383,94 @@
并与其他表(数据集/维度)结合以<a
href="/cn/docs/writing_data.html">写出增量</a>到目标Hudi数据集。增量视图是通过查询上表之一实现的,并具有特殊配置,
该特殊配置指示查询计划仅需要从数据集中获取增量数据。</p>
+<h2 id="查询引擎支持列表">查询引擎支持列表</h2>
+
+<p>下面的表格展示了各查询引擎是否支持Hudi格式</p>
+
+<h3 id="读优化表">读优化表</h3>
+
+<table>
+ <thead>
+ <tr>
+ <th>查询引擎</th>
+ <th>实时视图</th>
+ <th>增量拉取</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><strong>Hive</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark SQL</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark Datasource</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Presto</strong></td>
+ <td>Y</td>
+ <td>N</td>
+ </tr>
+ <tr>
+ <td><strong>Impala</strong></td>
+ <td>Y</td>
+ <td>N</td>
+ </tr>
+ </tbody>
+</table>
+
+<h3 id="实时表">实时表</h3>
+
+<table>
+ <thead>
+ <tr>
+ <th>查询引擎</th>
+ <th>实时视图</th>
+ <th>增量拉取</th>
+ <th>读优化表</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><strong>Hive</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark SQL</strong></td>
+ <td>Y</td>
+ <td>Y</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Spark Datasource</strong></td>
+ <td>N</td>
+ <td>N</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Presto</strong></td>
+ <td>N</td>
+ <td>N</td>
+ <td>Y</td>
+ </tr>
+ <tr>
+ <td><strong>Impala</strong></td>
+ <td>N</td>
+ <td>N</td>
+ <td>Y</td>
+ </tr>
+ </tbody>
+</table>
+
<p>接下来,我们将详细讨论在每个查询引擎上如何访问所有三个视图。</p>
<h2 id="hive">Hive</h2>
@@ -542,7 +636,9 @@ Upsert实用程序(<code
class="highlighter-rouge">HoodieDeltaStreamer</code>
<span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">VIEW_TYPE_INCREMENTAL_OPT_VAL</span><span
class="o">())</span>
<span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">BEGIN_INSTANTTIME_OPT_KEY</span><span
class="o">(),</span>
<span class="o"><</span><span
class="n">beginInstantTime</span><span class="o">>)</span>
- <span class="o">.</span><span class="na">load</span><span
class="o">(</span><span class="n">tablePath</span><span class="o">);</span>
<span class="c1">// For incremental view, pass in the root/base path of
dataset</span>
+ <span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">INCR_PATH_GLOB_OPT_KEY</span><span
class="o">(),</span>
+ <span class="s">"/year=2020/month=*/day=*"</span><span
class="o">)</span> <span class="c1">// 可选,从指定的分区增量拉取</span>
+ <span class="o">.</span><span class="na">load</span><span
class="o">(</span><span class="n">tablePath</span><span class="o">);</span>
<span class="c1">// 用数据集的最底层路径</span>
</code></pre></div></div>
<p>请参阅<a
href="/cn/docs/configurations.html#spark-datasource">设置</a>部分,以查看所有数据源选项。</p>
@@ -579,7 +675,7 @@ Upsert实用程序(<code
class="highlighter-rouge">HoodieDeltaStreamer</code>
<h2 id="impala此功能还未正式发布">Impala(此功能还未正式发布)</h2>
-<h3 id="读优化表">读优化表</h3>
+<h3 id="读优化表-1">读优化表</h3>
<p>Impala可以在HDFS上查询Hudi读优化表,作为一种 <a
href="https://docs.cloudera.com/documentation/enterprise/6/6.3/topics/impala_tables.html#external_tables">EXTERNAL
TABLE</a> 的形式。<br />
可以通过以下方式在Impala上建立Hudi读优化表:</p>
diff --git a/test-content/docs/0.5.2-querying_data.html
b/test-content/docs/0.5.2-querying_data.html
index 69482b3..5be58d9 100644
--- a/test-content/docs/0.5.2-querying_data.html
+++ b/test-content/docs/0.5.2-querying_data.html
@@ -471,7 +471,7 @@ with special configurations that indicates to query
planning that only increment
<td><strong>Impala</strong></td>
<td>N</td>
<td>N</td>
- <td>N</td>
+ <td>Y</td>
</tr>
</tbody>
</table>
@@ -628,6 +628,7 @@ The following snippet shows how to obtain all records
changed after <code class=
<span class="o">.</span><span class="na">format</span><span
class="o">(</span><span class="s">"org.apache.hudi"</span><span
class="o">)</span>
<span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">QUERY_TYPE_OPT_KEY</span><span
class="o">(),</span> <span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">QUERY_TYPE_INCREMENTAL_OPT_VAL</span><span
class="o">())</span>
<span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">BEGIN_INSTANTTIME_OPT_KEY</span><span
class="o">(),</span> <span class="o"><</span><span
class="n">beginInstantTime</span><span class="o">>)</span>
+ <span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">INCR_PATH_GLOB_OPT_KEY</span><span
class="o">(),</span> <span class="s">"/year=2020/month=*/day=*"</span><span
class="o">)</span> <span class="c1">// Optional, use glob pattern if querying
certain partitions</span>
<span class="o">.</span><span class="na">load</span><span
class="o">(</span><span class="n">tablePath</span><span class="o">);</span>
<span class="c1">// For incremental query, pass in the root/base path of
table</span>
<span class="n">hudiIncQueryDF</span><span class="o">.</span><span
class="na">createOrReplaceTempView</span><span class="o">(</span><span
class="s">"hudi_trips_incremental"</span><span class="o">)</span>
diff --git a/test-content/docs/querying_data.html
b/test-content/docs/querying_data.html
index f25f28b..70e9d8f 100644
--- a/test-content/docs/querying_data.html
+++ b/test-content/docs/querying_data.html
@@ -471,7 +471,7 @@ with special configurations that indicates to query
planning that only increment
<td><strong>Impala</strong></td>
<td>N</td>
<td>N</td>
- <td>N</td>
+ <td>Y</td>
</tr>
</tbody>
</table>
@@ -628,6 +628,7 @@ The following snippet shows how to obtain all records
changed after <code class=
<span class="o">.</span><span class="na">format</span><span
class="o">(</span><span class="s">"org.apache.hudi"</span><span
class="o">)</span>
<span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">QUERY_TYPE_OPT_KEY</span><span
class="o">(),</span> <span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">QUERY_TYPE_INCREMENTAL_OPT_VAL</span><span
class="o">())</span>
<span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">BEGIN_INSTANTTIME_OPT_KEY</span><span
class="o">(),</span> <span class="o"><</span><span
class="n">beginInstantTime</span><span class="o">>)</span>
+ <span class="o">.</span><span class="na">option</span><span
class="o">(</span><span class="nc">DataSourceReadOptions</span><span
class="o">.</span><span class="na">INCR_PATH_GLOB_OPT_KEY</span><span
class="o">(),</span> <span class="s">"/year=2020/month=*/day=*"</span><span
class="o">)</span> <span class="c1">// Optional, use glob pattern if querying
certain partitions</span>
<span class="o">.</span><span class="na">load</span><span
class="o">(</span><span class="n">tablePath</span><span class="o">);</span>
<span class="c1">// For incremental query, pass in the root/base path of
table</span>
<span class="n">hudiIncQueryDF</span><span class="o">.</span><span
class="na">createOrReplaceTempView</span><span class="o">(</span><span
class="s">"hudi_trips_incremental"</span><span class="o">)</span>