This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 721e797 Travis CI build asf-site
721e797 is described below
commit 721e797c9bbae4e21b517f5c71e2d25709e3b04e
Author: CI <[email protected]>
AuthorDate: Mon Apr 12 06:30:35 2021 +0000
Travis CI build asf-site
---
content/docs/configurations.html | 152 ++++++++++++++++++++++++++++++
content/docs/flink-quick-start-guide.html | 14 +--
content/docs/querying_data.html | 4 +-
3 files changed, 161 insertions(+), 9 deletions(-)
diff --git a/content/docs/configurations.html b/content/docs/configurations.html
index 2ea0368..ca2283c 100644
--- a/content/docs/configurations.html
+++ b/content/docs/configurations.html
@@ -366,6 +366,8 @@
<ul>
<li><a href="#write-options-1">Write Options</a></li>
<li><a href="#read-options-1">Read Options</a></li>
+ <li><a href="#index-sync-options">Index sync options</a></li>
+ <li><a href="#hive-sync-options">Hive sync options</a></li>
</ul>
</li>
<li><a href="#writeclient-configs">WriteClient Configs</a>
@@ -623,6 +625,12 @@ The actual datasource level configs are listed below.</p>
<td><span style="color:grey"> Key generator class that implements how to
extract the key out of an incoming record </span></td>
</tr>
<tr>
+ <td><code
class="highlighter-rouge">write.partition.url_encode</code></td>
+ <td>N</td>
+ <td>false</td>
+ <td>Whether to encode the partition path url, default false</td>
+ </tr>
+ <tr>
<td><code class="highlighter-rouge">write.tasks</code></td>
<td>N</td>
<td>4</td>
@@ -673,6 +681,24 @@ The actual datasource level configs are listed below.</p>
<td>3600</td>
<td><span style="color:grey"> Max delta seconds time needed to trigger
compaction, default 1 hour </span></td>
</tr>
+ <tr>
+ <td><code class="highlighter-rouge">compaction.max_memory</code></td>
+ <td>N</td>
+ <td>100</td>
+ <td>Max memory in MB for compaction spillable map, default 100MB</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">clean.async.enabled</code></td>
+ <td>N</td>
+ <td>true</td>
+ <td>Whether to clean up the old commits immediately on new commits,
enabled by default</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">clean.retain_commits</code></td>
+ <td>N</td>
+ <td>10</td>
+ <td>Number of commits to retain. So data will be retained for
num_of_commits * time_between_commits (scheduled). This also directly
translates into how much you can incrementally pull on this table, default
10</td>
+ </tr>
</tbody>
</table>
@@ -778,6 +804,132 @@ The actual datasource level configs are listed below.</p>
</tbody>
</table>
+<h3 id="index-sync-options">Index sync options</h3>
+
+<table>
+ <thead>
+ <tr>
+ <th>Option Name</th>
+ <th>Required</th>
+ <th>Default</th>
+ <th>Remarks</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code class="highlighter-rouge">index.bootstrap.enabled</code></td>
+ <td>N</td>
+ <td>false</td>
+ <td>Whether to bootstrap the index state from existing hoodie table,
default false</td>
+ </tr>
+ </tbody>
+</table>
+
+<h3 id="hive-sync-options">Hive sync options</h3>
+
+<table>
+ <thead>
+ <tr>
+ <th>Option Name</th>
+ <th>Required</th>
+ <th>Default</th>
+ <th>Remarks</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.enable</code></td>
+ <td>N</td>
+ <td>false</td>
+ <td>Asynchronously sync Hive meta to HMS, default false</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.db</code></td>
+ <td>N</td>
+ <td>default</td>
+ <td>Database name for hive sync, default ‘default’</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.table</code></td>
+ <td>N</td>
+ <td>unknown</td>
+ <td>Table name for hive sync, default ‘unknown’</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.file_format</code></td>
+ <td>N</td>
+ <td>PARQUET</td>
+ <td>File format for hive sync, default ‘PARQUET’</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.username</code></td>
+ <td>N</td>
+ <td>hive</td>
+ <td>Username for hive sync, default ‘hive’</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.password</code></td>
+ <td>N</td>
+ <td>hive</td>
+ <td>Password for hive sync, default ‘hive’</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.jdbc_url</code></td>
+ <td>N</td>
+ <td>jdbc:hive2://localhost:10000</td>
+ <td>Jdbc URL for hive sync, default ‘jdbc:hive2://localhost:10000’</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hive_sync.partition_fields</code></td>
+ <td>N</td>
+ <td>’’</td>
+ <td>Partition fields for hive sync, default ‘’</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hive_sync.partition_extractor_class</code></td>
+ <td>N</td>
+ <td>SlashEncodedDayPartitionValueExtractor.class</td>
+ <td>Tool to extract the partition value from HDFS path, default
‘SlashEncodedDayPartitionValueExtractor’</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hive_sync.assume_date_partitioning</code></td>
+ <td>N</td>
+ <td>false</td>
+ <td>Assume partitioning is yyyy/mm/dd, default false</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.use_jdbc</code></td>
+ <td>N</td>
+ <td>true</td>
+ <td>Use JDBC when hive synchronization is enabled, default true</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.auto_create_db</code></td>
+ <td>N</td>
+ <td>true</td>
+ <td>Auto create hive database if it does not exist, default true</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hive_sync.ignore_exceptions</code></td>
+ <td>N</td>
+ <td>false</td>
+ <td>Ignore exceptions during hive synchronization, default false</td>
+ </tr>
+ <tr>
+ <td><code class="highlighter-rouge">hive_sync.skip_ro_suffix</code></td>
+ <td>N</td>
+ <td>false</td>
+ <td>Skip the _ro suffix for Read optimized table when registering,
default false</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hive_sync.support_timestamp</code></td>
+ <td>N</td>
+ <td>false</td>
+ <td>INT64 with original type TIMESTAMP_MICROS is converted to hive
timestamp type. Disabled by default for backward compatibility.</td>
+ </tr>
+ </tbody>
+</table>
+
<h2 id="writeclient-configs">WriteClient Configs</h2>
<p>Jobs programming directly against the RDD level apis can build a <code
class="highlighter-rouge">HoodieWriteConfig</code> object and pass it in to the
<code class="highlighter-rouge">HoodieWriteClient</code> constructor.
diff --git a/content/docs/flink-quick-start-guide.html
b/content/docs/flink-quick-start-guide.html
index f4f48e7..c7c4576 100644
--- a/content/docs/flink-quick-start-guide.html
+++ b/content/docs/flink-quick-start-guide.html
@@ -384,8 +384,8 @@ After each write operation we will also show how to read
the data snapshot (incr
quick start tool for SQL users.</p>
<h3 id="step1-download-flink-jar">Step.1 download flink jar</h3>
-<p>Hudi works with Flink-1.11.x version. You can follow instructions <a
href="https://flink.apache.org/downloads.html">here</a> for setting up flink.
-The hudi-flink-bundle jar is archived with scala 2.11, so it’s recommended to
use flink 1.11 bundled with scala 2.11.</p>
+<p>Hudi works with Flink-1.12.x version. You can follow instructions <a
href="https://flink.apache.org/downloads.html">here</a> for setting up flink.
+The hudi-flink-bundle jar is archived with scala 2.11, so it’s recommended to
use flink 1.12.x bundled with scala 2.11.</p>
<h3 id="step2-start-flink-cluster">Step.2 start flink cluster</h3>
<p>Start a standalone flink cluster within hadoop environment.
@@ -437,7 +437,7 @@ The SQL CLI only executes the SQL line by line.</p>
<span class="k">set</span> <span class="n">execution</span><span
class="p">.</span><span class="k">result</span><span class="o">-</span><span
class="k">mode</span><span class="o">=</span><span
class="n">tableau</span><span class="p">;</span>
<span class="k">CREATE</span> <span class="k">TABLE</span> <span
class="n">t1</span><span class="p">(</span>
- <span class="n">uuid</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">20</span><span class="p">),</span>
+ <span class="n">uuid</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">20</span><span class="p">),</span> <span
class="c1">-- you can use 'PRIMARY KEY NOT ENFORCED' syntax to mark the field
as record key</span>
<span class="n">name</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">10</span><span class="p">),</span>
<span class="n">age</span> <span class="nb">INT</span><span
class="p">,</span>
<span class="n">ts</span> <span class="nb">TIMESTAMP</span><span
class="p">(</span><span class="mi">3</span><span class="p">),</span>
@@ -446,7 +446,7 @@ The SQL CLI only executes the SQL line by line.</p>
<span class="n">PARTITIONED</span> <span class="k">BY</span> <span
class="p">(</span><span class="nv">`partition`</span><span class="p">)</span>
<span class="k">WITH</span> <span class="p">(</span>
<span class="s1">'connector'</span> <span class="o">=</span> <span
class="s1">'hudi'</span><span class="p">,</span>
- <span class="s1">'path'</span> <span class="o">=</span> <span
class="s1">'schema://base-path'</span><span class="p">,</span>
+ <span class="s1">'path'</span> <span class="o">=</span> <span
class="s1">'table_base_path'</span><span class="p">,</span>
<span class="s1">'table.type'</span> <span class="o">=</span> <span
class="s1">'MERGE_ON_READ'</span> <span class="c1">-- this creates a
MERGE_ON_READ table, by default is COPY_ON_WRITE</span>
<span class="p">);</span>
@@ -491,7 +491,7 @@ This can be achieved using Hudi’s streaming querying and
providing a start tim
We do not need to specify endTime, if we want all changes after the given
commit (as is the common case).</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre
class="highlight"><code><span class="k">CREATE</span> <span
class="k">TABLE</span> <span class="n">t1</span><span class="p">(</span>
- <span class="n">uuid</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">20</span><span class="p">),</span>
+ <span class="n">uuid</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">20</span><span class="p">),</span> <span
class="c1">-- you can use 'PRIMARY KEY NOT ENFORCED' syntax to mark the field
as record key</span>
<span class="n">name</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">10</span><span class="p">),</span>
<span class="n">age</span> <span class="nb">INT</span><span
class="p">,</span>
<span class="n">ts</span> <span class="nb">TIMESTAMP</span><span
class="p">(</span><span class="mi">3</span><span class="p">),</span>
@@ -500,10 +500,10 @@ We do not need to specify endTime, if we want all changes
after the given commit
<span class="n">PARTITIONED</span> <span class="k">BY</span> <span
class="p">(</span><span class="nv">`partition`</span><span class="p">)</span>
<span class="k">WITH</span> <span class="p">(</span>
<span class="s1">'connector'</span> <span class="o">=</span> <span
class="s1">'hudi'</span><span class="p">,</span>
- <span class="s1">'path'</span> <span class="o">=</span> <span
class="s1">'oss://vvr-daily/hudi/t1'</span><span class="p">,</span>
+ <span class="s1">'path'</span> <span class="o">=</span> <span
class="s1">'table_base_path'</span><span class="p">,</span>
<span class="s1">'table.type'</span> <span class="o">=</span> <span
class="s1">'MERGE_ON_READ'</span><span class="p">,</span>
<span class="s1">'read.streaming.enabled'</span> <span class="o">=</span>
<span class="s1">'true'</span><span class="p">,</span> <span class="c1">--
this option enable the streaming read</span>
- <span class="s1">'read.streaming.start-commit'</span> <span
class="o">=</span> <span class="s1">'20210316134557'</span> <span class="c1">--
specifies the start commit instant time</span>
+ <span class="s1">'read.streaming.start-commit'</span> <span
class="o">=</span> <span class="s1">'20210316134557'</span><span
class="p">,</span> <span class="c1">-- specifies the start commit instant
time</span>
<span class="s1">'read.streaming.check-interval'</span> <span
class="o">=</span> <span class="s1">'4'</span> <span class="c1">-- specifies
the check interval for finding new source commits, default 60s.</span>
<span class="p">);</span>
diff --git a/content/docs/querying_data.html b/content/docs/querying_data.html
index 5a01751..548df98 100644
--- a/content/docs/querying_data.html
+++ b/content/docs/querying_data.html
@@ -722,7 +722,7 @@ relying on the custom Hudi input formats again like Hive.
Typically notebook use
<div class="language-sql highlighter-rouge"><div class="highlight"><pre
class="highlight"><code><span class="c1">-- this defines a COPY_ON_WRITE table
named 't1'</span>
<span class="k">CREATE</span> <span class="k">TABLE</span> <span
class="n">t1</span><span class="p">(</span>
- <span class="n">uuid</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">20</span><span class="p">),</span>
+ <span class="n">uuid</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">20</span><span class="p">),</span> <span
class="c1">-- you can use 'PRIMARY KEY NOT ENFORCED' syntax to specify the
field as record key</span>
<span class="n">name</span> <span class="nb">VARCHAR</span><span
class="p">(</span><span class="mi">10</span><span class="p">),</span>
<span class="n">age</span> <span class="nb">INT</span><span
class="p">,</span>
<span class="n">ts</span> <span class="nb">TIMESTAMP</span><span
class="p">(</span><span class="mi">3</span><span class="p">),</span>
@@ -731,7 +731,7 @@ relying on the custom Hudi input formats again like Hive.
Typically notebook use
<span class="n">PARTITIONED</span> <span class="k">BY</span> <span
class="p">(</span><span class="nv">`partition`</span><span class="p">)</span>
<span class="k">WITH</span> <span class="p">(</span>
<span class="s1">'connector'</span> <span class="o">=</span> <span
class="s1">'hudi'</span><span class="p">,</span>
- <span class="s1">'path'</span> <span class="o">=</span> <span
class="s1">'schema://base-path'</span>
+ <span class="s1">'path'</span> <span class="o">=</span> <span
class="s1">'table_base_path'</span>
<span class="p">);</span>
<span class="c1">-- query the data</span>