This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/asf-site by this push:
new ffe4e7f Travis CI build asf-site
ffe4e7f is described below
commit ffe4e7f70b14cd5c5949c7223b005c9af5cb7891
Author: CI <[email protected]>
AuthorDate: Sat Feb 20 11:55:15 2021 +0000
Travis CI build asf-site
---
content/activity.html | 24 ++
content/assets/js/lunr/lunr-store.js | 5 +
content/blog.html | 24 ++
content/blog/hudi-key-generators/index.html | 637 ++++++++++++++++++++++++++++
content/cn/activity.html | 24 ++
content/sitemap.xml | 4 +
6 files changed, 718 insertions(+)
diff --git a/content/activity.html b/content/activity.html
index 7ec278e..71bf9b4 100644
--- a/content/activity.html
+++ b/content/activity.html
@@ -193,6 +193,30 @@
<h2 class="archive__item-title" itemprop="headline">
+ <a href="/blog/hudi-key-generators/" rel="permalink">Apache Hudi Key
Generators
+</a>
+
+ </h2>
+ <!-- Look the author details up from the site config. -->
+
+ <!-- Output author details if some exist. -->
+
+
+ <p class="archive__item-excerpt" itemprop="description">Different key
generators available with Apache Hudi
+</p>
+ </article>
+</div>
+
+
+
+
+
+
+<div class="list__item">
+ <article class="archive__item" itemscope
itemtype="https://schema.org/CreativeWork">
+
+ <h2 class="archive__item-title" itemprop="headline">
+
<a href="/blog/hudi-clustering-intro/" rel="permalink">Optimize Data
lake layout using Clustering in Apache Hudi
</a>
diff --git a/content/assets/js/lunr/lunr-store.js
b/content/assets/js/lunr/lunr-store.js
index bf8a81a..ae425f0 100644
--- a/content/assets/js/lunr/lunr-store.js
+++ b/content/assets/js/lunr/lunr-store.js
@@ -1438,4 +1438,9 @@ var store = [{
"excerpt":"Background Apache Hudi brings stream processing to big
data, providing fresh data while being an order of magnitude efficient over
traditional batch processing. In a data lake/warehouse, one of the key
trade-offs is between ingestion speed and query performance. Data ingestion
typically prefers small files to improve parallelism and make...","categories":
["blog"],
"tags": [],
"url": "https://hudi.apache.org/blog/hudi-clustering-intro/",
+ "teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
+ "title": "Apache Hudi Key Generators",
+ "excerpt":"Every record in Hudi is uniquely identified by a HoodieKey,
which is a pair of record key and partition path where the record belongs to.
Hudi has imposed this constraint so that updates and deletes can be applied to
the record of interest. Hudi relies on the partition path
field...","categories": ["blog"],
+ "tags": [],
+ "url": "https://hudi.apache.org/blog/hudi-key-generators/",
"teaser":"https://hudi.apache.org/assets/images/500x300.png"},]
diff --git a/content/blog.html b/content/blog.html
index 004a368..0935196 100644
--- a/content/blog.html
+++ b/content/blog.html
@@ -191,6 +191,30 @@
<h2 class="archive__item-title" itemprop="headline">
+ <a href="/blog/hudi-key-generators/" rel="permalink">Apache Hudi Key
Generators
+</a>
+
+ </h2>
+ <!-- Look the author details up from the site config. -->
+
+ <!-- Output author details if some exist. -->
+
+
+ <p class="archive__item-excerpt" itemprop="description">Different key
generators available with Apache Hudi
+</p>
+ </article>
+</div>
+
+
+
+
+
+
+<div class="list__item">
+ <article class="archive__item" itemscope
itemtype="https://schema.org/CreativeWork">
+
+ <h2 class="archive__item-title" itemprop="headline">
+
<a href="/blog/hudi-clustering-intro/" rel="permalink">Optimize Data
lake layout using Clustering in Apache Hudi
</a>
diff --git a/content/blog/hudi-key-generators/index.html
b/content/blog/hudi-key-generators/index.html
new file mode 100644
index 0000000..508dafb
--- /dev/null
+++ b/content/blog/hudi-key-generators/index.html
@@ -0,0 +1,637 @@
+<!doctype html>
+<html lang="en" class="no-js">
+ <head>
+ <meta charset="utf-8">
+
+<!-- begin _includes/seo.html --><title>Apache Hudi Key Generators - Apache
Hudi</title>
+<meta name="description" content="Different key generators available with
Apache Hudi">
+
+<meta property="og:type" content="article">
+<meta property="og:locale" content="en_US">
+<meta property="og:site_name" content="">
+<meta property="og:title" content="Apache Hudi Key Generators">
+<meta property="og:url"
content="https://hudi.apache.org/blog/hudi-key-generators/">
+
+
+ <meta property="og:description" content="Different key generators available
with Apache Hudi">
+
+
+
+
+
+
+
+
+
+
+
+<!-- end _includes/seo.html -->
+
+
+<!--<link href="/feed.xml" type="application/atom+xml" rel="alternate" title="
Feed">-->
+
+<!-- https://t.co/dKP3o1e -->
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+<script>
+ document.documentElement.className =
document.documentElement.className.replace(/\bno-js\b/g, '') + ' js ';
+</script>
+
+<!-- For all browsers -->
+<link rel="stylesheet" href="/assets/css/main.css">
+
+<!--[if IE]>
+ <style>
+ /* old IE unsupported flexbox fixes */
+ .greedy-nav .site-title {
+ padding-right: 3em;
+ }
+ .greedy-nav button {
+ position: absolute;
+ top: 0;
+ right: 0;
+ height: 100%;
+ }
+ </style>
+<![endif]-->
+
+
+
+<link rel="icon" type="image/x-icon" href="/assets/images/favicon.ico">
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+<script src="/assets/js/jquery.min.js"></script>
+
+
+<script src="/assets/js/main.min.js"></script>
+
+ </head>
+
+ <body class="layout--single">
+ <!--[if lt IE 9]>
+<div class="notice--danger align-center" style="margin: 0;">You are using an
<strong>outdated</strong> browser. Please <a
href="https://browsehappy.com/">upgrade your browser</a> to improve your
experience.</div>
+<![endif]-->
+
+ <div class="masthead">
+ <div class="masthead__inner-wrap" id="masthead__inner-wrap">
+ <div class="masthead__menu">
+ <nav id="site-nav" class="greedy-nav">
+
+ <a class="site-logo" href="/">
+ <div style="width: 150px; height: 40px">
+ </div>
+ </a>
+
+ <a class="site-title" href="/">
+
+ </a>
+ <ul class="visible-links"><li class="masthead__menu-item">
+ <a href="/docs/quick-start-guide.html" target="_self"
>Documentation</a>
+ </li><li class="masthead__menu-item">
+ <a href="/community.html" target="_self" >Community</a>
+ </li><li class="masthead__menu-item">
+ <a href="/blog.html" target="_self" >Blog</a>
+ </li><li class="masthead__menu-item">
+ <a href="https://cwiki.apache.org/confluence/display/HUDI/FAQ"
target="_blank" >FAQ</a>
+ </li><li class="masthead__menu-item">
+ <a href="/docs/powered_by.html" target="_self" >Powered By</a>
+ </li><li class="masthead__menu-item">
+ <a href="/releases.html" target="_self" >Releases</a>
+ </li></ul>
+ <button class="greedy-nav__toggle hidden" type="button">
+ <span class="visually-hidden">Toggle menu</span>
+ <div class="navicon"></div>
+ </button>
+ <ul class="hidden-links hidden"></ul>
+ </nav>
+ </div>
+ </div>
+</div>
+<!--
+<p class="notice--warning" style="margin: 0 !important; text-align: center
!important;"><strong>Note:</strong> This site is work in progress, if you
notice any issues, please <a target="_blank"
href="https://github.com/apache/hudi/issues">Report on Issue</a>.
+ Click <a href="/"> here</a> back to old site.</p>
+-->
+
+ <div class="initial-content">
+ <div id="main" role="main">
+
+
+ <div class="sidebar sticky">
+
+
+ <div itemscope itemtype="https://schema.org/Person">
+
+ <div class="author__content">
+
+ <h3 class="author__name" itemprop="name">Quick Links</h3>
+
+
+ <div class="author__bio" itemprop="description">
+ <p>Hudi <em>ingests</em> & <em>manages</em> storage of large
analytical datasets over DFS.</p>
+
+ </div>
+
+ </div>
+
+ <div class="author__urls-wrapper">
+ <ul class="author__urls social-icons">
+
+
+ <li><a href="/docs/quick-start-guide" target="_self" rel="nofollow
noopener noreferrer"><i class="fa fa-book" aria-hidden="true"></i>
Documentation</a></li>
+
+
+
+ <li><a href="https://cwiki.apache.org/confluence/display/HUDI"
target="_blank" rel="nofollow noopener noreferrer"><i class="fa fa-wikipedia-w"
aria-hidden="true"></i> Technical Wiki</a></li>
+
+
+
+ <li><a href="/contributing" target="_self" rel="nofollow noopener
noreferrer"><i class="fa fa-thumbs-o-up" aria-hidden="true"></i> Contribution
Guide</a></li>
+
+
+
+ <li><a
href="https://join.slack.com/t/apache-hudi/shared_invite/enQtODYyNDAxNzc5MTg2LTE5OTBlYmVhYjM0N2ZhOTJjOWM4YzBmMWU2MjZjMGE4NDc5ZDFiOGQ2N2VkYTVkNzU3ZDQ4OTI1NmFmYWQ0NzE"
target="_blank" rel="nofollow noopener noreferrer"><i class="fa fa-slack"
aria-hidden="true"></i> Join on Slack</a></li>
+
+
+
+ <li><a href="https://github.com/apache/hudi" target="_blank"
rel="nofollow noopener noreferrer"><i class="fa fa-github"
aria-hidden="true"></i> Fork on GitHub</a></li>
+
+
+
+ <li><a href="https://issues.apache.org/jira/projects/HUDI/summary"
target="_blank" rel="nofollow noopener noreferrer"><i class="fa fa-navicon"
aria-hidden="true"></i> Report Issues</a></li>
+
+
+
+ <li><a href="/security" target="_self" rel="nofollow noopener
noreferrer"><i class="fa fa-navicon" aria-hidden="true"></i> Report Security
Issues</a></li>
+
+
+
+
+ </ul>
+ </div>
+</div>
+
+
+
+
+ </div>
+
+
+ <article class="page" itemscope itemtype="https://schema.org/CreativeWork">
+ <!-- Look the author details up from the site config. -->
+
+
+ <div class="page__inner-wrap">
+
+ <header>
+ <h1 id="page-title" class="page__title" itemprop="headline">Apache
Hudi Key Generators
+</h1>
+ <!-- Output author details if some exist. -->
+
+ </header>
+
+
+ <section class="page__content" itemprop="text">
+
+ <style>
+ .page {
+ padding-right: 0 !important;
+ }
+ </style>
+
+ <p>Every record in Hudi is uniquely identified by a HoodieKey, which
is a pair of record key and partition path where the
+record belongs to. Hudi has imposed this constraint so that updates and
deletes can be applied to the record of interest.
+Hudi relies on the partition path field to partition your dataset and records
within a partition have unique record keys.
+Since uniqueness is guaranteed only within the partition, there could be
records with same record keys across different
+partitions. One should choose the partition field wisely as it could be a
determining factor for your ingestion and
+query latency.</p>
+
+<h2 id="key-generators">Key Generators</h2>
+
+<p>Hudi exposes a number of out-of-the-box key generators that customers can
use based on their needs, or they can provide their
+own implementation of the KeyGenerator. This blog goes over all different
types of key generators that are readily
+available to use.</p>
+
+<p><a
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenerator.java">Here</a>
+is the interface for KeyGenerator in Hudi for your reference.</p>
+
+<p>Before diving into different types of key generators, let’s go over some of
the common configs required to be set for
+key generators.</p>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config</th>
+ <th style="text-align: center">Meaning/purpose</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.datasource.write.recordkey.field</code></td>
+ <td style="text-align: center">Refers to record key field. This is a
mandatory field.</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.datasource.write.partitionpath.field</code></td>
+ <td style="text-align: center">Refers to partition path field. This is a
mandatory field.</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.datasource.write.keygenerator.class</code></td>
+ <td style="text-align: center">Refers to Key generator class(including
full path). Could refer to any of the available ones or user defined one. This
is a mandatory field.</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.datasource.write.partitionpath.urlencode</code></td>
+ <td style="text-align: center">When set to true, partition path will be
url encoded. Default value is false.</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.datasource.write.hive_style_partitioning</code></td>
+ <td style="text-align: center">When set to true, uses hive style
partitioning. Partition field name will be prefixed to the value. Format:
“&lt;partition_path_field_name&gt;=&lt;partition_path_value&gt;”. Default value is
false.</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>There are a few more configs involved if you are looking for
TimestampBasedKeyGenerator. Will cover those in the respective section.</p>
+
+<p>Let’s go over different key generators available to be used with Hudi.</p>
+
+<h3 id="simplekeygenerator"><a
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java">SimpleKeyGenerator</a></h3>
+
+<p>Record key refers to one field(column in dataframe) by name and partition
path refers to one field (single column in dataframe)
+by name. This is one of the most commonly used ones. Values are interpreted as
is from dataframe and converted to string.</p>
+
+<h3 id="complexkeygenerator"><a
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java">ComplexKeyGenerator</a></h3>
+<p>Both record key and partition paths comprise one or more than one field by
name(combination of multiple fields). Fields
+are expected to be comma separated in the config value. For example <code
class="highlighter-rouge">"hoodie.datasource.write.recordkey.field" :
“col1,col4”</code></p>
+
+<h3 id="globaldeletekeygenerator"><a
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java">GlobalDeleteKeyGenerator</a></h3>
+<p>Global index deletes do not require partition value. So this key generator
avoids using partition value for generating HoodieKey.</p>
+
+<h3 id="timestampbasedkeygenerator"><a
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java">TimestampBasedKeyGenerator</a></h3>
+<p>This key generator relies on timestamps for the partition field. The field
values are interpreted as timestamps
+and not just converted to string while generating partition path value for
records. Record key is same as before where it is chosen by
+field name. Users are expected to set a few more configs to use this
KeyGenerator.</p>
+
+<p>Configs to be set:</p>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config</th>
+ <th>Meaning/purpose</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>One of the timestamp types supported(UNIX_TIMESTAMP, DATE_STRING,
MIXED, EPOCHMILLISECONDS, SCALAR)</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>Output date format</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+ <td>Timezone of the data format</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+ <td>Input date format</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Let’s go over some example values for TimestampBasedKeyGenerator.</p>
+
+<h4 id="timestamp-is-gmt">Timestamp is GMT</h4>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config field</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>“EPOCHMILLISECONDS”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>“yyyy-MM-dd hh”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+ <td>“GMT+8:00”</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Input Field value: “1578283932000L” <br />
+Partition path generated from key generator: “2020-01-06 12”</p>
+
+<p>If input field value is null for some rows. <br />
+Partition path generated from key generator: “1970-01-01 08”</p>
+
+<h4 id="timestamp-is-date_string">Timestamp is DATE_STRING</h4>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config field</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>“DATE_STRING”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>“yyyy-MM-dd hh”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+ <td>“GMT+8:00”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+ <td>“yyyy-MM-dd hh:mm:ss”</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Input field value: “2020-01-06 12:12:12” <br />
+Partition path generated from key generator: “2020-01-06 12”</p>
+
+<p>If input field value is null for some rows. <br />
+Partition path generated from key generator: “1970-01-01 12:00:00”
+<br /></p>
+
+<h4 id="scalar-examples">Scalar examples</h4>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config field</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>“SCALAR”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>“yyyy-MM-dd hh”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+ <td>“GMT”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit</code></td>
+ <td>“days”</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Input field value: “20000L” <br />
+Partition path generated from key generator: “2024-10-04 12”</p>
+
+<p>If input field value is null. <br />
+Partition path generated from key generator: “1970-01-02 12”</p>
+
+<h4 id="iso8601withmsz-with-single-input-format">ISO8601WithMsZ with Single
Input format</h4>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config field</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>“DATE_STRING”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+ <td>“yyyy-MM-dd’T’HH:mm:ss.SSSZ”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+ <td>””</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+ <td>””</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>“yyyyMMddHH”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+ <td>“GMT”</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Input field value: “2020-04-01T13:01:33.428Z” <br />
+Partition path generated from key generator: “2020040113”</p>
+
+<h4 id="iso8601withmsz-with-multiple-input-formats">ISO8601WithMsZ with
Multiple Input formats</h4>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config field</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>“DATE_STRING”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+ <td>“yyyy-MM-dd’T’HH:mm:ssZ,yyyy-MM-dd’T’HH:mm:ss.SSSZ”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+ <td>””</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+ <td>””</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>“yyyyMMddHH”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+ <td>“UTC”</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Input field value: “2020-04-01T13:01:33.428Z” <br />
+Partition path generated from key generator: “2020040113”</p>
+
+<h4 id="iso8601noms-with-offset-using-multiple-input-formats">ISO8601NoMs with
offset using multiple input formats</h4>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config field</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>“DATE_STRING”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+ <td>“yyyy-MM-dd’T’HH:mm:ssZ,yyyy-MM-dd’T’HH:mm:ss.SSSZ”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+ <td>””</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+ <td>””</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>“yyyyMMddHH”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+ <td>“UTC”</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Input field value: “2020-04-01T13:01:33-<strong>05:00</strong>” <br />
+Partition path generated from key generator: “2020040118”</p>
+
+<h4 id="input-as-short-date-string-and-expect-date-in-date-format">Input as
short date string and expect date in date format</h4>
+
+<table>
+ <thead>
+ <tr>
+ <th>Config field</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+ <td>“DATE_STRING”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+ <td>“yyyy-MM-dd’T’HH:mm:ssZ,yyyy-MM-dd’T’HH:mm:ss.SSSZ,yyyyMMdd”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+ <td>””</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+ <td>“UTC”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+ <td>“MM/dd/yyyy”</td>
+ </tr>
+ <tr>
+ <td><code
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+ <td>“UTC”</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Input field value: “20200401” <br />
+Partition path generated from key generator: “04/01/2020”</p>
+
+<h3 id="customkeygenerator"><a
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java">CustomKeyGenerator</a></h3>
+<p>This is a generic implementation of KeyGenerator where users are able to
leverage the benefits of SimpleKeyGenerator,
+ComplexKeyGenerator and TimestampBasedKeyGenerator all at the same time. One
can configure record key and partition
+paths as a single field or a combination of fields. This keyGenerator is
particularly useful if you want to define
+complex partition paths involving regular fields and timestamp based fields.
It expects value for prop <code
class="highlighter-rouge">"hoodie.datasource.write.partitionpath.field"</code>
+in a specific format. The format should be
“field1:PartitionKeyType1,field2:PartitionKeyType2…”</p>
+
+<p>The complete partition path is created as
+<code class="highlighter-rouge">&lt;value for field1 basis
PartitionKeyType1&gt;/&lt;value for field2 basis PartitionKeyType2&gt;</code>
+and so on. Each partition key type could either be SIMPLE or TIMESTAMP.</p>
+
+<p>Example config value: <code
class="highlighter-rouge">“field_3:simple,field_5:timestamp”</code></p>
+
+<p>RecordKey config value is either a single field in case of SimpleKeyGenerator
or comma-separated field names if referring to ComplexKeyGenerator.
+Eg: “col1” or “col3,col4”.</p>
+
+<h3 id="nonpartitionedkeygenerator"><a
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java">NonPartitionedKeyGenerator</a></h3>
+<p>If your hudi dataset is not partitioned, you could use this
“NonPartitionedKeyGenerator” which will return an empty
+partition for all records. In other words, all records go to the same
partition (which is empty “”)</p>
+
+<p>Hope this blog gave you a good understanding of different types of Key
Generators available in Apache Hudi. Thanks for your continued support for
Hudi’s community.</p>
+
+
+ </section>
+
+ <a href="#masthead__inner-wrap" class="back-to-top">Back to top
↑</a>
+
+
+
+
+ </div>
+
+ </article>
+
+</div>
+
+ </div>
+
+ <div class="page__footer">
+ <footer>
+
+<div class="row">
+ <div class="col-lg-12 footer">
+ <p>
+ <table class="table-apache-info">
+ <tr>
+ <td>
+ <a class="footer-link-img" href="https://apache.org">
+ <img width="250px" src="/assets/images/asf_logo.svg" alt="The
Apache Software Foundation">
+ </a>
+ </td>
+ <td>
+ <a style="float: right"
href="https://www.apache.org/events/current-event.html">
+ <img
src="https://www.apache.org/events/current-event-234x60.png" />
+ </a>
+ </td>
+ </tr>
+ </table>
+ </p>
+ <p>
+ <a href="https://www.apache.org/licenses/">License</a> | <a
href="https://www.apache.org/security/">Security</a> | <a
href="https://www.apache.org/foundation/thanks.html">Thanks</a> | <a
href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
+ </p>
+ <p>
+ Copyright © <span id="copyright-year">2019</span> <a
href="https://apache.org">The Apache Software Foundation</a>, Licensed under
the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License,
Version 2.0</a>.
+ Hudi, Apache and the Apache feather logo are trademarks of The Apache
Software Foundation. <a href="/docs/privacy">Privacy Policy</a>
+ </p>
+ </div>
+</div>
+ </footer>
+ </div>
+
+
+ </body>
+</html>
\ No newline at end of file
diff --git a/content/cn/activity.html b/content/cn/activity.html
index eb734ec..e096c4a 100644
--- a/content/cn/activity.html
+++ b/content/cn/activity.html
@@ -191,6 +191,30 @@
<h2 class="archive__item-title" itemprop="headline">
+ <a href="/blog/hudi-key-generators/" rel="permalink">Apache Hudi Key
Generators
+</a>
+
+ </h2>
+ <!-- Look the author details up from the site config. -->
+
+ <!-- Output author details if some exist. -->
+
+
+ <p class="archive__item-excerpt" itemprop="description">Different key
generators available with Apache Hudi
+</p>
+ </article>
+</div>
+
+
+
+
+
+
+<div class="list__item">
+ <article class="archive__item" itemscope
itemtype="https://schema.org/CreativeWork">
+
+ <h2 class="archive__item-title" itemprop="headline">
+
<a href="/blog/hudi-clustering-intro/" rel="permalink">Optimize Data
lake layout using Clustering in Apache Hudi
</a>
diff --git a/content/sitemap.xml b/content/sitemap.xml
index a6de7ea..5d19956 100644
--- a/content/sitemap.xml
+++ b/content/sitemap.xml
@@ -1153,6 +1153,10 @@
<lastmod>2021-01-27T00:00:00-05:00</lastmod>
</url>
<url>
+<loc>https://hudi.apache.org/blog/hudi-key-generators/</loc>
+<lastmod>2021-02-13T00:00:00-05:00</lastmod>
+</url>
+<url>
<loc>https://hudi.apache.org/cn/activity</loc>
<lastmod>2019-12-30T14:59:57-05:00</lastmod>
</url>