This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 26ef788  Travis CI build asf-site
26ef788 is described below

commit 26ef788715ac2a7024413c50b8a043695282478c
Author: CI <ci...@hudi.apache.org>
AuthorDate: Tue Mar 23 22:41:07 2021 +0000

    Travis CI build asf-site
---
 content/assets/js/lunr/lunr-store.js      |   5 +
 content/docs/0.5.3-cloud.html             |  11 +
 content/docs/azure_hoodie.html            |  11 +
 content/docs/cloud.html                   |  11 +
 content/docs/comparison.html              |  11 +
 content/docs/concurrency_control.html     | 568 ++++++++++++++++++++++++++++++
 content/docs/configurations.html          |  73 ++++
 content/docs/cos_hoodie.html              |  11 +
 content/docs/deployment.html              |  11 +
 content/docs/docker_demo.html             |  11 +
 content/docs/docs-versions.html           |  11 +
 content/docs/flink-quick-start-guide.html |  11 +
 content/docs/gcs_hoodie.html              |  11 +
 content/docs/ibm_cos_hoodie.html          |  11 +
 content/docs/metrics.html                 |  11 +
 content/docs/migration_guide.html         |  11 +
 content/docs/oss_hoodie.html              |  11 +
 content/docs/overview.html                |  11 +
 content/docs/performance.html             |  11 +
 content/docs/powered_by.html              |  11 +
 content/docs/privacy.html                 |  11 +
 content/docs/querying_data.html           |  11 +
 content/docs/s3_hoodie.html               |  11 +
 content/docs/spark_quick-start-guide.html |  11 +
 content/docs/structure.html               |  11 +
 content/docs/use_cases.html               |  11 +
 content/docs/writing_data.html            |  11 +
 content/sitemap.xml                       |   4 +
 28 files changed, 914 insertions(+)

diff --git a/content/assets/js/lunr/lunr-store.js 
b/content/assets/js/lunr/lunr-store.js
index f13c4df..4fa69d9 100644
--- a/content/assets/js/lunr/lunr-store.js
+++ b/content/assets/js/lunr/lunr-store.js
@@ -1324,6 +1324,11 @@ var store = [{
         "tags": [],
         "url": "https://hudi.apache.org/docs/metrics.html",
         "teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
+        "title": "Concurrent Writes to Hudi Tables",
+        "excerpt":"In this section, we will cover Hudi’s concurrency model and 
describe ways to ingest data into a Hudi Table from multiple writers; using the 
DeltaStreamer tool as well as using the Hudi datasource. Supported Concurrency 
Controls MVCC : Hudi table services such as compaction, cleaning, clustering 
leverage Multi Version Concurrency...","categories": [],
+        "tags": [],
+        "url": "https://hudi.apache.org/docs/concurrency_control.html",
+        "teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
         "title": "Privacy Policy",
         "excerpt":"Information about your use of this website is collected 
using server access logs and a tracking cookie. The collected information 
consists of the following: The IP address from which you access the website; 
The type of browser and operating system you use to access our site; The date 
and time...","categories": [],
         "tags": [],
diff --git a/content/docs/0.5.3-cloud.html b/content/docs/0.5.3-cloud.html
index b045e52..405c630 100644
--- a/content/docs/0.5.3-cloud.html
+++ b/content/docs/0.5.3-cloud.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/azure_hoodie.html b/content/docs/azure_hoodie.html
index a781bde..4427c6e 100644
--- a/content/docs/azure_hoodie.html
+++ b/content/docs/azure_hoodie.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/cloud.html b/content/docs/cloud.html
index 99a378f..2a5b12a 100644
--- a/content/docs/cloud.html
+++ b/content/docs/cloud.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/comparison.html b/content/docs/comparison.html
index 7dcc624..99651ea 100644
--- a/content/docs/comparison.html
+++ b/content/docs/comparison.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/concurrency_control.html 
b/content/docs/concurrency_control.html
new file mode 100644
index 0000000..af97c0f
--- /dev/null
+++ b/content/docs/concurrency_control.html
@@ -0,0 +1,568 @@
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    <meta charset="utf-8">
+
+<!-- begin _includes/seo.html --><title>Concurrent Writes to Hudi Tables - 
Apache Hudi</title>
+<meta name="description" content="In this section, we will cover Hudi’s 
concurrency model and describe ways to ingest data into a Hudi Table from 
multiple writers; using the DeltaStreamer tool as well as using the Hudi 
datasource.">
+
+<meta property="og:type" content="article">
+<meta property="og:locale" content="en_US">
+<meta property="og:site_name" content="">
+<meta property="og:title" content="Concurrent Writes to Hudi Tables">
+<meta property="og:url" 
content="https://hudi.apache.org/docs/concurrency_control.html">
+
+
+  <meta property="og:description" content="In this section, we will cover 
Hudi’s concurrency model and describe ways to ingest data into a Hudi Table 
from multiple writers; using the DeltaStreamer tool as well as using the Hudi 
datasource.">
+
+
+
+
+
+  <meta property="article:modified_time" content="2021-03-19T15:59:57-04:00">
+
+
+
+
+
+
+
+<!-- end _includes/seo.html -->
+
+
+<!--<link href="/feed.xml" type="application/atom+xml" rel="alternate" title=" 
Feed">-->
+
+<!-- https://t.co/dKP3o1e -->
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+<script>
+  document.documentElement.className = 
document.documentElement.className.replace(/\bno-js\b/g, '') + ' js ';
+</script>
+
+<!-- For all browsers -->
+<link rel="stylesheet" href="/assets/css/main.css">
+
+<!--[if IE]>
+  <style>
+    /* old IE unsupported flexbox fixes */
+    .greedy-nav .site-title {
+      padding-right: 3em;
+    }
+    .greedy-nav button {
+      position: absolute;
+      top: 0;
+      right: 0;
+      height: 100%;
+    }
+  </style>
+<![endif]-->
+
+
+
+<link rel="icon" type="image/x-icon" href="/assets/images/favicon.ico">
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+<script src="/assets/js/jquery.min.js"></script>
+
+    
+<script src="/assets/js/main.min.js"></script>
+
+  </head>
+
+  <body class="layout--single">
+    <!--[if lt IE 9]>
+<div class="notice--danger align-center" style="margin: 0;">You are using an 
<strong>outdated</strong> browser. Please <a 
href="https://browsehappy.com/">upgrade your browser</a> to improve your 
experience.</div>
+<![endif]-->
+
+    <div class="masthead">
+  <div class="masthead__inner-wrap" id="masthead__inner-wrap">
+    <div class="masthead__menu">
+      <nav id="site-nav" class="greedy-nav">
+        
+          <a class="site-logo" href="/">
+              <div style="width: 150px; height: 40px">
+              </div>
+          </a>
+        
+        <a class="site-title" href="/">
+          
+        </a>
+        <ul class="visible-links"><li class="masthead__menu-item">
+              <a href="/docs/spark_quick-start-guide.html" target="_self" 
>Documentation</a>
+            </li><li class="masthead__menu-item">
+              <a href="/community.html" target="_self" >Community</a>
+            </li><li class="masthead__menu-item">
+              <a href="/blog.html" target="_self" >Blog</a>
+            </li><li class="masthead__menu-item">
+              <a href="https://cwiki.apache.org/confluence/display/HUDI/FAQ" 
target="_blank" >FAQ</a>
+            </li><li class="masthead__menu-item">
+              <a href="/docs/powered_by.html" target="_self" >Powered By</a>
+            </li><li class="masthead__menu-item">
+              <a href="/releases.html" target="_self" >Releases</a>
+            </li></ul>
+        <button class="greedy-nav__toggle hidden" type="button">
+          <span class="visually-hidden">Toggle menu</span>
+          <div class="navicon"></div>
+        </button>
+        <ul class="hidden-links hidden"></ul>
+      </nav>
+    </div>
+  </div>
+</div>
+<!--
+<p class="notice--warning" style="margin: 0 !important; text-align: center 
!important;"><strong>Note:</strong> This site is work in progress, if you 
notice any issues, please <a target="_blank" 
href="https://github.com/apache/hudi/issues">Report on Issue</a>.
+  Click <a href="/"> here</a> back to old site.</p>
+-->
+
+    <div class="initial-content">
+      <div id="main" role="main">
+  
+
+  <div class="sidebar sticky">
+
+  
+
+  
+
+    
+      
+
+
+
+
+
+
+
+<nav class="nav__list">
+  
+  <input id="ac-toc" name="accordion-toc" type="checkbox" />
+  <label for="ac-toc">Toggle Menu</label>
+  <ul class="nav__items">
+    
+      <li>
+        
+          <span class="nav__sub-title">Documentation</span>
+        
+
+        
+        <ul>
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/overview.html" class="">Overview</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/spark_quick-start-guide.html" class="">Quick 
Start(Spark)</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/flink-quick-start-guide.html" class="">Quick 
Start(Flink)</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/use_cases.html" class="">Use Cases</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/writing_data.html" class="">Writing 
Data</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/concurrency_control.html" 
class="active">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/configurations.html" 
class="">Configuration</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/performance.html" 
class="">Performance</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/deployment.html" class="">Deployment</a></li>
+            
+
+          
+        </ul>
+        
+      </li>
+    
+      <li>
+        
+          <span class="nav__sub-title">Resources</span>
+        
+
+        
+        <ul>
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/docker_demo.html" class="">Dockerized 
Demo</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/cloud.html" class="">Storage 
Configuration</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/metrics.html" class="">Metrics</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/docs-versions.html" class="">Docs 
Versions</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
+              <li><a href="/docs/privacy.html" class="">Privacy Policy</a></li>
+            
+
+          
+        </ul>
+        
+      </li>
+    
+  </ul>
+</nav>
+
+    
+
+  
+  </div>
+
+
+  <article class="page" itemscope itemtype="https://schema.org/CreativeWork">
+    <!-- Look the author details up from the site config. -->
+    
+
+    <div class="page__inner-wrap">
+      
+        <header>
+          <h1 id="page-title" class="page__title" 
itemprop="headline">Concurrent Writes to Hudi Tables
+</h1>
+          <!-- Output author details if some exist. -->
+          
+        </header>
+      
+
+      <section class="page__content" itemprop="text">
+        
+        <aside class="sidebar__right sticky">
+          <nav class="toc">
+            <header><h4 class="nav__title"><i class="fas fa-file-alt"></i> IN 
THIS PAGE</h4></header>
+            <ul class="toc__menu">
+  <li><a href="#supported-concurrency-controls">Supported Concurrency 
Controls</a></li>
+  <li><a href="#single-writer-guarantees">Single Writer Guarantees</a></li>
+  <li><a href="#multi-writer-guarantees">Multi Writer Guarantees</a></li>
+  <li><a href="#enabling-multi-writing">Enabling Multi Writing</a></li>
+  <li><a href="#datasource-writer">Datasource Writer</a></li>
+  <li><a href="#deltastreamer">DeltaStreamer</a></li>
+  <li><a href="#best-practices-when-using-optimistic-concurrency-control">Best 
Practices when using Optimistic Concurrency Control</a></li>
+  <li><a href="#disabling-multi-writing">Disabling Multi Writing</a></li>
+</ul>
+          </nav>
+        </aside>
+        
+        <p>In this section, we will cover Hudi’s concurrency model and 
describe ways to ingest data into a Hudi Table from multiple writers; using the 
<a href="#deltastreamer">DeltaStreamer</a> tool as well as 
+using the <a href="#datasource-writer">Hudi datasource</a>.</p>
+
+<h2 id="supported-concurrency-controls">Supported Concurrency Controls</h2>
+
+<ul>
+  <li>
+    <p><strong>MVCC</strong> : Hudi table services such as compaction, 
cleaning, clustering leverage Multi Version Concurrency Control to provide 
snapshot isolation
+between multiple table service writers and readers. Additionally, using MVCC, 
Hudi provides snapshot isolation between an ingestion writer and multiple 
concurrent readers. 
+With this model, Hudi supports running any number of table service jobs 
concurrently, without any concurrency conflict. 
+This is made possible by ensuring that scheduling plans for such table services 
always happens in a single writer mode, which ensures no conflicts and avoids race 
conditions.</p>
+  </li>
+  <li>
+    <p><strong>[NEW] OPTIMISTIC CONCURRENCY</strong> : Write operations such 
as the ones described above (UPSERT, INSERT, etc.) leverage optimistic 
concurrency control to enable multiple ingestion writers to write to
+the same Hudi Table. Hudi supports <code class="highlighter-rouge">file level 
OCC</code>, i.e., for any 2 commits (or writers) happening to the same table, 
if they do not have writes to overlapping files being changed, both writers are 
allowed to succeed. 
+This feature is currently <em>experimental</em> and requires either Zookeeper 
or HiveMetastore to acquire locks.</p>
+  </li>
+</ul>
+
+<p>It may be helpful to understand the different guarantees provided by <a 
href="/docs/writing_data.html#write-operations">write operations</a> via Hudi 
datasource or the delta streamer.</p>
+
+<h2 id="single-writer-guarantees">Single Writer Guarantees</h2>
+
+<ul>
+  <li><em>UPSERT Guarantee</em>: The target table will NEVER show 
duplicates.</li>
+  <li><em>INSERT Guarantee</em>: The target table will NEVER have duplicates 
if <a href="/docs/configurations.html#INSERT_DROP_DUPS_OPT_KEY">dedup</a> is 
enabled.</li>
+  <li><em>BULK_INSERT Guarantee</em>: The target table will NEVER have 
duplicates if <a 
href="/docs/configurations.html#INSERT_DROP_DUPS_OPT_KEY">dedup</a> is 
enabled.</li>
+  <li><em>INCREMENTAL PULL Guarantee</em>: Data consumption and checkpoints 
are NEVER out of order.</li>
+</ul>
+
+<h2 id="multi-writer-guarantees">Multi Writer Guarantees</h2>
+
+<p>With multiple writers using OCC, some of the above guarantees change as 
follows</p>
+
+<ul>
+  <li><em>UPSERT Guarantee</em>: The target table will NEVER show 
duplicates.</li>
+  <li><em>INSERT Guarantee</em>: The target table MIGHT have duplicates even 
if <a href="/docs/configurations.html#INSERT_DROP_DUPS_OPT_KEY">dedup</a> is 
enabled.</li>
+  <li><em>BULK_INSERT Guarantee</em>: The target table MIGHT have duplicates 
even if <a href="/docs/configurations.html#INSERT_DROP_DUPS_OPT_KEY">dedup</a> 
is enabled.</li>
+  <li><em>INCREMENTAL PULL Guarantee</em>: Data consumption and checkpoints 
MIGHT be out of order due to multiple writer jobs finishing at different 
times.</li>
+</ul>
+
+<h2 id="enabling-multi-writing">Enabling Multi Writing</h2>
+
+<p>The following properties need to be set properly to turn on 
optimistic concurrency control.</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>hoodie.write.concurrency.mode=optimistic_concurrency_control
+hoodie.failed.writes.cleaner.policy=LAZY
+hoodie.writer.lock.provider=&lt;lock-provider-classname&gt;
+</code></pre></div></div>
+
+<p>There are 2 different server based lock providers that require different 
configuration to be set.</p>
+
+<p><strong><code class="highlighter-rouge">Zookeeper</code></strong> based 
lock provider</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>hoodie.writer.lock.provider=org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider
+hoodie.writer.lock.zookeeper.url
+hoodie.writer.lock.zookeeper.port
+hoodie.writer.lock.wait_time_ms
+hoodie.writer.lock.num_retries
+hoodie.writer.lock.lock_key
+hoodie.writer.lock.zookeeper.zk_base_path
+</code></pre></div></div>
+
+<p><strong><code class="highlighter-rouge">HiveMetastore</code></strong> based 
lock provider</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>hoodie.writer.lock.provider=org.apache.hudi.hive.HiveMetastoreBasedLockProvider
+hoodie.writer.lock.hivemetastore.database
+hoodie.writer.lock.hivemetastore.table
+hoodie.writer.lock.wait_time_ms
+hoodie.writer.lock.num_retries
+</code></pre></div></div>
+
+<p><code class="highlighter-rouge">The HiveMetastore URIs are picked up from 
the hadoop configuration file loaded during runtime.</code></p>
+
+<h2 id="datasource-writer">Datasource Writer</h2>
+
+<p>The <code class="highlighter-rouge">hudi-spark</code> module offers the 
DataSource API to write (and read) a Spark DataFrame into a Hudi table.</p>
+
+<p>Following is an example of how to use optimistic_concurrency_control via 
spark datasource</p>
+
+<div class="language-java highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code><span class="n">inputDF</span><span 
class="o">.</span><span class="na">write</span><span class="o">.</span><span 
class="na">format</span><span class="o">(</span><span 
class="s">"hudi"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">options</span><span 
class="o">(</span><span class="n">getQuickstartWriteConfigs</span><span 
class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="no">PRECOMBINE_FIELD_OPT_KEY</span><span 
class="o">,</span> <span class="s">"ts"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span 
class="s">"hoodie.failed.writes.cleaner.policy"</span><span class="o">,</span> 
<span class="s">"LAZY"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="s">"hoodie.write.concurrency.mode"</span><span 
class="o">,</span> <span class="s">"optimistic_concurrency_control"</span><span 
class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span 
class="s">"hoodie.writer.lock.zookeeper.url"</span><span class="o">,</span> 
<span class="s">"zookeeper"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span 
class="s">"hoodie.writer.lock.zookeeper.port"</span><span class="o">,</span> 
<span class="s">"2181"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="s">"hoodie.writer.lock.wait_time_ms"</span><span 
class="o">,</span> <span class="s">"12000"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="s">"hoodie.writer.lock.num_retries"</span><span 
class="o">,</span> <span class="s">"2"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="s">"hoodie.writer.lock.lock_key"</span><span 
class="o">,</span> <span class="s">"test_table"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span 
class="s">"hoodie.writer.lock.zookeeper.zk_base_path"</span><span 
class="o">,</span> <span class="s">"/test"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="no">RECORDKEY_FIELD_OPT_KEY</span><span 
class="o">,</span> <span class="s">"uuid"</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="no">PARTITIONPATH_FIELD_OPT_KEY</span><span 
class="o">,</span> <span class="s">"partitionpath"</span><span 
class="o">)</span>
+       <span class="o">.</span><span class="na">option</span><span 
class="o">(</span><span class="no">TABLE_NAME</span><span class="o">,</span> 
<span class="n">tableName</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">mode</span><span 
class="o">(</span><span class="nc">Overwrite</span><span class="o">)</span>
+       <span class="o">.</span><span class="na">save</span><span 
class="o">(</span><span class="n">basePath</span><span class="o">)</span>
+</code></pre></div></div>
+
+<h2 id="deltastreamer">DeltaStreamer</h2>
+
+<p>The <code class="highlighter-rouge">HoodieDeltaStreamer</code> utility 
(part of hudi-utilities-bundle) provides ways to ingest from different sources 
such as DFS or Kafka, with the following capabilities.</p>
+
+<p>Using optimistic_concurrency_control via delta streamer requires adding the 
above configs to the properties file that can be passed to the
+job. For example, adding the configs to the kafka-source.properties file and 
passing them to deltastreamer will enable optimistic concurrency.
+A deltastreamer job can then be triggered as follows:</p>
+
+<div class="language-java highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code><span class="o">[</span><span 
class="n">hoodie</span><span class="o">]</span><span class="err">$</span> <span 
class="n">spark</span><span class="o">-</span><span class="n">submit</span> 
<span class="o">--</span><span class="kd">class</span> <span 
class="nc">org</span><span class="o">.</span><span 
class="na">apache</span><span class="o">.</span><span 
class="na">hudi</span><span class="o">.</sp [...]
+  <span class="o">--</span><span class="n">props</span> <span 
class="nl">file:</span><span 
class="c1">//${PWD}/hudi-utilities/src/test/resources/delta-streamer-config/kafka-source.properties
 \</span>
+  <span class="o">--</span><span class="n">schemaprovider</span><span 
class="o">-</span><span class="kd">class</span> <span 
class="nc">org</span><span class="o">.</span><span 
class="na">apache</span><span class="o">.</span><span 
class="na">hudi</span><span class="o">.</span><span 
class="na">utilities</span><span class="o">.</span><span 
class="na">schema</span><span class="o">.</span><span 
class="na">SchemaRegistryProvider</span> <span class="err">\</span>
+  <span class="o">--</span><span class="n">source</span><span 
class="o">-</span><span class="kd">class</span> <span 
class="nc">org</span><span class="o">.</span><span 
class="na">apache</span><span class="o">.</span><span 
class="na">hudi</span><span class="o">.</span><span 
class="na">utilities</span><span class="o">.</span><span 
class="na">sources</span><span class="o">.</span><span 
class="na">AvroKafkaSource</span> <span class="err">\</span>
+  <span class="o">--</span><span class="n">source</span><span 
class="o">-</span><span class="n">ordering</span><span class="o">-</span><span 
class="n">field</span> <span class="n">impresssiontime</span> <span 
class="err">\</span>
+  <span class="o">--</span><span class="n">target</span><span 
class="o">-</span><span class="n">base</span><span class="o">-</span><span 
class="n">path</span> <span class="nl">file:</span><span 
class="err">\</span><span class="o">/</span><span class="err">\</span><span 
class="o">/</span><span class="err">\</span><span class="o">/</span><span 
class="n">tmp</span><span class="o">/</span><span class="n">hudi</span><span 
class="o">-</span><span class="n">deltastreamer</span><span class="o">- [...]
+  <span class="o">--</span><span class="n">target</span><span 
class="o">-</span><span class="n">table</span> <span class="n">uber</span><span 
class="o">.</span><span class="na">impressions</span> <span class="err">\</span>
+  <span class="o">--</span><span class="n">op</span> <span 
class="no">BULK_INSERT</span>
+</code></pre></div></div>
+
+<h2 id="best-practices-when-using-optimistic-concurrency-control">Best 
Practices when using Optimistic Concurrency Control</h2>
+
+<p>Concurrent Writing to Hudi tables requires acquiring a lock with either 
Zookeeper or HiveMetastore. Due to several reasons you might want to configure 
retries to allow your application to acquire the lock.</p>
+<ol>
+  <li>Network connectivity or excessive load on servers increasing time for 
lock acquisition resulting in timeouts</li>
+  <li>Running a large number of concurrent jobs that are writing to the same 
hudi table can result in contention during lock acquisition, which can cause 
timeouts</li>
+  <li>In some scenarios of conflict resolution, Hudi commit operations might 
take up to tens of seconds while the lock is being held. This can result in 
timeouts for other jobs waiting to acquire a lock.</li>
+</ol>
+
+<p>Set the correct native lock provider client retries. NOTE that sometimes 
these settings are set on the server once and all clients inherit the same 
configs. Please check your settings before enabling optimistic concurrency.</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>hoodie.writer.lock.wait_time_ms
+hoodie.writer.lock.num_retries
+</code></pre></div></div>
+
+<p>Set the correct hudi client retries for Zookeeper &amp; HiveMetastore. This 
is useful in cases when native client retry settings cannot be changed. Please 
note that these retries will happen in addition to any native client retries 
that you may have set.</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>hoodie.writer.lock.client.wait_time_ms
+hoodie.writer.lock.client.num_retries
+</code></pre></div></div>
+
+<p><em>Setting the right values for these depends on a case by case basis; 
some defaults have been provided for general cases.</em></p>
+
+<h2 id="disabling-multi-writing">Disabling Multi Writing</h2>
+
+<p>Remove the following settings that were used to enable multi-writer or 
override with default values.</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>hoodie.write.concurrency.mode=single_writer
+hoodie.failed.writes.cleaner.policy=EAGER
+</code></pre></div></div>
+
+      </section>
+
+      <a href="#masthead__inner-wrap" class="back-to-top">Back to top 
&uarr;</a>
+
+
+      
+
+    </div>
+
+  </article>
+
+</div>
+
+    </div>
+
+    <div class="page__footer">
+      <footer>
+        
+<div class="row">
+  <div class="col-lg-12 footer">
+    <p>
+      <table class="table-apache-info">
+        <tr>
+          <td>
+            <a class="footer-link-img" href="https://apache.org">
+              <img width="250px" src="/assets/images/asf_logo.svg" alt="The 
Apache Software Foundation">
+            </a>
+          </td>
+          <td>
+            <a style="float: right" 
href="https://www.apache.org/events/current-event.html">
+              <img 
src="https://www.apache.org/events/current-event-234x60.png" />
+            </a>
+          </td>
+        </tr>
+      </table>
+    </p>
+    <p>
+      <a href="https://www.apache.org/licenses/">License</a> | <a 
href="https://www.apache.org/security/">Security</a> | <a 
href="https://www.apache.org/foundation/thanks.html">Thanks</a> | <a 
href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
+    </p>
+    <p>
+      Copyright &copy; <span id="copyright-year">2019</span> <a 
href="https://apache.org">The Apache Software Foundation</a>, Licensed under 
the <a href="https://www.apache.org/licenses/LICENSE-2.0"> Apache License, 
Version 2.0</a>.
+      Hudi, Apache and the Apache feather logo are trademarks of The Apache 
Software Foundation. <a href="/docs/privacy">Privacy Policy</a>
+    </p>
+  </div>
+</div>
+      </footer>
+    </div>
+
+
+  </body>
+</html>
\ No newline at end of file
diff --git a/content/docs/configurations.html b/content/docs/configurations.html
index e4f1f61..99bd6f3 100644
--- a/content/docs/configurations.html
+++ b/content/docs/configurations.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
@@ -369,6 +380,7 @@
       <li><a href="#metrics-configs">Metrics configs</a></li>
       <li><a href="#memory-configs">Memory configs</a></li>
       <li><a href="#write-commit-callback-configs">Write commit callback 
configs</a></li>
+      <li><a href="#locking-configs">Locking configs</a></li>
     </ul>
   </li>
 </ul>
@@ -1365,6 +1377,67 @@ Each clustering operation can create multiple groups. 
Total amount of data proce
 <p>Property: <code 
class="highlighter-rouge">hoodie.write.commit.callback.kafka.retries</code> <br 
/>
 <span style="color:grey">Times to retry. 3 by default</span></p>
 
+<h3 id="locking-configs">Locking configs</h3>
+<p>Configs that control locking mechanisms if <a 
href="#WriteConcurrencyMode">WriteConcurrencyMode=optimistic_concurrency_control</a>
 is enabled
+<a href="#withLockConfig">withLockConfig</a> (HoodieLockConfig) <br /></p>
+
+<h4 id="withLockProvider">withLockProvider(lockProvider = 
org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.provider</code> <br />
+<span style="color:grey">Lock provider class name. Users can provide their own 
implementation of LockProvider, which should be a subclass of 
org.apache.hudi.common.lock.LockProvider</span></p>
+
+<h4 id="withZkQuorum">withZkQuorum(zkQuorum)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.zookeeper.url</code> <br />
+<span style="color:grey">Set the list of comma separated servers to connect 
to</span></p>
+
+<h4 id="withZkBasePath">withZkBasePath(zkBasePath)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.zookeeper.base_path</code> 
[Required] <br />
+<span style="color:grey">The base path on Zookeeper under which to create a 
ZNode to acquire the lock. This should be common for all jobs writing to the 
same table</span></p>
+
+<h4 id="withZkPort">withZkPort(zkPort)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.zookeeper.port</code> [Required] 
<br />
+<span style="color:grey">The connection port to be used for 
Zookeeper</span></p>
+
+<h4 id="withZkLockKey">withZkLockKey(zkLockKey)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.zookeeper.lock_key</code> 
[Required] <br />
+<span style="color:grey">Key name under base_path at which to create a ZNode 
and acquire lock. Final path on zk will look like base_path/lock_key. We 
recommend setting this to the table name</span></p>
+
+<h4 
id="withZkConnectionTimeoutInMs">withZkConnectionTimeoutInMs(connectionTimeoutInMs
 = 15000)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.zookeeper.connection_timeout_ms</code>
 <br />
+<span style="color:grey">How long to wait when connecting to ZooKeeper before 
considering the connection a failure</span></p>
+
+<h4 id="withZkSessionTimeoutInMs">withZkSessionTimeoutInMs(sessionTimeoutInMs 
= 60000)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.zookeeper.session_timeout_ms</code>
 <br />
+<span style="color:grey">How long to wait after losing a connection to 
ZooKeeper before the session is expired</span></p>
+
+<h4 id="withNumRetries">withNumRetries(num_retries = 3)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.num_retries</code> <br />
+<span style="color:grey">Maximum number of times to retry by lock provider 
client</span></p>
+
+<h4 
id="withRetryWaitTimeInMillis">withRetryWaitTimeInMillis(retryWaitTimeInMillis 
= 5000)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.wait_time_ms_between_retry</code> 
<br />
+<span style="color:grey">Initial amount of time to wait between retries by 
lock provider client</span></p>
+
+<h4 id="withHiveDatabaseName">withHiveDatabaseName(hiveDatabaseName)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.hivemetastore.database</code> 
[Required] <br />
+<span style="color:grey">The Hive database to acquire lock against</span></p>
+
+<h4 id="withHiveTableName">withHiveTableName(hiveTableName)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.hivemetastore.table</code> 
[Required] <br />
+<span style="color:grey">The Hive table under the hive database to acquire 
lock against</span></p>
+
+<h4 id="withClientNumRetries">withClientNumRetries(clientNumRetries = 0)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.client.num_retries</code> <br />
+<span style="color:grey">Maximum number of times to retry to acquire lock 
additionally from the hudi client</span></p>
+
+<h4 
id="withClientRetryWaitTimeInMillis">withClientRetryWaitTimeInMillis(clientRetryWaitTimeInMillis 
= 10000)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.client.wait_time_ms_between_retry</code>
 <br />
+<span style="color:grey">Amount of time to wait between retries from the hudi 
client</span></p>
+
+<h4 
id="withConflictResolutionStrategy">withConflictResolutionStrategy(conflictResolutionStrategy 
= 
org.apache.hudi.client.transaction.SimpleConcurrentFileWritesConflictResolutionStrategy)</h4>
+<p>Property: <code 
class="highlighter-rouge">hoodie.writer.lock.conflict.resolution.strategy</code>
 <br />
+<span style="color:grey">Conflict resolution strategy class name, this should be a subclass of 
org.apache.hudi.client.transaction.ConflictResolutionStrategy</span></p>
+
+
       </section>
 
       <a href="#masthead__inner-wrap" class="back-to-top">Back to top 
&uarr;</a>
diff --git a/content/docs/cos_hoodie.html b/content/docs/cos_hoodie.html
index 2a9d9df..5f7977f 100644
--- a/content/docs/cos_hoodie.html
+++ b/content/docs/cos_hoodie.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/deployment.html b/content/docs/deployment.html
index 21ef6d0..30d144b 100644
--- a/content/docs/deployment.html
+++ b/content/docs/deployment.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/docker_demo.html b/content/docs/docker_demo.html
index f7baaf9..e24699f 100644
--- a/content/docs/docker_demo.html
+++ b/content/docs/docker_demo.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/docs-versions.html b/content/docs/docs-versions.html
index 6d47eeb..6db93ed 100644
--- a/content/docs/docs-versions.html
+++ b/content/docs/docs-versions.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/flink-quick-start-guide.html 
b/content/docs/flink-quick-start-guide.html
index 6e5d4ee..f4f48e7 100644
--- a/content/docs/flink-quick-start-guide.html
+++ b/content/docs/flink-quick-start-guide.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/gcs_hoodie.html b/content/docs/gcs_hoodie.html
index 138cb1b..d71d866 100644
--- a/content/docs/gcs_hoodie.html
+++ b/content/docs/gcs_hoodie.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/ibm_cos_hoodie.html b/content/docs/ibm_cos_hoodie.html
index 68c65ce..9307778 100644
--- a/content/docs/ibm_cos_hoodie.html
+++ b/content/docs/ibm_cos_hoodie.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/metrics.html b/content/docs/metrics.html
index 9eb712b..65ca70e 100644
--- a/content/docs/metrics.html
+++ b/content/docs/metrics.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/migration_guide.html 
b/content/docs/migration_guide.html
index d671c72..3ef7f99 100644
--- a/content/docs/migration_guide.html
+++ b/content/docs/migration_guide.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/oss_hoodie.html b/content/docs/oss_hoodie.html
index ec429f3..a1a643b 100644
--- a/content/docs/oss_hoodie.html
+++ b/content/docs/oss_hoodie.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/overview.html b/content/docs/overview.html
index d6a78d2..04de878 100644
--- a/content/docs/overview.html
+++ b/content/docs/overview.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/performance.html b/content/docs/performance.html
index b64c5c0..48b1183 100644
--- a/content/docs/performance.html
+++ b/content/docs/performance.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/powered_by.html b/content/docs/powered_by.html
index 8761748..e604d51 100644
--- a/content/docs/powered_by.html
+++ b/content/docs/powered_by.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/privacy.html b/content/docs/privacy.html
index 22e16b7..5b03c8e 100644
--- a/content/docs/privacy.html
+++ b/content/docs/privacy.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/querying_data.html b/content/docs/querying_data.html
index fb0bfc9..18f5c59 100644
--- a/content/docs/querying_data.html
+++ b/content/docs/querying_data.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="active">Querying 
Data</a></li>
             
 
diff --git a/content/docs/s3_hoodie.html b/content/docs/s3_hoodie.html
index e00a6de..b86d9f3 100644
--- a/content/docs/s3_hoodie.html
+++ b/content/docs/s3_hoodie.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/spark_quick-start-guide.html 
b/content/docs/spark_quick-start-guide.html
index 8c36992..ad47de5 100644
--- a/content/docs/spark_quick-start-guide.html
+++ b/content/docs/spark_quick-start-guide.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/structure.html b/content/docs/structure.html
index 2adc401..061aa5e 100644
--- a/content/docs/structure.html
+++ b/content/docs/structure.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/use_cases.html b/content/docs/use_cases.html
index d563210..bd823fb 100644
--- a/content/docs/use_cases.html
+++ b/content/docs/use_cases.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/docs/writing_data.html b/content/docs/writing_data.html
index 72e2a67..1fb6536 100644
--- a/content/docs/writing_data.html
+++ b/content/docs/writing_data.html
@@ -208,6 +208,17 @@
             
 
             
+              <li><a href="/docs/concurrency_control.html" 
class="">Concurrency Control</a></li>
+            
+
+          
+            
+            
+
+            
+            
+
+            
               <li><a href="/docs/querying_data.html" class="">Querying 
Data</a></li>
             
 
diff --git a/content/sitemap.xml b/content/sitemap.xml
index 229a39d..125ecc7 100644
--- a/content/sitemap.xml
+++ b/content/sitemap.xml
@@ -1061,6 +1061,10 @@
 <lastmod>2020-06-20T15:59:57-04:00</lastmod>
 </url>
 <url>
+<loc>https://hudi.apache.org/docs/concurrency_control.html</loc>
+<lastmod>2021-03-19T15:59:57-04:00</lastmod>
+</url>
+<url>
 <loc>https://hudi.apache.org/cn/docs/privacy.html</loc>
 <lastmod>2019-12-30T14:59:57-05:00</lastmod>
 </url>

Reply via email to