http://git-wip-us.apache.org/repos/asf/zookeeper/blob/ec4ec140/content/doc/r3.5.4-beta/zookeeperQuotas.html ---------------------------------------------------------------------- diff --git a/content/doc/r3.5.4-beta/zookeeperQuotas.html b/content/doc/r3.5.4-beta/zookeeperQuotas.html new file mode 100644 index 0000000..a2d96a2 --- /dev/null +++ b/content/doc/r3.5.4-beta/zookeeperQuotas.html @@ -0,0 +1,278 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> +<META http-equiv="Content-Type" content="text/html; charset=UTF-8"> +<meta content="Apache Forrest" name="Generator"> +<meta name="Forrest-version" content="0.9"> +<meta name="Forrest-skin-name" content="pelt"> +<title>ZooKeeper Quota's Guide</title> +<link type="text/css" href="skin/basic.css" rel="stylesheet"> +<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet"> +<link media="print" type="text/css" href="skin/print.css" rel="stylesheet"> +<link type="text/css" href="skin/profile.css" rel="stylesheet"> +<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script> +<link rel="shortcut icon" href="images/favicon.ico"> +</head> +<body onload="init()"> +<script type="text/javascript">ndeSetTextSize();</script> +<div id="top"> +<!--+ + |breadtrail + +--> +<div class="breadtrail"> +<a href="http://www.apache.org/">Apache</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> +</div> +<!--+ + |header + +--> +<div class="header"> +<!--+ + |start group logo + +--> +<div class="grouplogo"> +<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" 
title="Apache Hadoop"></a> +</div> +<!--+ + |end group logo + +--> +<!--+ + |start Project Logo + +--> +<div class="projectlogo"> +<a href="http://zookeeper.apache.org/"><img class="logoImage" alt="ZooKeeper" src="images/zookeeper_small.gif" title="ZooKeeper: distributed coordination"></a> +</div> +<!--+ + |end Project Logo + +--> +<!--+ + |start Search + +--> +<div class="searchbox"> +<form action="http://www.google.com/search" method="get" class="roundtopsmall"> +<input value="zookeeper.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> + <input name="Search" value="Search" type="submit"> +</form> +</div> +<!--+ + |end search + +--> +<!--+ + |start Tabs + +--> +<ul id="tabs"> +<li> +<a class="unselected" href="http://zookeeper.apache.org/">Project</a> +</li> +<li> +<a class="unselected" href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/">Wiki</a> +</li> +<li class="current"> +<a class="selected" href="index.html">ZooKeeper 3.5 Documentation</a> +</li> +</ul> +<!--+ + |end Tabs + +--> +</div> +</div> +<div id="main"> +<div id="publishedStrip"> +<!--+ + |start Subtabs + +--> +<div id="level2tabs"></div> +<!--+ + |end Endtabs + +--> +<script type="text/javascript"><!-- +document.write("Last Published: " + document.lastModified); +// --></script> +</div> +<!--+ + |breadtrail + +--> +<div class="breadtrail"> + + + </div> +<!--+ + |start Menu, mainarea + +--> +<!--+ + |start Menu + +--> +<div id="menu"> +<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Overview</div> +<div id="menu_1.1" class="menuitemgroup"> +<div class="menuitem"> +<a href="index.html">Welcome</a> +</div> +<div class="menuitem"> +<a href="zookeeperOver.html">Overview</a> +</div> +<div class="menuitem"> +<a href="zookeeperStarted.html">Getting Started</a> +</div> +<div class="menuitem"> +<a href="releasenotes.html">Release 
Notes</a> +</div> +</div> +<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Developer</div> +<div id="menu_1.2" class="menuitemgroup"> +<div class="menuitem"> +<a href="api/index.html">API Docs</a> +</div> +<div class="menuitem"> +<a href="zookeeperProgrammers.html">Programmer's Guide</a> +</div> +<div class="menuitem"> +<a href="javaExample.html">Java Example</a> +</div> +<div class="menuitem"> +<a href="zookeeperTutorial.html">Barrier and Queue Tutorial</a> +</div> +<div class="menuitem"> +<a href="recipes.html">Recipes</a> +</div> +</div> +<div onclick="SwitchMenu('menu_selected_1.3', 'skin/')" id="menu_selected_1.3Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Admin & Ops</div> +<div id="menu_selected_1.3" class="selectedmenuitemgroup" style="display: block;"> +<div class="menuitem"> +<a href="zookeeperAdmin.html">Administrator's Guide</a> +</div> +<div class="menupage"> +<div class="menupagetitle">Quota Guide</div> +</div> +<div class="menuitem"> +<a href="zookeeperJMX.html">JMX</a> +</div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> +<div class="menuitem"> +<a href="zookeeperReconfig.html">Dynamic Reconfiguration</a> +</div> +</div> +<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Contributor</div> +<div id="menu_1.4" class="menuitemgroup"> +<div class="menuitem"> +<a href="zookeeperInternals.html">ZooKeeper Internals</a> +</div> +</div> +<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div> +<div id="menu_1.5" class="menuitemgroup"> +<div class="menuitem"> +<a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER">Wiki</a> +</div> +<div class="menuitem"> +<a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ">FAQ</a> +</div> +<div class="menuitem"> +<a href="http://zookeeper.apache.org/mailing_lists.html">Mailing Lists</a> +</div> +</div> 
+<div id="credit"></div> +<div id="roundbottom"> +<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> +<!--+ + |alternative credits + +--> +<div id="credit2"></div> +</div> +<!--+ + |end Menu + +--> +<!--+ + |start content + +--> +<div id="content"> +<div title="Portable Document Format" class="pdflink"> +<a class="dida" href="zookeeperQuotas.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> + PDF</a> +</div> +<h1>ZooKeeper Quota's Guide</h1> +<h3>A Guide to Deployment and Administration</h3> +<div id="front-matter"> +<div id="minitoc-area"> +<ul class="minitoc"> +<li> +<a href="#zookeeper_quotas">Quotas</a> +<ul class="minitoc"> +<li> +<a href="#Setting+Quotas">Setting Quotas</a> +</li> +<li> +<a href="#Listing+Quotas">Listing Quotas</a> +</li> +<li> +<a href="#Deleting+Quotas"> Deleting Quotas</a> +</li> +</ul> +</li> +</ul> +</div> +</div> + + + + +<a name="zookeeper_quotas"></a> +<h2 class="h3">Quotas</h2> +<div class="section"> +<p> ZooKeeper has both namespace and bytes quotas. You can use the ZooKeeperMain class to set up quotas. + ZooKeeper prints <em>WARN</em> messages if users exceed the quota assigned to them. The messages + are printed in the ZooKeeper log. + </p> +<p> +<span class="codefrag computeroutput">$ bin/zkCli.sh -server host:port</span> +</p> +<p> The above command gives you a command line option of using quotas.</p> +<a name="Setting+Quotas"></a> +<h3 class="h4">Setting Quotas</h3> +<p>You can use + <em>setquota</em> to set a quota on a ZooKeeper node. It has an option of setting quota with + -n (for namespace) + and -b (for bytes). </p> +<p> ZooKeeper quotas are stored in ZooKeeper itself in /zookeeper/quota. To prevent other people from + changing the quotas, set the ACL for /zookeeper/quota such that only admins are able to read and write to it. 
+ </p> +<a name="Listing+Quotas"></a> +<h3 class="h4">Listing Quotas</h3> +<p> You can use + <em>listquota</em> to list a quota on a ZooKeeper node. + </p> +<a name="Deleting+Quotas"></a> +<h3 class="h4"> Deleting Quotas</h3> +<p> You can use + <em>delquota</em> to delete quota on a ZooKeeper node. + </p> +</div> + +<p align="right"> +<font size="-2"></font> +</p> +</div> +<!--+ + |end content + +--> +<div class="clearboth"> </div> +</div> +<div id="footer"> +<!--+ + |start bottomstrip + +--> +<div class="lastmodified"> +<script type="text/javascript"><!-- +document.write("Last Published: " + document.lastModified); +// --></script> +</div> +<div class="copyright"> + Copyright © + <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> +</div> +<!--+ + |end bottomstrip + +--> +</div> +</body> +</html>
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/ec4ec140/content/doc/r3.5.4-beta/zookeeperQuotas.pdf ---------------------------------------------------------------------- diff --git a/content/doc/r3.5.4-beta/zookeeperQuotas.pdf b/content/doc/r3.5.4-beta/zookeeperQuotas.pdf new file mode 100644 index 0000000..76eb18e Binary files /dev/null and b/content/doc/r3.5.4-beta/zookeeperQuotas.pdf differ http://git-wip-us.apache.org/repos/asf/zookeeper/blob/ec4ec140/content/doc/r3.5.4-beta/zookeeperReconfig.html ---------------------------------------------------------------------- diff --git a/content/doc/r3.5.4-beta/zookeeperReconfig.html b/content/doc/r3.5.4-beta/zookeeperReconfig.html new file mode 100644 index 0000000..d3d6a26 --- /dev/null +++ b/content/doc/r3.5.4-beta/zookeeperReconfig.html @@ -0,0 +1,1250 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> +<META http-equiv="Content-Type" content="text/html; charset=UTF-8"> +<meta content="Apache Forrest" name="Generator"> +<meta name="Forrest-version" content="0.9"> +<meta name="Forrest-skin-name" content="pelt"> +<title>ZooKeeper Dynamic Reconfiguration</title> +<link type="text/css" href="skin/basic.css" rel="stylesheet"> +<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet"> +<link media="print" type="text/css" href="skin/print.css" rel="stylesheet"> +<link type="text/css" href="skin/profile.css" rel="stylesheet"> +<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script> +<link rel="shortcut icon" href="images/favicon.ico"> +</head> +<body onload="init()"> +<script type="text/javascript">ndeSetTextSize();</script> +<div id="top"> +<!--+ + |breadtrail + +--> +<div class="breadtrail"> +<a 
href="http://www.apache.org/">Apache</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> +</div> +<!--+ + |header + +--> +<div class="header"> +<!--+ + |start group logo + +--> +<div class="grouplogo"> +<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a> +</div> +<!--+ + |end group logo + +--> +<!--+ + |start Project Logo + +--> +<div class="projectlogo"> +<a href="http://zookeeper.apache.org/"><img class="logoImage" alt="ZooKeeper" src="images/zookeeper_small.gif" title="ZooKeeper: distributed coordination"></a> +</div> +<!--+ + |end Project Logo + +--> +<!--+ + |start Search + +--> +<div class="searchbox"> +<form action="http://www.google.com/search" method="get" class="roundtopsmall"> +<input value="zookeeper.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> + <input name="Search" value="Search" type="submit"> +</form> +</div> +<!--+ + |end search + +--> +<!--+ + |start Tabs + +--> +<ul id="tabs"> +<li> +<a class="unselected" href="http://zookeeper.apache.org/">Project</a> +</li> +<li> +<a class="unselected" href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/">Wiki</a> +</li> +<li class="current"> +<a class="selected" href="index.html">ZooKeeper 3.5 Documentation</a> +</li> +</ul> +<!--+ + |end Tabs + +--> +</div> +</div> +<div id="main"> +<div id="publishedStrip"> +<!--+ + |start Subtabs + +--> +<div id="level2tabs"></div> +<!--+ + |end Endtabs + +--> +<script type="text/javascript"><!-- +document.write("Last Published: " + document.lastModified); +// --></script> +</div> +<!--+ + |breadtrail + +--> +<div class="breadtrail"> + + + </div> +<!--+ + |start Menu, mainarea + +--> +<!--+ + |start Menu 
+ +--> +<div id="menu"> +<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Overview</div> +<div id="menu_1.1" class="menuitemgroup"> +<div class="menuitem"> +<a href="index.html">Welcome</a> +</div> +<div class="menuitem"> +<a href="zookeeperOver.html">Overview</a> +</div> +<div class="menuitem"> +<a href="zookeeperStarted.html">Getting Started</a> +</div> +<div class="menuitem"> +<a href="releasenotes.html">Release Notes</a> +</div> +</div> +<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Developer</div> +<div id="menu_1.2" class="menuitemgroup"> +<div class="menuitem"> +<a href="api/index.html">API Docs</a> +</div> +<div class="menuitem"> +<a href="zookeeperProgrammers.html">Programmer's Guide</a> +</div> +<div class="menuitem"> +<a href="javaExample.html">Java Example</a> +</div> +<div class="menuitem"> +<a href="zookeeperTutorial.html">Barrier and Queue Tutorial</a> +</div> +<div class="menuitem"> +<a href="recipes.html">Recipes</a> +</div> +</div> +<div onclick="SwitchMenu('menu_selected_1.3', 'skin/')" id="menu_selected_1.3Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Admin & Ops</div> +<div id="menu_selected_1.3" class="selectedmenuitemgroup" style="display: block;"> +<div class="menuitem"> +<a href="zookeeperAdmin.html">Administrator's Guide</a> +</div> +<div class="menuitem"> +<a href="zookeeperQuotas.html">Quota Guide</a> +</div> +<div class="menuitem"> +<a href="zookeeperJMX.html">JMX</a> +</div> +<div class="menuitem"> +<a href="zookeeperObservers.html">Observers Guide</a> +</div> +<div class="menupage"> +<div class="menupagetitle">Dynamic Reconfiguration</div> +</div> +</div> +<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Contributor</div> +<div id="menu_1.4" class="menuitemgroup"> +<div class="menuitem"> +<a href="zookeeperInternals.html">ZooKeeper Internals</a> +</div> +</div> +<div 
onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div> +<div id="menu_1.5" class="menuitemgroup"> +<div class="menuitem"> +<a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER">Wiki</a> +</div> +<div class="menuitem"> +<a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ">FAQ</a> +</div> +<div class="menuitem"> +<a href="http://zookeeper.apache.org/mailing_lists.html">Mailing Lists</a> +</div> +</div> +<div id="credit"></div> +<div id="roundbottom"> +<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> +<!--+ + |alternative credits + +--> +<div id="credit2"></div> +</div> +<!--+ + |end Menu + +--> +<!--+ + |start content + +--> +<div id="content"> +<div title="Portable Document Format" class="pdflink"> +<a class="dida" href="zookeeperReconfig.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> + PDF</a> +</div> +<h1>ZooKeeper Dynamic Reconfiguration</h1> +<div id="front-matter"> +<div id="minitoc-area"> +<ul class="minitoc"> +<li> +<a href="#ch_reconfig_intro">Overview</a> +</li> +<li> +<a href="#ch_reconfig_format">Changes to Configuration Format</a> +<ul class="minitoc"> +<li> +<a href="#sc_reconfig_clientport">Specifying the client port</a> +</li> +<li> +<a href="#sc_reconfig_standaloneEnabled">The standaloneEnabled flag</a> +</li> +<li> +<a href="#sc_reconfig_reconfigEnabled">The reconfigEnabled flag</a> +</li> +<li> +<a href="#sc_reconfig_file">Dynamic configuration file</a> +</li> +<li> +<a href="#sc_reconfig_backward">Backward compatibility</a> +</li> +</ul> +</li> +<li> +<a href="#ch_reconfig_upgrade">Upgrading to 3.5.0</a> +</li> +<li> +<a href="#ch_reconfig_dyn">Dynamic Reconfiguration of the ZooKeeper Ensemble</a> +<ul class="minitoc"> +<li> +<a href="#ch_reconfig_api">API</a> +</li> +<li> +<a href="#sc_reconfig_access_control">Security</a> +</li> +<li> +<a 
href="#sc_reconfig_retrieving">Retrieving the current dynamic configuration</a> +</li> +<li> +<a href="#sc_reconfig_modifying">Modifying the current dynamic configuration</a> +<ul class="minitoc"> +<li> +<a href="#sc_reconfig_general">General</a> +</li> +<li> +<a href="#sc_reconfig_incremental">Incremental mode</a> +</li> +<li> +<a href="#sc_reconfig_nonincremental">Non-incremental mode</a> +</li> +<li> +<a href="#sc_reconfig_conditional">Conditional reconfig</a> +</li> +<li> +<a href="#sc_reconfig_errors">Error conditions</a> +</li> +<li> +<a href="#sc_reconfig_additional">Additional comments</a> +</li> +</ul> +</li> +</ul> +</li> +<li> +<a href="#ch_reconfig_rebalancing">Rebalancing Client Connections</a> +</li> +</ul> +</div> +</div> + + + + +<a name="ch_reconfig_intro"></a> +<h2 class="h3">Overview</h2> +<div class="section"> +<p>Prior to the 3.5.0 release, the membership and all other configuration + parameters of ZooKeeper were static - loaded during boot and immutable at + runtime. Operators resorted to “rolling restarts” - a manually intensive + and error-prone method of changing the configuration that has caused data + loss and inconsistency in production.</p> +<p>Starting with 3.5.0, “rolling restarts” are no longer needed! + ZooKeeper comes with full support for automated configuration changes: the + set of ZooKeeper servers, their roles (participant / observer), all ports, + and even the quorum system can be changed dynamically, without service + interruption and while maintaining data consistency. Reconfigurations are + performed immediately, just like other operations in ZooKeeper. Multiple + changes can be done using a single reconfiguration command. 
The dynamic + reconfiguration functionality does not limit operation concurrency, does + not require client operations to be stopped during reconfigurations, has a + very simple interface for administrators and no added complexity to other + client operations.</p> +<p>New client-side features allow clients to find out about configuration + changes and to update the connection string (list of servers and their + client ports) stored in their ZooKeeper handle. A probabilistic algorithm + is used to rebalance clients across the new configuration servers while + keeping the extent of client migrations proportional to the change in + ensemble membership.</p> +<p>This document provides the administrator manual for reconfiguration. + For a detailed description of the reconfiguration algorithms, performance + measurements, and more, please see our paper:</p> +<dl> + +<dt> +<term>Shraer, A., Reed, B., Malkhi, D., Junqueira, F. Dynamic + Reconfiguration of Primary/Backup Clusters. In <em>USENIX Annual + Technical Conference (ATC) </em>(2012), 425-437</term> +</dt> +<dd> +<p>Links: <a href="https://www.usenix.org/system/files/conference/atc12/atc12-final74.pdf">paper (pdf)</a>, <a href="https://www.usenix.org/sites/default/files/conference/protected-files/shraer_atc12_slides.pdf">slides (pdf)</a>, <a href="https://www.usenix.org/conference/atc12/technical-sessions/presentation/shraer">video</a>, <a href="http://www.slideshare.net/Hadoop_Summit/dynamic-reconfiguration-of-zookeeper">hadoop summit slides</a> +</p> +</dd> + +</dl> +<p> +<strong>Note:</strong> Starting with 3.5.3, the dynamic reconfiguration + feature is disabled by default, and has to be explicitly turned on via + <a href="zookeeperAdmin.html#sc_advancedConfiguration"> + reconfigEnabled </a> configuration option. 
+ </p> +</div> + +<a name="ch_reconfig_format"></a> +<h2 class="h3">Changes to Configuration Format</h2> +<div class="section"> +<a name="sc_reconfig_clientport"></a> +<h3 class="h4">Specifying the client port</h3> +<p>A client port of a server is the port on which the server accepts + client connection requests. Starting with 3.5.0 the + <em>clientPort</em> and <em>clientPortAddress + </em> configuration parameters should no longer be used. Instead, + this information is now part of the server keyword specification, which + becomes as follows:</p> +<p> +<span class="codefrag computeroutput">server.&lt;positive id&gt; = &lt;address1&gt;:&lt;port1&gt;:&lt;port2&gt;[:role];[&lt;client port address&gt;:]&lt;client port&gt;</span> +</p> +<p>The client port specification is to the right of the semicolon. The + client port address is optional, and if not specified it defaults to + "0.0.0.0". As usual, role is also optional, it can be + <em>participant</em> or <em>observer</em> + (<em>participant</em> by default).</p> +<p> Examples of legal server statements: </p> +<ul> + +<li> + +<p> +<span class="codefrag computeroutput">server.5 = 125.23.63.23:1234:1235;1236</span> +</p> + +</li> + +<li> + +<p> +<span class="codefrag computeroutput">server.5 = 125.23.63.23:1234:1235:participant;1236</span> +</p> + +</li> + +<li> + +<p> +<span class="codefrag computeroutput">server.5 = 125.23.63.23:1234:1235:observer;1236</span> +</p> + +</li> + +<li> + +<p> +<span class="codefrag computeroutput">server.5 = 125.23.63.23:1234:1235;125.23.63.24:1236</span> +</p> + +</li> + +<li> + +<p> +<span class="codefrag computeroutput">server.5 = 125.23.63.23:1234:1235:participant;125.23.63.23:1236</span> +</p> + +</li> + +</ul> +<a name="sc_reconfig_standaloneEnabled"></a> +<h3 class="h4">The standaloneEnabled flag</h3> +<p>Prior to 3.5.0, one could run ZooKeeper in Standalone mode or in a + Distributed mode. These are separate implementation stacks, and + switching between them during run time is not possible. 
By default (for + backward compatibility) <em>standaloneEnabled</em> is set to + <em>true</em>. The consequence of using this default is that + if started with a single server the ensemble will not be allowed to + grow, and if started with more than one server it will not be allowed to + shrink to contain fewer than two participants.</p> +<p>Setting the flag to <em>false</em> instructs the system + to run the Distributed software stack even if there is only a single + participant in the ensemble. To achieve this the (static) configuration + file should contain:</p> +<p> +<span class="codefrag computeroutput">standaloneEnabled=false</span> +</p> +<p>With this setting it is possible to start a ZooKeeper ensemble + containing a single participant and to dynamically grow it by adding + more servers. Similarly, it is possible to shrink an ensemble so that + just a single participant remains, by removing servers.</p> +<p>Since running the Distributed mode allows more flexibility, we + recommend setting the flag to <em>false</em>. We expect that + the legacy Standalone mode will be deprecated in the future.</p> +<a name="sc_reconfig_reconfigEnabled"></a> +<h3 class="h4">The reconfigEnabled flag</h3> +<p>Starting with 3.5.0 and prior to 3.5.3, there is no way to disable + the dynamic reconfiguration feature. We would like to offer the option of + disabling the reconfiguration feature because with reconfiguration enabled, + we have a security concern that a malicious actor can make arbitrary changes + to the configuration of a ZooKeeper ensemble, including adding a compromised + server to the ensemble. We prefer to leave it to the discretion of the user to + decide whether to enable it or not and make sure that the appropriate security + measures are in place. 
So in 3.5.3 the <a href="zookeeperAdmin.html#sc_advancedConfiguration"> + reconfigEnabled </a> configuration option is introduced + such that the reconfiguration feature can be completely disabled and any attempts + to reconfigure a cluster through reconfig API with or without authentication + will fail by default, unless <strong>reconfigEnabled</strong> is set to + <strong>true</strong>. + </p> +<p>To set the option to true, the configuration file (zoo.cfg) should contain:</p> +<p> +<span class="codefrag computeroutput">reconfigEnabled=true</span> +</p> +<a name="sc_reconfig_file"></a> +<h3 class="h4">Dynamic configuration file</h3> +<p>Starting with 3.5.0 we're distinguishing between dynamic + configuration parameters, which can be changed during runtime, and + static configuration parameters, which are read from a configuration + file when a server boots and don't change during its execution. For now, + the following configuration keywords are considered part of the dynamic + configuration: <em>server</em>, <em>group</em> + and <em>weight</em>.</p> +<p>Dynamic configuration parameters are stored in a separate file on + the server (which we call the dynamic configuration file). 
This file is + linked from the static config file using the new + <em>dynamicConfigFile</em> keyword.</p> +<p> +<strong>Example 1</strong> +</p> +<div class="note example"> +<div class="label">zoo_replicated1.cfg</div> +<div class="content"> + +<title>zoo_replicated1.cfg</title> + +<pre class="code">tickTime=2000 +dataDir=/zookeeper/data/zookeeper1 +initLimit=5 +syncLimit=2 +dynamicConfigFile=/zookeeper/conf/zoo_replicated1.cfg.dynamic</pre> + +</div> +</div> +<div class="note example"> +<div class="label">zoo_replicated1.cfg.dynamic</div> +<div class="content"> + +<title>zoo_replicated1.cfg.dynamic</title> + +<pre class="code">server.1=125.23.63.23:2780:2783:participant;2791 +server.2=125.23.63.24:2781:2784:participant;2792 +server.3=125.23.63.25:2782:2785:participant;2793</pre> + +</div> +</div> +<p>When the ensemble configuration changes, the static configuration + parameters remain the same. The dynamic parameters are pushed by + ZooKeeper and overwrite the dynamic configuration files on all servers. + Thus, the dynamic configuration files on the different servers are + usually identical (they can only differ momentarily when a + reconfiguration is in progress, or if a new configuration hasn't + propagated yet to some of the servers). Once created, the dynamic + configuration file should not be manually altered. Changes are only made + through the new reconfiguration commands outlined below. Note that + changing the config of an offline cluster could result in an + inconsistency with respect to configuration information stored in the + ZooKeeper log (and the special configuration znode, populated from the + log) and is therefore highly discouraged.</p> +<p> +<strong>Example 2</strong> +</p> +<p>Users may prefer to initially specify a single configuration file. 
+ The following is thus also legal:</p> +<div class="note example"> +<div class="label">zoo_replicated1.cfg</div> +<div class="content"> + +<title>zoo_replicated1.cfg</title> + +<pre class="code">tickTime=2000 +dataDir=/zookeeper/data/zookeeper1 +initLimit=5 +syncLimit=2 +clientPort=<strong>2791</strong> // note that this line is now redundant and therefore not recommended +server.1=125.23.63.23:2780:2783:participant;<strong>2791</strong> +server.2=125.23.63.24:2781:2784:participant;2792 +server.3=125.23.63.25:2782:2785:participant;2793</pre> + +</div> +</div> +<p>The configuration files on each server will be automatically split + into dynamic and static files, if they are not already in this format. + So the configuration file above will be automatically transformed into + the two files in Example 1. Note that the clientPort and + clientPortAddress lines (if specified) will be automatically removed + during this process, if they are redundant (as in the example above). + The original static configuration file is backed up (in a .bak + file).</p> +<a name="sc_reconfig_backward"></a> +<h3 class="h4">Backward compatibility</h3> +<p>We still support the old configuration format. For example, the + following configuration file is acceptable (but not recommended):</p> +<div class="note example"> +<div class="label">zoo_replicated1.cfg</div> +<div class="content"> + +<title>zoo_replicated1.cfg</title> + +<pre class="code">tickTime=2000 +dataDir=/zookeeper/data/zookeeper1 +initLimit=5 +syncLimit=2 +clientPort=2791 +server.1=125.23.63.23:2780:2783:participant +server.2=125.23.63.24:2781:2784:participant +server.3=125.23.63.25:2782:2785:participant</pre> + +</div> +</div> +<p>During boot, a dynamic configuration file is created and contains + the dynamic part of the configuration as explained earlier. 
In this + case, however, the line "clientPort=2791" will remain in the static + configuration file of server 1 since it is not redundant -- it was not + specified as part of the "server.1=..." statement using the format explained in + the section <a href="#ch_reconfig_format">Changes to Configuration Format</a>. If a reconfiguration + is invoked that sets the client port of server 1, we remove + "clientPort=2791" from the static configuration file (the dynamic file + now contains this information as part of the specification of server + 1).</p> +</div> + +<a name="ch_reconfig_upgrade"></a> +<h2 class="h3">Upgrading to 3.5.0</h2> +<div class="section"> +<p>Upgrading a running ZooKeeper ensemble to 3.5.0 should be done only + after upgrading your ensemble to the 3.4.6 release. Note that this is only + necessary for rolling upgrades (if you're fine with shutting down the + system completely, you don't have to go through 3.4.6). If you attempt a + rolling upgrade without going through 3.4.6 (for example from 3.4.5), you + may get the following error:</p> +<pre class="code">2013-01-30 11:32:10,663 [myid:2] - INFO [localhost/127.0.0.1:2784:QuorumCnxManager$Listener@498] - Received connection request /127.0.0.1:60876 +2013-01-30 11:32:10,663 [myid:2] - WARN [localhost/127.0.0.1:2784:QuorumCnxManager@349] - Invalid server id: -65536</pre> +<p>During a rolling upgrade, each server is taken down in turn and + rebooted with the new 3.5.0 binaries. Before starting the server with + 3.5.0 binaries, we highly recommend updating the configuration file so + that all server statements "server.x=..." contain client ports (see the + section <a href="#sc_reconfig_clientport">Specifying the client port</a>). 
As explained earlier + you may leave the configuration in a single file, as well as leave the + clientPort/clientPortAddress statements (although if you specify client + ports in the new format, these statements are now redundant).</p> +</div> + + +<a name="ch_reconfig_dyn"></a> +<h2 class="h3">Dynamic Reconfiguration of the ZooKeeper Ensemble</h2> +<div class="section"> +<p>The ZooKeeper Java and C API were extended with getConfig and reconfig + commands that facilitate reconfiguration. Both commands have a synchronous + (blocking) variant and an asynchronous one. We demonstrate these commands + here using the Java CLI, but note that you can similarly use the C CLI or + invoke the commands directly from a program just like any other ZooKeeper + command.</p> +<a name="ch_reconfig_api"></a> +<h3 class="h4">API</h3> +<p>There are two sets of APIs for both Java and C client. + </p> +<dl> + +<dt> +<term> +<strong>Reconfiguration API</strong> +</term> +</dt> +<dd> +<p>Reconfiguration API is used to reconfigure the ZooKeeper cluster. + Starting with 3.5.3, reconfiguration Java APIs are moved into ZooKeeperAdmin class + from ZooKeeper class, and use of this API requires ACL setup and user + authentication (see <a href="#sc_reconfig_access_control">Security</a> for more information.). + </p> +<p>Note: for temporary backward compatibility, the reconfig() APIs will remain in ZooKeeper.java + where they were for a few alpha versions of 3.5.x. However, these APIs are deprecated and users + should move to the reconfigure() APIs in ZooKeeperAdmin.java. + </p> +</dd> + + +<dt> +<term> +<strong>Get Configuration API</strong> +</term> +</dt> +<dd> +<p>Get configuration APIs are used to retrieve ZooKeeper cluster configuration information + stored in /zookeeper/config znode. 
Use of this API does not require specific setup or authentication, + because /zookeeper/config is readable by any user.</p> +</dd> + +</dl> +<a name="sc_reconfig_access_control"></a> +<h3 class="h4">Security</h3> +<p>Prior to <strong>3.5.3</strong>, there is no enforced security mechanism + over reconfig so any ZooKeeper clients that can connect to ZooKeeper server ensemble + will have the ability to change the state of a ZooKeeper cluster via reconfig. + It is thus possible for a malicious client to tamper with the membership of an ensemble, + e.g., add a compromised server, or remove legitimate servers. + Cases like these could be security vulnerabilities on a case by case basis. + </p> +<p>To address this security concern, we introduced access control over reconfig + starting from <strong>3.5.3</strong> such that only a specific set of users + can use reconfig commands or APIs, and these users need to be configured explicitly. In addition, + the setup of ZooKeeper cluster must enable authentication so ZooKeeper clients can be authenticated. + </p> +<p> + We also provide an escape hatch for users who operate and interact with a ZooKeeper ensemble in a secured + environment (i.e. behind a company firewall). For those users who want to use the reconfiguration feature but + don't want the overhead of configuring an explicit list of authorized users for reconfig access checks, + they can set <a href="zookeeperAdmin.html#sc_authOptions">"skipACL"</a> to "yes" which will + skip the ACL check and allow any user to reconfigure the cluster. + </p> +<p> + Overall, ZooKeeper provides flexible configuration options for the reconfigure feature + that allow a user to choose based on user's security requirement. + We leave it to the discretion of the user to ensure that appropriate security measures are in place. + </p> +<dl> + +<dt> +<term> +<strong>Access Control</strong> +</term> +</dt> +<dd> +<p>The dynamic configuration is stored in a special znode + ZooDefs.CONFIG_NODE = /zookeeper/config. 
This node by default is read only + for all users, except the super user and users that are explicitly configured for write + access. + </p> +<p>Clients that need to use reconfig commands or reconfig API should be configured as users + that have write access to CONFIG_NODE. By default, only the super user has full control including + write access to CONFIG_NODE. Additional users can be granted write access through superuser + by setting an ACL that has write permission associated with the specified user. + </p> +<p>A few examples of how to set up ACLs and use reconfiguration API with authentication can be found in + ReconfigExceptionTest.java and TestReconfigServer.cc.</p> +</dd> + + +<dt> +<term> +<strong>Authentication</strong> +</term> +</dt> +<dd> +<p>Authentication of users is orthogonal to the access control and is delegated to + existing authentication mechanism supported by ZooKeeper's pluggable authentication schemes. + See <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL">ZooKeeper and SASL</a> for more details on this topic. + </p> +</dd> + + +<dt> +<term> +<strong>Disable ACL check</strong> +</term> +</dt> +<dd> +<p> + ZooKeeper supports <a href="zookeeperAdmin.html#sc_authOptions">"skipACL"</a> option such that ACL + check will be completely skipped, if skipACL is set to "yes". In such cases any unauthenticated + users can use reconfig API. + </p> +</dd> + +</dl> +<a name="sc_reconfig_retrieving"></a> +<h3 class="h4">Retrieving the current dynamic configuration</h3> +<p>The dynamic configuration is stored in a special znode + ZooDefs.CONFIG_NODE = /zookeeper/config. The new + <span class="codefrag command">config</span> CLI command reads this znode (currently it is + simply a wrapper to <span class="codefrag command">get /zookeeper/config</span>). 
As with + normal reads, to retrieve the latest committed value you should do a + <span class="codefrag command">sync</span> first.</p> +<pre class="code">[zk: 127.0.0.1:2791(CONNECTED) 3] config +server.1=localhost:2780:2783:participant;localhost:2791 +server.2=localhost:2781:2784:participant;localhost:2792 +server.3=localhost:2782:2785:participant;localhost:2793 +<strong>version=400000003</strong> +</pre> +<p>Notice the last line of the output. This is the configuration + version. The version equals to the zxid of the reconfiguration command + which created this configuration. The version of the first established + configuration equals to the zxid of the NEWLEADER message sent by the + first successfully established leader. When a configuration is written + to a dynamic configuration file, the version automatically becomes part + of the filename and the static configuration file is updated with the + path to the new dynamic configuration file. Configuration files + corresponding to earlier versions are retained for backup + purposes.</p> +<p>During boot time the version (if it exists) is extracted from the + filename. The version should never be altered manually by users or the + system administrator. It is used by the system to know which + configuration is most up-to-date. Manipulating it manually can result in + data loss and inconsistency.</p> +<p>Just like a <span class="codefrag command">get</span> command, the + <span class="codefrag command">config</span> CLI command accepts the <span class="codefrag option">-w</span> + flag for setting a watch on the znode, and <span class="codefrag option">-s</span> flag for + displaying the Stats of the znode. It additionally accepts a new flag + <span class="codefrag option">-c</span> which outputs only the version and the client + connection string corresponding to the current configuration. 
For + example, for the configuration above we would get:</p> +<pre class="code">[zk: 127.0.0.1:2791(CONNECTED) 17] config -c +400000003 localhost:2791,localhost:2793,localhost:2792</pre> +<p>Note that when using the API directly, this command is called + <span class="codefrag command">getConfig</span>.</p> +<p>As any read command it returns the configuration known to the + follower to which your client is connected, which may be slightly + out-of-date. One can use the <span class="codefrag command">sync</span> command for + stronger guarantees. For example using the Java API:</p> +<pre class="code">zk.sync(ZooDefs.CONFIG_NODE, void_callback, context); +zk.getConfig(watcher, callback, context);</pre> +<p>Note: in 3.5.0 it doesn't really matter which path is passed to the + <span class="codefrag command">sync() </span> command as all the server's state is brought + up to date with the leader (so one could use a different path instead of + ZooDefs.CONFIG_NODE). However, this may change in the future.</p> +<a name="sc_reconfig_modifying"></a> +<h3 class="h4">Modifying the current dynamic configuration</h3> +<p>Modifying the configuration is done through the + <span class="codefrag command">reconfig</span> command. There are two modes of + reconfiguration: incremental and non-incremental (bulk). The + non-incremental simply specifies the new dynamic configuration of the + system. The incremental specifies changes to the current configuration. 
+ The <span class="codefrag command">reconfig</span> command returns the new + configuration.</p> +<p>A few examples are in: <span class="codefrag filename">ReconfigTest.java</span>, + <span class="codefrag filename">ReconfigRecoveryTest.java</span> and + <span class="codefrag filename">TestReconfigServer.cc</span>.</p> +<a name="sc_reconfig_general"></a> +<h4>General</h4> +<p> +<strong>Removing servers:</strong> Any server can + be removed, including the leader (although removing the leader will + result in a short unavailability, see Figures 6 and 8 in the <a href="https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters">paper</a>). The server will not be shut-down automatically. + Instead, it becomes a "non-voting follower". This is somewhat similar + to an observer in that its votes don't count towards the Quorum of + votes necessary to commit operations. However, unlike a non-voting + follower, an observer doesn't actually see any operation proposals and + does not ACK them. Thus a non-voting follower has a more significant + negative effect on system throughput compared to an observer. + Non-voting follower mode should only be used as a temporary mode, + before shutting the server down, or adding it as a follower or as an + observer to the ensemble. We do not shut the server down automatically + for two main reasons. The first reason is that we do not want all the + clients connected to this server to be immediately disconnected, + causing a flood of connection requests to other servers. Instead, it + is better if each client decides when to migrate independently. 
The + second reason is that removing a server may sometimes (rarely) be + necessary in order to change it from "observer" to "participant" (this + is explained in the section <a href="#sc_reconfig_additional">Additional comments</a>).</p> +<p>Note that the new configuration should have some minimal number of + participants in order to be considered legal. If the proposed change + would leave the cluster with fewer than 2 participants and standalone + mode is enabled (standaloneEnabled=true, see the section <a href="#sc_reconfig_standaloneEnabled">The standaloneEnabled flag</a>), the reconfig will not be + processed (BadArgumentsException). If standalone mode is disabled + (standaloneEnabled=false) then it is legal to remain with 1 or more + participants.</p> +<p> +<strong>Adding servers:</strong> Before a + reconfiguration is invoked, the administrator must make sure that a + quorum (majority) of participants from the new configuration are + already connected and synced with the current leader. To achieve this + we need to connect a new joining server to the leader before it is + officially part of the ensemble. This is done by starting the joining + server using an initial list of servers which is technically not a + legal configuration of the system but (a) contains the joiner, and (b) + gives sufficient information to the joiner in order for it to find and + connect to the current leader. We list a few different options of + doing this safely.</p> +<ol> + +<li> + +<p>Initial configuration of joiners is comprised of servers in + the last committed configuration and one or more joiners, where + <strong>joiners are listed as observers.</strong> + For example, if servers D and E are added at the same time to (A, + B, C) and server C is being removed, the initial configuration of + D could be (A, B, C, D) or (A, B, C, D, E), where D and E are + listed as observers. 
Similarly, the configuration of E could be + (A, B, C, E) or (A, B, C, D, E), where D and E are listed as + observers. <strong>Note that listing the joiners as + observers will not actually make them observers - it will only + prevent them from accidentally forming a quorum with other + joiners.</strong> Instead, they will contact the servers in the + current configuration and adopt the last committed configuration + (A, B, C), where the joiners are absent. Configuration files of + joiners are backed up and replaced automatically as this happens. + After connecting to the current leader, joiners become non-voting + followers until the system is reconfigured and they are added to + the ensemble (as participant or observer, as appropriate).</p> + +</li> + +<li> + +<p>Initial configuration of each joiner is comprised of servers + in the last committed configuration + <strong>the + joiner itself, listed as a participant.</strong> For example, to + add a new server D to a configuration consisting of servers (A, B, + C), the administrator can start D using an initial configuration + file consisting of servers (A, B, C, D). If both D and E are added + at the same time to (A, B, C), the initial configuration of D + could be (A, B, C, D) and the configuration of E could be (A, B, + C, E). Similarly, if D is added and C is removed at the same time, + the initial configuration of D could be (A, B, C, D). Never list + more than one joiner as participant in the initial configuration + (see warning below).</p> + +</li> + +<li> + +<p>Whether listing the joiner as an observer or as participant, + it is also fine not to list all the current configuration servers, + as long as the current leader is in the list. For example, when + adding D we could start D with a configuration file consisting of + just (A, D) if A is the current leader. 
However, this is more + fragile since if A fails before D officially joins the ensemble, D + doesn’t know anyone else and therefore the administrator will have + to intervene and restart D with another server list.</p> + +</li> + +</ol> +<div class="note"> +<div class="label">Warning</div> +<div class="content"> + +<title>Warning</title> + +<p>Never specify more than one joining server in the same initial + configuration as participants. Currently, the joining servers don’t + know that they are joining an existing ensemble; if multiple joiners + are listed as participants they may form an independent quorum + creating a split-brain situation such as processing operations + independently from your main ensemble. It is OK to list multiple + joiners as observers in an initial config.</p> + +</div> +</div> +<p>If the configuration of existing servers changes or they become unavailable + before the joiner manages to connect and learn about configuration changes, the + joiner may need to be restarted with an updated configuration file in order to be + able to connect.</p> +<p>Finally, note that once connected to the leader, a joiner adopts + the last committed configuration, in which it is absent (the initial + config of the joiner is backed up before being rewritten). If the + joiner restarts in this state, it will not be able to boot since it is + absent from its configuration file. In order to start it you’ll once + again have to specify an initial configuration.</p> +<p> +<strong>Modifying server parameters:</strong> One + can modify any of the ports of a server, or its role + (participant/observer) by adding it to the ensemble with different + parameters. This works in both the incremental and the bulk + reconfiguration modes. It is not necessary to remove the server and + then add it back; just specify the new parameters as if the server is + not yet in the system. The server will detect the configuration change + and perform the necessary adjustments. 
See an example in the section + <a href="#sc_reconfig_incremental">Incremental mode</a> and an exception to this + rule in the section <a href="#sc_reconfig_additional">Additional comments</a>.</p> +<p>It is also possible to change the Quorum System used by the + ensemble (for example, change the Majority Quorum System to a + Hierarchical Quorum System on the fly). This, however, is only allowed + using the bulk (non-incremental) reconfiguration mode. In general, + incremental reconfiguration only works with the Majority Quorum + System. Bulk reconfiguration works with both Hierarchical and Majority + Quorum Systems.</p> +<p> +<strong>Performance Impact:</strong> There is + practically no performance impact when removing a follower, since it + is not being automatically shut down (the effect of removal is that + the server's votes are no longer being counted). When adding a server, + there is no leader change and no noticeable performance disruption. + For details and graphs please see Figures 6, 7 and 8 in the <a href="https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters">paper</a>.</p> +<p>The most significant disruption will happen when a leader change + is caused, in one of the following cases:</p> +<ol> + +<li> + +<p>Leader is removed from the ensemble.</p> + +</li> + +<li> + +<p>Leader's role is changed from participant to observer.</p> + +</li> + +<li> + +<p>The port used by the leader to send transactions to others + (quorum port) is modified.</p> + +</li> + +</ol> +<p>In these cases we perform a leader hand-off where the old leader + nominates a new leader. 
The resulting unavailability is usually + shorter than when a leader crashes since detecting leader failure is + unnecessary and electing a new leader can usually be avoided during a + hand-off (see Figures 6 and 8 in the <a href="https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters">paper</a>).</p> +<p>When the client port of a server is modified, it does not drop + existing client connections. New connections to the server will have + to use the new client port.</p> +<p> +<strong>Progress guarantees:</strong> Up to the + invocation of the reconfig operation, a quorum of the old + configuration is required to be available and connected for ZooKeeper + to be able to make progress. Once reconfig is invoked, a quorum of + both the old and of the new configurations must be available. The + final transition happens once (a) the new configuration is activated, + and (b) all operations scheduled before the new configuration is + activated by the leader are committed. Once (a) and (b) happen, only a + quorum of the new configuration is required. Note, however, that + neither (a) nor (b) is visible to a client. Specifically, when a + reconfiguration operation commits, it only means that an activation + message was sent out by the leader. It does not necessarily mean that + a quorum of the new configuration got this message (which is required + in order to activate it) or that (b) has happened. If one wants to + make sure that both (a) and (b) have already occurred (for example, in + order to know that it is safe to shut down old servers that were + removed), one can simply invoke an update + (<span class="codefrag command">set-data</span>, or some other quorum operation, but not + a <span class="codefrag command">sync</span>) and wait for it to commit. 
An alternative + way to achieve this was to introduce another round to the + reconfiguration protocol (which, for simplicity and compatibility with + Zab, we decided to avoid).</p> +<a name="sc_reconfig_incremental"></a> +<h4>Incremental mode</h4> +<p>The incremental mode allows adding and removing servers to the + current configuration. Multiple changes are allowed. For + example:</p> +<p> +<span class="codefrag userinput">> reconfig -remove 3 -add + server.5=125.23.63.23:1234:1235;1236</span> +</p> +<p>Both the add and the remove options get a list of comma separated + arguments (no spaces):</p> +<p> +<span class="codefrag userinput">> reconfig -remove 3,4 -add + server.5=localhost:2111:2112;2113,6=localhost:2114:2115:observer;2116</span> +</p> +<p>The format of the server statement is exactly the same as + described in the section <a href="#sc_reconfig_clientport">Specifying the client port</a> and + includes the client port. Notice that here instead of "server.5=" you + can just say "5=". In the example above, if server 5 is already in the + system, but has different ports or is not an observer, it is updated + and once the configuration commits becomes an observer and starts + using these new ports. This is an easy way to turn participants into + observers and vice versa or change any of their ports, without + rebooting the server.</p> +<p>ZooKeeper supports two types of Quorum Systems – the simple + Majority system (where the leader commits operations after receiving + ACKs from a majority of voters) and a more complex Hierarchical + system, where votes of different servers have different weights and + servers are divided into voting groups. 
Currently, incremental + reconfiguration is allowed only if the last proposed configuration + known to the leader uses a Majority Quorum System + (BadArgumentsException is thrown otherwise).</p> +<p>Incremental mode - examples using the Java API:</p> +<pre class="code">List<String> leavingServers = new ArrayList<String>(); +leavingServers.add("1"); +leavingServers.add("2"); +byte[] config = zk.reconfig(null, leavingServers, null, -1, new Stat());</pre> +<pre class="code">List<String> leavingServers = new ArrayList<String>(); +List<String> joiningServers = new ArrayList<String>(); +leavingServers.add("1"); +joiningServers.add("server.4=localhost:1234:1235;1236"); +byte[] config = zk.reconfig(joiningServers, leavingServers, null, -1, new Stat()); + +String configStr = new String(config); +System.out.println(configStr);</pre> +<p>There is also an asynchronous API, and an API accepting comma + separated Strings instead of List<String>. See + src/java/main/org/apache/zookeeper/ZooKeeper.java.</p> +<a name="sc_reconfig_nonincremental"></a> +<h4>Non-incremental mode</h4> +<p>The second mode of reconfiguration is non-incremental, whereby a + client gives a complete specification of the new dynamic system + configuration. The new configuration can either be given in place or + read from a file:</p> +<p> +<span class="codefrag userinput">> reconfig -file newconfig.cfg + </span>//newconfig.cfg is a dynamic config file, see <a href="#sc_reconfig_file">Dynamic configuration file</a> +</p> +<p> +<span class="codefrag userinput">> reconfig -members + server.1=125.23.63.23:2780:2783:participant;2791,server.2=125.23.63.24:2781:2784:participant;2792,server.3=125.23.63.25:2782:2785:participant;2793</span> +</p> +<p>The new configuration may use a different Quorum System. 
For + example, you may specify a Hierarchical Quorum System even if the + current ensemble uses a Majority Quorum System.</p> +<p>Bulk mode - example using the Java API:</p> +<pre class="code">ArrayList<String> newMembers = new ArrayList<String>(); +newMembers.add("server.1=1111:1234:1235;1236"); +newMembers.add("server.2=1112:1237:1238;1239"); +newMembers.add("server.3=1114:1240:1241:observer;1242"); + +byte[] config = zk.reconfig(null, null, newMembers, -1, new Stat()); + +String configStr = new String(config); +System.out.println(configStr);</pre> +<p>There is also an asynchronous API, and an API accepting comma + separated String containing the new members instead of + List<String>. See + src/java/main/org/apache/zookeeper/ZooKeeper.java.</p> +<a name="sc_reconfig_conditional"></a> +<h4>Conditional reconfig</h4> +<p>Sometimes (especially in non-incremental mode) a new proposed + configuration depends on what the client "believes" to be the current + configuration, and should be applied only to that configuration. 
+ Specifically, the <span class="codefrag command">reconfig</span> succeeds only if the + last configuration at the leader has the specified version.</p> +<p> +<span class="codefrag userinput">> reconfig -file <filename> -v <version></span> +</p> +<p>In the previously listed Java examples, instead of -1 one could + specify a configuration version to condition the + reconfiguration.</p> +<a name="sc_reconfig_errors"></a> +<h4>Error conditions</h4> +<p>In addition to normal ZooKeeper error conditions, a + reconfiguration may fail for the following reasons:</p> +<ol> + +<li> + +<p>another reconfig is currently in progress + (ReconfigInProgress)</p> + +</li> + +<li> + +<p>the proposed change would leave the cluster with fewer than 2 + participants while standalone mode is enabled + (BadArgumentsException); if standalone mode is disabled, it is + legal to remain with 1 or more participants</p> + +</li> + +<li> + +<p>no quorum of the new configuration was connected and + up-to-date with the leader when the reconfiguration processing + began (NewConfigNoQuorum)</p> + +</li> + +<li> + +<p> +<span class="codefrag userinput">-v x</span> was specified, but the version + <span class="codefrag userinput">y</span> of the latest configuration is not + <span class="codefrag userinput">x</span> (BadVersionException)</p> + +</li> + +<li> + +<p>an incremental reconfiguration was requested but the last + configuration at the leader uses a Quorum System which is + different from the Majority system (BadArgumentsException)</p> + +</li> + +<li> + +<p>syntax error (BadArgumentsException)</p> + +</li> + +<li> + +<p>I/O exception when reading the configuration from a file + (BadArgumentsException)</p> + +</li> + +</ol> +<p>Most of these are illustrated by test-cases in + <span class="codefrag filename">ReconfigFailureCases.java</span>.</p> +<a name="sc_reconfig_additional"></a> +<h4>Additional comments</h4> +<p> +<strong>Liveness:</strong> To better understand + the difference 
between incremental and non-incremental + reconfiguration, suppose that client C1 adds server D to the system + while a different client C2 adds server E. With the non-incremental + mode, each client would first invoke <span class="codefrag command">config</span> to find + out the current configuration, and then locally create a new list of + servers by adding its own suggested server. The new configuration can + then be submitted using the non-incremental + <span class="codefrag command">reconfig</span> command. After both reconfigurations + complete, only one of E or D will be added (not both), depending on + which client's request arrives second to the leader, overwriting the + previous configuration. The other client can repeat the process until + its change takes effect. This method guarantees system-wide progress + (i.e., for one of the clients), but does not ensure that every client + succeeds. To have more control C2 may request to only execute the + reconfiguration in case the version of the current configuration + hasn't changed, as explained in the section <a href="#sc_reconfig_conditional">Conditional reconfig</a>. In this way it may avoid blindly + overwriting the configuration of C1 if C1's configuration reached the + leader first.</p> +<p>With incremental reconfiguration, both changes will take effect as + they are simply applied by the leader one after the other to the + current configuration, whatever that is (assuming that the second + reconfig request reaches the leader after it sends a commit message + for the first reconfig request -- currently the leader will refuse to + propose a reconfiguration if another one is already pending). Since + both clients are guaranteed to make progress, this method guarantees + stronger liveness. In practice, multiple concurrent reconfigurations + are probably rare. Non-incremental reconfiguration is currently the + only way to dynamically change the Quorum System. 
Incremental + reconfiguration is currently only allowed with the Majority Quorum + System.</p> +<p> +<strong>Changing an observer into a + follower:</strong> Clearly, changing a server that participates in + voting into an observer may fail if error (2) occurs, i.e., if fewer + than the minimal allowed number of participants would remain. However, + converting an observer into a participant may sometimes fail for a + more subtle reason: Suppose, for example, that the current + configuration is (A, B, C, D), where A is the leader, B and C are + followers and D is an observer. In addition, suppose that B has + crashed. If a reconfiguration is submitted where D is said to become a + follower, it will fail with error (3) since in this configuration, a + majority of voters in the new configuration (any 3 voters), must be + connected and up-to-date with the leader. An observer cannot + acknowledge the history prefix sent during reconfiguration, and + therefore it does not count towards these 3 required servers and the + reconfiguration will be aborted. In case this happens, a client can + achieve the same task by two reconfig commands: first invoke a + reconfig to remove D from the configuration and then invoke a second + command to add it back as a participant (follower). During the + intermediate state D is a non-voting follower and can ACK the state + transfer performed during the second reconfig command.</p> +</div> + +<a name="ch_reconfig_rebalancing"></a> +<h2 class="h3">Rebalancing Client Connections</h2> +<div class="section"> +<p>When a ZooKeeper cluster is started, if each client is given the same + connection string (list of servers), the client will randomly choose a + server in the list to connect to, which makes the expected number of + client connections per server the same for each of the servers. We + implemented a method that preserves this property when the set of servers + changes through reconfiguration. 
See Sections 4 and 5.1 in the <a href="https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters">paper</a>.</p> +<p>In order for the method to work, all clients must subscribe to + configuration changes (by setting a watch on /zookeeper/config either + directly or through the <span class="codefrag command">getConfig</span> API command). When + the watch is triggered, the client should read the new configuration by + invoking <span class="codefrag command">sync</span> and <span class="codefrag command">getConfig</span> and if + the configuration is indeed new invoke the + <span class="codefrag command">updateServerList</span> API command. To avoid mass client + migration at the same time, it is better to have each client sleep a + random short period of time before invoking + <span class="codefrag command">updateServerList</span>.</p> +<p>A few examples can be found in: + <span class="codefrag filename">StaticHostProviderTest.java</span> and + <span class="codefrag filename">TestReconfig.cc</span> +</p> +<p>Example (this is not a recipe, but a simplified example just to + explain the general idea):</p> +<pre class="code"> +public void process(WatchedEvent event) { + synchronized (this) { + if (event.getType() == EventType.None) { + connected = (event.getState() == KeeperState.SyncConnected); + notifyAll(); + } else if (event.getPath()!=null && event.getPath().equals(ZooDefs.CONFIG_NODE)) { + // in prod code never block the event thread! 
+ zk.sync(ZooDefs.CONFIG_NODE, this, null); + zk.getConfig(this, this, null); + } + } +} +public void processResult(int rc, String path, Object ctx, byte[] data, Stat stat) { + if (path!=null && path.equals(ZooDefs.CONFIG_NODE)) { + String config[] = ConfigUtils.getClientConfigStr(new String(data)).split(" "); // similar to config -c + long version = Long.parseLong(config[0], 16); + if (this.configVersion == null){ + this.configVersion = version; + } else if (version > this.configVersion) { + hostList = config[1]; + try { + // the following command is not blocking but may cause the client to close the socket and + // migrate to a different server. In practice its better to wait a short period of time, chosen + // randomly, so that different clients migrate at different times + zk.updateServerList(hostList); + } catch (IOException e) { + System.err.println("Error updating server list"); + e.printStackTrace(); + } + this.configVersion = version; +} } }</pre> +</div> + +<p align="right"> +<font size="-2"></font> +</p> +</div> +<!--+ + |end content + +--> +<div class="clearboth"> </div> +</div> +<div id="footer"> +<!--+ + |start bottomstrip + +--> +<div class="lastmodified"> +<script type="text/javascript"><!-- +document.write("Last Published: " + document.lastModified); +// --></script> +</div> +<div class="copyright"> + Copyright © + <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> +</div> +<!--+ + |end bottomstrip + +--> +</div> +</body> +</html> http://git-wip-us.apache.org/repos/asf/zookeeper/blob/ec4ec140/content/doc/r3.5.4-beta/zookeeperReconfig.pdf ---------------------------------------------------------------------- diff --git a/content/doc/r3.5.4-beta/zookeeperReconfig.pdf b/content/doc/r3.5.4-beta/zookeeperReconfig.pdf new file mode 100644 index 0000000..9b292d4 Binary files /dev/null and b/content/doc/r3.5.4-beta/zookeeperReconfig.pdf differ
