Modified: helix/site-content/UseCases.html URL: http://svn.apache.org/viewvc/helix/site-content/UseCases.html?rev=1900177&r1=1900176&r2=1900177&view=diff ============================================================================== --- helix/site-content/UseCases.html (original) +++ helix/site-content/UseCases.html Sat Apr 23 01:14:12 2022 @@ -1,283 +1,219 @@ - <!DOCTYPE html> -<!-- - Generated by Apache Maven Doxia at 2022-04-23 - Rendered using Reflow Maven Skin 1.1.1 (http://andriusvelykis.github.io/reflow-maven-skin) ---> -<html xml:lang="en" lang="en"> - <head> - <meta charset="UTF-8" /> - <title>Apache Helix – Use Cases</title> - <meta name="viewport" content="width=device-width, initial-scale=1.0" /> - <meta name="description" content="" /> - <meta http-equiv="content-language" content="en" /> - - <link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/css/bootstrap.min.css" rel="stylesheet" /> - <link href="./css/docs.css" rel="stylesheet" /> - <link href="./css/reflow-skin.css" rel="stylesheet" /> - - - <link href="./css/lightbox.css" rel="stylesheet" /> - - <link href="./css/site.css" rel="stylesheet" /> - <link href="./css/print.css" rel="stylesheet" media="print" /> - - <!-- Le HTML5 shim, for IE6-8 support of HTML5 elements --> - <!--[if lt IE 9]> - <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> - <![endif]--> - - - - </head> - - <body class="page-$config.fileId project-$config.projectId" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target"> - - <div class="navbar navbar-fixed-top"> - <div class="navbar-inner"> - <div class="container"> - <a class="btn btn-navbar" data-toggle="collapse" data-target="#top-nav-collapse"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </a> - <div class="nav-collapse collapse" id="top-nav-collapse"> - <ul class="nav pull-right"> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Learn <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="Core Concepts">Core Concepts</a></li> - <li class="active"><a href="" title="Architecture">Architecture</a></li> - <li class="active"><a href="" title="Publications">Publications</a></li> - <li class="active"><a href="" title="Client Libraries">Client Libraries</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Documentation <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="1.0.3">1.0.3</a></li> - <li class="active"><a href="" title="1.0.2">1.0.2</a></li> - <li class="active"><a href="" title="0.9.10 (0.9.9)">0.9.10 (0.9.9)</a></li> - <li class="active"><a href="" title="trunk">trunk</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Helix 1.0.3 <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="Documentation">Documentation</a></li> - <li class="active"><a href="" title="Quick Start">Quick Start</a></li> - <li class="active"><a href="" title="Tutorial">Tutorial</a></li> - <li class="active"><a href="" title="Download">Download</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Helix 0.9.10 (0.9.9) <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="Documentation">Documentation</a></li> - <li class="active"><a href="" title="Quick Start">Quick Start</a></li> - <li class="active"><a href="" title="Tutorial">Tutorial</a></li> - <li class="active"><a href="" title="Download">Download</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Involved <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="IRC">IRC</a></li> - <li class="active"><a href="" title="Mailing Lists">Mailing Lists</a></li> - <li class="active"><a href="" title="Issues">Issues</a></li> - <li class="active"><a href="" title="Team">Team</a></li> - <li class="active"><a href="" title="Sources">Sources</a></li> - <li class="active"><a href="" title="Continuous Integration">Continuous Integration</a></li> - <li class="active"><a href="" title="Building Guide">Building Guide</a></li> - <li class="active"><a href="" title="Release Guide">Release Guide</a></li> - <li class="active"><a href="" title="Improve this Website">Improve this Website</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">ASF <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="ASF Home">ASF Home</a></li> - <li class="active"><a href="" title="License">License</a></li> - <li class="active"><a href="" title="Sponsorship">Sponsorship</a></li> - <li class="active"><a href="" title="Thanks">Thanks</a></li> - <li class="active"><a href="" title="Security">Security</a></li> - </ul> - </li> - </ul> - </div><!--/.nav-collapse --> - </div> - </div> - </div> - - <div class="container"> - - <!-- Masthead - ================================================== --> - - <header> - <div class="jumbotron subhead"> - <div class="row" id="banner"> - <div class="span12"> - <div class="pull-left"> - <a href="" id="bannerLeft"><img src="" alt='"''"' /></a> - </div> - <div class="pull-right"> - <a href="http://www.apache.org/" id="bannerRight"><img src="" alt='"''"' /></a> - </div> - </div> - </div> - </div> - <div> - <ul class="breadcrumb"> - <li><a href="" title="Apache Helix">Apache Helix</a></li> - <li class="divider">/</li> - <li>Use Cases</li> - </ul> - </div> - </header> - - <div class="main-body"> - <div class="row"> - <div class="span12"> - <div class="body-content"> -$bodyWithHeader - </div> - </div> - </div> - </div> - - </div><!-- /container --> - - <!-- Footer - ================================================== --> - <footer class="well"> - <div class="container"> - <div class="row"> - <div class="span9 bottom-nav"> - <ul class="nav nav-list"> - <li class="nav-header">Learn</li> - <li class="active"> - <a href="#" title="Core Concepts">Core Concepts</a> - </li> - <li class="active"> - <a href="#" title="Architecture">Architecture</a> - </li> - <li class="active"> - <a href="#" title="Publications">Publications</a> - </li> - <li class="active"> - <a href="#" title="Client Libraries">Client Libraries</a> - </li> - <li class="nav-header">Documentation</li> - <li class="active"> - <a href="#" title="1.0.3">1.0.3</a> - </li> - <li class="active"> - <a href="#" title="1.0.2">1.0.2</a> - </li> - <li class="active"> - <a href="#" title="0.9.10 (0.9.9)">0.9.10 (0.9.9)</a> - </li> - <li class="active"> - <a href="#" title="trunk">trunk</a> - </li> - <li class="nav-header">Helix 1.0.3</li> - <li class="active"> - <a href="#" title="Documentation">Documentation</a> - </li> - <li class="active"> - <a href="#" title="Quick Start">Quick Start</a> - </li> - <li class="active"> - <a href="#" title="Tutorial">Tutorial</a> - </li> - <li class="active"> - <a href="#" title="Download">Download</a> - </li> - <li class="nav-header">Helix 0.9.10 (0.9.9)</li> - <li class="active"> - <a href="#" title="Documentation">Documentation</a> - </li> - <li class="active"> - <a href="#" title="Quick Start">Quick Start</a> - </li> - <li class="active"> - <a href="#" title="Tutorial">Tutorial</a> - </li> - <li class="active"> - <a href="#" title="Download">Download</a> - </li> - <li class="nav-header">Get Involved</li> - <li class="active"> - <a href="#" title="IRC">IRC</a> - </li> - <li class="active"> - <a href="#" title="Mailing Lists">Mailing Lists</a> - </li> - <li class="active"> - <a href="#" title="Issues">Issues</a> - </li> - <li class="active"> - <a href="#" title="Team">Team</a> - </li> - <li class="active"> - <a href="#" title="Sources">Sources</a> - </li> - <li class="active"> - <a href="#" title="Continuous Integration">Continuous Integration</a> - </li> - <li class="active"> - <a href="#" title="Building Guide">Building Guide</a> - </li> - <li class="active"> - <a href="#" title="Release Guide">Release Guide</a> - </li> - <li class="active"> - <a href="#" title="Improve this Website">Improve this Website</a> - </li> - <li class="nav-header">ASF</li> - <li class="active"> - <a href="#" title="ASF Home">ASF Home</a> - </li> - <li class="active"> - <a href="#" title="License">License</a> - </li> - <li class="active"> - <a href="#" title="Sponsorship">Sponsorship</a> - </li> - <li class="active"> - <a href="#" title="Thanks">Thanks</a> - </li> - <li class="active"> - <a href="#" title="Security">Security</a> - </li> - </ul> - </div> - </div> - </div> - </footer> - - <div class="container subfooter"> - <div class="row"> - <div class="span12"> - <p class="pull-right"><a href="#">Back to top</a></p> - <p class="copyright">Copyright ©2022 <a href="https://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p> - <p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p> - </div> - </div> - </div> - - <!-- Le javascript - ================================================== --> - <!-- Placed at the end of the document so the pages load faster --> - <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script> - - <script src="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/js/bootstrap.min.js"></script> - <script src="./js/lightbox.min.js"></script> - <script src="./js/reflow-scroll.js"></script> - <script src="./js/reflow-skin.js"></script> +<!-- + | Generated by Apache Maven Doxia Site Renderer 1.11.1 from src/site/markdown/UseCases.md at 2022-04-23 + | Rendered using Apache Maven Fluido Skin 1.11.0-SNAPSHOT +--> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en"> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1" /> + <meta name="generator" content="Apache Maven Doxia Site Renderer 1.11.1" /> + <title>Apache Helix – Use Cases</title> + <link rel="stylesheet" href="./css/apache-maven-fluido-1.11.0-SNAPSHOT.min.css" /> + <link rel="stylesheet" href="./css/site.css" /> + <link rel="stylesheet" href="./css/print.css" media="print" /> + <script src="./js/apache-maven-fluido-1.11.0-SNAPSHOT.min.js"></script> +<script type="text/javascript"> + + var _gaq = _gaq || []; + _gaq.push(['_setAccount', 'UA-3211522-12']); + _gaq.push(['_trackPageview']); + + (function() { + var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; + ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; + var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); + })(); + + </script> + </head> + <body class="topBarDisabled"> + <div class="container-fluid"> + <header> + <div id="banner"> + <div class="pull-left"><a href="https://helix.apache.org/" id="bannerLeft"><img src="images/helix-logo.jpg" alt=""/></a></div> + <div class="pull-right"><a href="https://www.apache.org/" id="bannerRight"><img src="images/feather_small.gif" alt=""/></a></div> + <div class="clear"><hr/></div> + </div> + + <div id="breadcrumbs"> + <ul class="breadcrumb"> + <li class=""><a href="./" title="Apache Helix">Apache Helix</a><span class="divider">/</span></li> + <li class="active ">Use Cases</li> + </ul> + </div> + </header> + <div class="row-fluid"> + <header id="leftColumn" class="span2"> + <nav class="well sidebar-nav"> + <ul class="nav nav-list"> + <li class="nav-header">Learn</li> + <li><a href="Concepts.html" title="Core Concepts"><span class="none"></span>Core Concepts</a></li> + <li><a href="Architecture.html" title="Architecture"><span class="none"></span>Architecture</a></li> + <li><a href="Publications.html" title="Publications"><span class="none"></span>Publications</a></li> + <li><a href="ClientLibraries.html" title="Client Libraries"><span class="none"></span>Client Libraries</a></li> + <li class="nav-header">Documentation</li> + <li><a href="1.0.3-docs/index.html" title="1.0.3"><span class="none"></span>1.0.3</a></li> + <li><a href="1.0.2-docs/index.html" title="1.0.2"><span class="none"></span>1.0.2</a></li> + <li><a href="0.9.9-docs/index.html" title="0.9.10 (0.9.9)"><span class="none"></span>0.9.10 (0.9.9)</a></li> + <li><a href="trunk-docs/index.html" title="trunk"><span class="none"></span>trunk</a></li> + <li class="nav-header">Helix 1.0.3</li> + <li><a href="1.0.3-docs/index.html" title="Documentation"><span class="none"></span>Documentation</a></li> + <li><a href="1.0.3-docs/Quickstart.html" title="Quick Start"><span class="none"></span>Quick Start</a></li> + <li><a href="1.0.3-docs/Tutorial.html" title="Tutorial"><span class="none"></span>Tutorial</a></li> + <li><a href="1.0.3-docs/download.html" title="Download"><span class="none"></span>Download</a></li> + <li class="nav-header">Helix 0.9.10 (0.9.9)</li> + <li><a href="0.9.9-docs/index.html" title="Documentation"><span class="none"></span>Documentation</a></li> + <li><a href="0.9.9-docs/Quickstart.html" title="Quick Start"><span class="none"></span>Quick Start</a></li> + <li><a href="0.9.9-docs/Tutorial.html" title="Tutorial"><span class="none"></span>Tutorial</a></li> + <li><a href="0.9.9-docs/download.html" title="Download"><span class="none"></span>Download</a></li> + <li class="nav-header">Get Involved</li> + <li><a href="IRC.html" title="IRC"><span class="none"></span>IRC</a></li> + <li><a href="mail-lists.html" title="Mailing Lists"><span class="none"></span>Mailing Lists</a></li> + <li><a href="issue-tracking.html" title="Issues"><span class="none"></span>Issues</a></li> + <li><a href="team-list.html" title="Team"><span class="none"></span>Team</a></li> + <li><a href="sources.html" title="Sources"><span class="none"></span>Sources</a></li> + <li><a href="integration.html" title="Continuous Integration"><span class="none"></span>Continuous Integration</a></li> + <li><a href="involved/building.html" title="Building Guide"><span class="none"></span>Building Guide</a></li> + <li><a href="releasing.html" title="Release Guide"><span class="none"></span>Release Guide</a></li> + <li><a href="involved/contribdocs.html" title="Improve this Website"><span class="none"></span>Improve this Website</a></li> + <li class="nav-header">ASF</li> + <li><a href="http://www.apache.org/" class="externalLink" title="ASF Home"><span class="none"></span>ASF Home</a></li> + <li><a href="http://www.apache.org/licenses/" class="externalLink" title="License"><span class="none"></span>License</a></li> + <li><a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink" title="Sponsorship"><span class="none"></span>Sponsorship</a></li> + <li><a href="http://www.apache.org/foundation/thanks.html" class="externalLink" title="Thanks"><span class="none"></span>Thanks</a></li> + <li><a href="http://www.apache.org/security/" class="externalLink" title="Security"><span class="none"></span>Security</a></li> + </ul> + </nav> + <div class="well sidebar-nav"> + <hr /> + <div id="poweredBy"> + <div class="clear"></div> + <div class="clear"></div> + <div class="clear"></div> +<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy"><img class="builtBy" alt="Built by Maven" src="./images/logos/maven-feather.png" /></a> + </div> + </div> + </header> + <main id="bodyColumn" class="span10" > +<!--- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> - </body> +<h1>Use cases at LinkedIn</h1> +<p>At LinkedIn Helix framework is used to manage 3 distributed data systems which are quite different from each other.</p> +<ul> + +<li>Espresso</li> +<li>Databus</li> +<li>Search As A Service</li> +</ul><section> +<h2><a name="Espresso"></a>Espresso</h2> +<p>Espresso is a distributed, timeline consistent, scal- able, document store that supports local secondary indexing and local transactions. +Espresso databases are horizontally partitioned into a number of partitions, with each partition having a certain number of replicas +distributed across the storage nodes. +Espresso designates one replica of each partition as master and the rest as slaves; only one master may exist for each partition at any time. +Espresso enforces timeline consistency where only the master of a partition can accept writes to its records, and all slaves receive and +apply the same writes through a replication stream. +For load balancing, both master and slave partitions are assigned evenly across all storage nodes. +For fault tolerance, it adds the constraint that no two replicas of the same partition may be located on the same node.</p><section> +<h3><a name="State_model"></a>State model</h3> +<p>Espresso follows a Master-Slave state model. A replica can be in Offline,Slave or Master state. +The state machine table describes the next state given the Current State, Final State</p> + +<div class="source"><pre class="prettyprint"><code> OFFLINE | SLAVE | MASTER + _____________________________ + | | | | +OFFLINE | N/A | SLAVE | SLAVE | + |__________|________|_________| + | | | | +SLAVE | OFFLINE | N/A | MASTER | + |__________|________|_________| + | | | | +MASTER | SLAVE | SLAVE | N/A | + |__________|________|_________| + +</code></pre></div></section><section> +<h3><a name="Constraints"></a>Constraints</h3> +<ul> + +<li>Max number of replicas in Master state:1</li> +<li>Execution mode AUTO. i.e on node failure no new replicas will be created. Only the State of remaining replicas will be changed.</li> +<li>Number of mastered partitions on each node must be approximately same.</li> +<li>The above constraint must be satisfied when a node fails or a new node is added.</li> +<li>When new nodes are added the number of partitions moved must be minimized.</li> +<li>When new nodes are added the max number of OFFLINE-SLAVE transitions that can happen concurrently on new node is X.</li> +</ul></section></section><section> +<h2><a name="Databus"></a>Databus</h2> +<p>Databus is a change data capture (CDC) system that provides a common pipeline for transporting events +from LinkedIn primary databases to caches within various applications. +Databus deploys a cluster of relays that pull the change log from multiple databases and +let consumers subscribe to the change log stream. Each Databus relay connects to one or more database servers and +hosts a certain subset of databases (and partitions) from those database servers.</p> +<p>For a large partitioned database (e.g. Espresso), the change log is consumed by a bank of consumers. +Each databus partition is assigned to a consumer such that partitions are evenly distributed across consumers and each partition is +assigned to exactly one consumer at a time. The set of consumers may grow over time, and consumers may leave the group due to planned or unplanned +outages. In these cases, partitions must be reassigned, while maintaining balance and the single consumer-per-partition invariant.</p><section> +<h3><a name="State_model"></a>State model</h3> +<p>Databus consumers follow a simple Offline-Online state model. +The state machine table describes the next state given the Current State, Final State</p> + +<div class="source"><pre class="prettyprint"><code> + OFFLINE | ONLINE | + ___________________| + | | | +OFFLINE | N/A | ONLINE | + |__________|________| + | | | +ONLINE | OFFLINE | N/A | + |__________|________| + + +</code></pre></div> +</section></section><section> +<h2><a name="Search_As_A_Service"></a>Search As A Service</h2> +<p>LinkedIn�s Search-as-a-service lets internal customers define custom indexes on a chosen dataset +and then makes those indexes searchable via a service API. The index service runs on a cluster of machines. +The index is broken into partitions and each partition has a configured number of replicas. +Each cluster server runs an instance of the Sensei system (an online index store) and hosts index partitions. +Each new indexing service gets assigned to a set of servers, and the partition replicas must be evenly distributed across those servers.</p><section> +<h3><a name="State_model"></a>State model</h3> +<p><img src="images/bootstrap_statemodel.gif" alt="Helix Design" /></p></section></section> + </main> + </div> + </div> + <hr/> + <footer> + <div class="container-fluid"> + <div class="row-fluid"> +<div class="row span16"><div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation. + All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div> + <a href="http://helix.apache.org/privacy-policy.html">Privacy Policy</a> + </div> + </div> + </div> + </footer> +<script> + if(anchors) { + anchors.add(); + } +</script> + </body> </html> \ No newline at end of file
Modified: helix/site-content/css/print.css URL: http://svn.apache.org/viewvc/helix/site-content/css/print.css?rev=1900177&r1=1900176&r2=1900177&view=diff ============================================================================== --- helix/site-content/css/print.css (original) +++ helix/site-content/css/print.css Sat Apr 23 01:14:12 2022 @@ -1,80 +1,21 @@ -.navbar, -.breadcrumb, -.toc-separator -#toc-bar, -#toc-sidebar, -footer, -.subfooter { - display: none !important; -} - -body { - padding-top: 0px !important; -} - -/* CSS below taken from HTML5 Boilerplate */ -* { - background: transparent !important; - color: #000 !important; /* Black prints faster: h5bp.com/s */ - box-shadow:none !important; - text-shadow: none !important; -} - -a, -a:visited { - text-decoration: underline; -} - -a[href]:after { - content: " (" attr(href) ")"; -} - -abbr[title]:after { - content: " (" attr(title) ")"; -} - /* - * Don't show links for images, or javascript/internal links, or header links + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -header a:after, -.ir a:after, -a[href^="javascript:"]:after, -a[href^="#"]:after { - content: "" !important; -} - -pre, -blockquote { - border: 1px solid #999; - page-break-inside: avoid; -} - -thead { - display: table-header-group; /* h5bp.com/t */ -} - -tr, -img { - page-break-inside: avoid; -} - -img { - max-width: 100% !important; -} - -@page { - margin: 0.5cm; -} - -p, -h2, -h3 { - orphans: 3; - widows: 3; -} - -h2, -h3 { - page-break-after: avoid; -} \ No newline at end of file +#banner, #footer, #leftcol, #breadcrumbs, .docs #toc, .docs .courtesylinks, #leftColumn, #navColumn {display: none !important;} +#bodyColumn, body.docs div.docs {margin: 0 !important;border: none !important} \ No newline at end of file Modified: helix/site-content/design/crush-ed.html URL: http://svn.apache.org/viewvc/helix/site-content/design/crush-ed.html?rev=1900177&r1=1900176&r2=1900177&view=diff ============================================================================== --- helix/site-content/design/crush-ed.html (original) +++ helix/site-content/design/crush-ed.html Sat Apr 23 01:14:12 2022 @@ -1,283 +1,481 @@ - <!DOCTYPE html> -<!-- - Generated by Apache Maven Doxia at 2022-04-23 - Rendered using Reflow Maven Skin 1.1.1 (http://andriusvelykis.github.io/reflow-maven-skin) ---> -<html xml:lang="en" lang="en"> - <head> - <meta charset="UTF-8" /> - <title>Apache Helix – CRUSH-ed for even distribution</title> - <meta name="viewport" content="width=device-width, initial-scale=1.0" /> - <meta name="description" content="" /> - <meta http-equiv="content-language" content="en" /> - - <link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/css/bootstrap.min.css" rel="stylesheet" /> - <link href="../css/docs.css" rel="stylesheet" /> - <link href="../css/reflow-skin.css" rel="stylesheet" /> - - - <link href="../css/lightbox.css" rel="stylesheet" /> - - <link href="../css/site.css" rel="stylesheet" /> - <link href="../css/print.css" rel="stylesheet" media="print" /> - - <!-- Le HTML5 shim, for IE6-8 support of HTML5 elements --> - <!--[if lt IE 9]> - <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> - <![endif]--> - - - - </head> - - <body class="page-$config.fileId project-$config.projectId" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target"> - - <div class="navbar navbar-fixed-top"> - <div class="navbar-inner"> - <div class="container"> - <a class="btn btn-navbar" data-toggle="collapse" data-target="#top-nav-collapse"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </a> - <div class="nav-collapse collapse" id="top-nav-collapse"> - <ul class="nav pull-right"> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Learn <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="Core Concepts">Core Concepts</a></li> - <li class="active"><a href="" title="Architecture">Architecture</a></li> - <li class="active"><a href="" title="Publications">Publications</a></li> - <li class="active"><a href="" title="Client Libraries">Client Libraries</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Documentation <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="1.0.3">1.0.3</a></li> - <li class="active"><a href="" title="1.0.2">1.0.2</a></li> - <li class="active"><a href="" title="0.9.10 (0.9.9)">0.9.10 (0.9.9)</a></li> - <li class="active"><a href="" title="trunk">trunk</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Helix 1.0.3 <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="Documentation">Documentation</a></li> - <li class="active"><a href="" title="Quick Start">Quick Start</a></li> - <li class="active"><a href="" title="Tutorial">Tutorial</a></li> - <li class="active"><a href="" title="Download">Download</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Helix 0.9.10 (0.9.9) <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="Documentation">Documentation</a></li> - <li class="active"><a href="" title="Quick Start">Quick Start</a></li> - <li class="active"><a href="" title="Tutorial">Tutorial</a></li> - <li class="active"><a href="" title="Download">Download</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Involved <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="IRC">IRC</a></li> - <li class="active"><a href="" title="Mailing Lists">Mailing Lists</a></li> - <li class="active"><a href="" title="Issues">Issues</a></li> - <li class="active"><a href="" title="Team">Team</a></li> - <li class="active"><a href="" title="Sources">Sources</a></li> - <li class="active"><a href="" title="Continuous Integration">Continuous Integration</a></li> - <li class="active"><a href="" title="Building Guide">Building Guide</a></li> - <li class="active"><a href="" title="Release Guide">Release Guide</a></li> - <li class="active"><a href="" title="Improve this Website">Improve this Website</a></li> - </ul> - </li> - <li class="dropdown active"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown">ASF <b class="caret"></b></a> - <ul class="dropdown-menu"> - <li class="active"><a href="" title="ASF Home">ASF Home</a></li> - <li class="active"><a href="" title="License">License</a></li> - <li class="active"><a href="" title="Sponsorship">Sponsorship</a></li> - <li class="active"><a href="" title="Thanks">Thanks</a></li> - <li class="active"><a href="" title="Security">Security</a></li> - </ul> - </li> - </ul> - </div><!--/.nav-collapse --> - </div> - </div> - </div> - - <div class="container"> - - <!-- Masthead - ================================================== --> - - <header> - <div class="jumbotron subhead"> - <div class="row" id="banner"> - <div class="span12"> - <div class="pull-left"> - <a href="" id="bannerLeft"><img src="" alt='"''"' /></a> - </div> - <div class="pull-right"> - <a href="http://www.apache.org/" id="bannerRight"><img src="" alt='"''"' /></a> - </div> - </div> - </div> - </div> - <div> - <ul class="breadcrumb"> - <li><a href="" title="Apache Helix">Apache Helix</a></li> - <li class="divider">/</li> - <li>CRUSH-ed for even distribution</li> - </ul> - </div> - </header> - - <div class="main-body"> - <div class="row"> - <div class="span12"> - <div class="body-content"> -$bodyWithHeader - </div> - </div> - </div> - </div> - - </div><!-- /container --> - - <!-- Footer - ================================================== --> - <footer class="well"> - <div class="container"> - <div class="row"> - <div class="span9 bottom-nav"> - <ul class="nav nav-list"> - <li class="nav-header">Learn</li> - <li class="active"> - <a href="#" title="Core Concepts">Core Concepts</a> - </li> - <li class="active"> - <a href="#" title="Architecture">Architecture</a> - </li> - <li class="active"> - <a href="#" title="Publications">Publications</a> - </li> - <li class="active"> - <a href="#" title="Client Libraries">Client Libraries</a> - </li> - <li class="nav-header">Documentation</li> - <li class="active"> - <a href="#" title="1.0.3">1.0.3</a> - </li> - <li class="active"> - <a href="#" title="1.0.2">1.0.2</a> - </li> - <li class="active"> - <a href="#" title="0.9.10 (0.9.9)">0.9.10 (0.9.9)</a> - </li> - <li class="active"> - <a href="#" title="trunk">trunk</a> - </li> - <li class="nav-header">Helix 1.0.3</li> - <li class="active"> - <a href="#" title="Documentation">Documentation</a> - </li> - <li class="active"> - <a href="#" title="Quick Start">Quick Start</a> - </li> - <li class="active"> - <a href="#" title="Tutorial">Tutorial</a> - </li> - <li class="active"> - <a href="#" title="Download">Download</a> - </li> - <li class="nav-header">Helix 0.9.10 (0.9.9)</li> - <li class="active"> - <a href="#" title="Documentation">Documentation</a> - </li> - <li class="active"> - <a href="#" title="Quick Start">Quick Start</a> - </li> - <li class="active"> - <a href="#" title="Tutorial">Tutorial</a> - </li> - <li class="active"> - <a href="#" title="Download">Download</a> - </li> - <li class="nav-header">Get Involved</li> - <li class="active"> - <a href="#" title="IRC">IRC</a> - </li> - <li class="active"> - <a href="#" title="Mailing Lists">Mailing Lists</a> - </li> - <li class="active"> - <a href="#" title="Issues">Issues</a> - </li> - <li class="active"> - <a href="#" title="Team">Team</a> - </li> - <li class="active"> - <a href="#" title="Sources">Sources</a> - </li> - <li class="active"> - <a href="#" title="Continuous Integration">Continuous Integration</a> - </li> - <li class="active"> - <a href="#" title="Building Guide">Building Guide</a> - </li> - <li class="active"> - <a href="#" title="Release Guide">Release Guide</a> - </li> - <li class="active"> - <a href="#" title="Improve this Website">Improve this Website</a> - </li> - <li class="nav-header">ASF</li> - <li class="active"> - <a href="#" title="ASF Home">ASF Home</a> - </li> - <li class="active"> - <a href="#" title="License">License</a> - </li> - <li class="active"> - <a href="#" title="Sponsorship">Sponsorship</a> - </li> - <li class="active"> - <a href="#" title="Thanks">Thanks</a> - </li> - <li class="active"> - <a href="#" title="Security">Security</a> - </li> - </ul> - </div> - </div> - </div> - </footer> - - <div class="container subfooter"> - <div class="row"> - <div class="span12"> - <p class="pull-right"><a href="#">Back to top</a></p> - <p class="copyright">Copyright ©2022 <a href="https://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p> - <p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p> - </div> - </div> - </div> - - <!-- Le javascript - ================================================== --> - <!-- Placed at the end of the document so the pages load faster --> - <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script> - - <script src="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/js/bootstrap.min.js"></script> - <script src="../js/lightbox.min.js"></script> - <script src="../js/reflow-scroll.js"></script> - <script src="../js/reflow-skin.js"></script> - - </body> +<!-- + | Generated by Apache Maven Doxia Site Renderer 1.11.1 from src/site/markdown/design/crush-ed.md at 2022-04-23 + | Rendered using Apache Maven Fluido Skin 1.11.0-SNAPSHOT +--> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en"> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1" /> + <meta name="generator" content="Apache Maven Doxia Site Renderer 1.11.1" /> + <title>Apache Helix – CRUSH-ed for even distribution</title> + <link rel="stylesheet" href="../css/apache-maven-fluido-1.11.0-SNAPSHOT.min.css" /> + <link rel="stylesheet" href="../css/site.css" /> + <link rel="stylesheet" href="../css/print.css" media="print" /> + <script src="../js/apache-maven-fluido-1.11.0-SNAPSHOT.min.js"></script> +<script type="text/javascript"> + + var _gaq = _gaq || []; + _gaq.push(['_setAccount', 'UA-3211522-12']); + _gaq.push(['_trackPageview']); + + (function() { + var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; + ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; + var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); + })(); + + </script> + </head> + <body class="topBarDisabled"> + <div class="container-fluid"> + <header> + <div id="banner"> + <div class="pull-left"><a href="https://helix.apache.org/" id="bannerLeft"><img src="../images/helix-logo.jpg" alt=""/></a></div> + <div class="pull-right"><a href="https://www.apache.org/" id="bannerRight"><img src="../images/feather_small.gif" alt=""/></a></div> + <div class="clear"><hr/></div> + </div> + + <div id="breadcrumbs"> + <ul class="breadcrumb"> + <li class=""><a href=".././" title="Apache Helix">Apache Helix</a><span class="divider">/</span></li> + <li class="active ">CRUSH-ed for even distribution</li> + </ul> + </div> + </header> + <div class="row-fluid"> + <header id="leftColumn" class="span2"> + <nav class="well sidebar-nav"> + <ul class="nav nav-list"> + <li class="nav-header">Learn</li> + <li><a href="../Concepts.html" title="Core Concepts"><span class="none"></span>Core Concepts</a></li> + <li><a href="../Architecture.html" title="Architecture"><span class="none"></span>Architecture</a></li> + <li><a href="../Publications.html" title="Publications"><span class="none"></span>Publications</a></li> + <li><a href="../ClientLibraries.html" title="Client Libraries"><span class="none"></span>Client Libraries</a></li> + <li class="nav-header">Documentation</li> + <li><a href="../1.0.3-docs/index.html" title="1.0.3"><span class="none"></span>1.0.3</a></li> + <li><a href="../1.0.2-docs/index.html" title="1.0.2"><span class="none"></span>1.0.2</a></li> + <li><a href="../0.9.9-docs/index.html" title="0.9.10 (0.9.9)"><span class="none"></span>0.9.10 (0.9.9)</a></li> + <li><a href="../trunk-docs/index.html" title="trunk"><span class="none"></span>trunk</a></li> + <li class="nav-header">Helix 1.0.3</li> + <li><a href="../1.0.3-docs/index.html" title="Documentation"><span class="none"></span>Documentation</a></li> + <li><a href="../1.0.3-docs/Quickstart.html" title="Quick Start"><span class="none"></span>Quick Start</a></li> + <li><a href="../1.0.3-docs/Tutorial.html" title="Tutorial"><span class="none"></span>Tutorial</a></li> + <li><a href="../1.0.3-docs/download.html" title="Download"><span class="none"></span>Download</a></li> + <li class="nav-header">Helix 0.9.10 (0.9.9)</li> + <li><a href="../0.9.9-docs/index.html" title="Documentation"><span class="none"></span>Documentation</a></li> + <li><a href="../0.9.9-docs/Quickstart.html" title="Quick Start"><span class="none"></span>Quick Start</a></li> + <li><a href="../0.9.9-docs/Tutorial.html" title="Tutorial"><span class="none"></span>Tutorial</a></li> + <li><a href="../0.9.9-docs/download.html" title="Download"><span class="none"></span>Download</a></li> + <li class="nav-header">Get Involved</li> + <li><a href="../IRC.html" title="IRC"><span class="none"></span>IRC</a></li> + <li><a href="../mail-lists.html" title="Mailing Lists"><span class="none"></span>Mailing Lists</a></li> + <li><a href="../issue-tracking.html" title="Issues"><span class="none"></span>Issues</a></li> + <li><a href="../team-list.html" title="Team"><span class="none"></span>Team</a></li> + <li><a href="../sources.html" title="Sources"><span class="none"></span>Sources</a></li> + <li><a href="../integration.html" title="Continuous Integration"><span class="none"></span>Continuous Integration</a></li> + <li><a href="../involved/building.html" title="Building Guide"><span class="none"></span>Building Guide</a></li> + <li><a href="../releasing.html" title="Release Guide"><span class="none"></span>Release Guide</a></li> + <li><a href="../involved/contribdocs.html" title="Improve this Website"><span class="none"></span>Improve this Website</a></li> + <li class="nav-header">ASF</li> + <li><a href="http://www.apache.org/" class="externalLink" title="ASF Home"><span class="none"></span>ASF Home</a></li> + <li><a href="http://www.apache.org/licenses/" class="externalLink" title="License"><span class="none"></span>License</a></li> + <li><a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink" title="Sponsorship"><span class="none"></span>Sponsorship</a></li> + <li><a href="http://www.apache.org/foundation/thanks.html" class="externalLink" title="Thanks"><span class="none"></span>Thanks</a></li> + <li><a href="http://www.apache.org/security/" class="externalLink" title="Security"><span class="none"></span>Security</a></li> + </ul> + </nav> + <div class="well sidebar-nav"> + <hr /> + <div id="poweredBy"> + <div class="clear"></div> + <div class="clear"></div> + <div class="clear"></div> +<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy"><img class="builtBy" alt="Built by Maven" src="../images/logos/maven-feather.png" /></a> + </div> + </div> + </header> + <main id="bodyColumn" class="span10" > +<h1>CRUSH-ed for even distribution</h1> +<ul> + +<li><a href="#CRUSH-edforevendistribution-Overview">Overview</a></li> +<li><a href="#CRUSH-edforevendistribution-Design">Design</a></li> +<li><a href="#CRUSH-edforevendistribution-Experiment">Experiment</a> +<ul> + +<li><a href="#CRUSH-edforevendistribution-PartitionDistribution">Partition Distribution</a></li> +<li><a href="#CRUSH-edforevendistribution-DisablingNodes">Disabling Nodes</a></li> +<li><a href="#CRUSH-edforevendistribution-Rollingupgrade">Rolling upgrade</a></li> +<li><a href="#CRUSH-edforevendistribution-AddingNodes">Adding Nodes</a></li> +<li><a href="#CRUSH-edforevendistribution-AlgorithmPerformance">Algorithm Performance</a></li> +<li><a href="#CRUSH-edforevendistribution-AdditionalTestsforCornerCases">Additional Tests for Corner Cases</a></li> +</ul> +</li> +<li><a href="#CRUSH-edforevendistribution-Conclusion">Conclusion</a></li> +<li><a href="#CRUSH-edforevendistribution-UserGuide">User Guide</a></li> +<li><a href="#CRUSH-edforevendistribution-FutureWorks">Future Works</a> +<ul> + +<li><a href="#CRUSH-edforevendistribution-InstanceLevelCapacityLimitation">Instance Level Capacity Limitation</a></li> +<li><a href="#CRUSH-edforevendistribution-RebalanceAlgorithmTakesPartitionWeightintoConsideration">Rebalance Algorithm Takes Partition Weight into Consideration</a></li> +</ul> +</li> +</ul> +<h1>Overview</h1> +<p>CRUSH (or MultiRoundCRUSH) algorithm ensures consistent partition distribution. It is based on pseudo-random partition placement.  When the number of placed items (in our case the partitions) approaches infinity, the distribution approaches perfectly uniform.<br /> +However, it behaves the worst with a small number of placed items.  Especially, for those single tenant clusters with a small number of partitions, we may experience un-evenly partition distributions over instances in the cluster.<br /> +According to the data we collected from our PROD environment, distribution calculated by CRUSH is not satisfying. About 50% difference between the heavy loaded nodes and the light loaded nodes. Even with MultiRoundCRUSH, the difference is still about 30%.</p> +<p>Alternatively, card dealing strategy generates more even distribution in this cases. However, comparing it with CRUSH, there will be much more extra partition movements ( A movement is “extra” if the host instance has one partition out and another partition in) when the cluster topology is changed. The legacy AutoRebalanceStrategy, which is also based on card dealing, resolves this issue by taking the current mapping as an input. And it only changes the mapping for the delta part. It means the algorithm is not deterministic. Worse, it does not support fault zone configuration. So legacy AutoRebalanceStrategy is not an option.</p> +<p>Given a deterministic algorithm is required, we find load balance and minimal partition movements are hard to achieve at the same time. A trade-off between uniform distribution and partition movements during cluster changes is needed.</p> +<p>In this document, we propose a hybrid algorithm based on CRUSH that ensures both even distribution and minimized extra partition movement. We call it CRUSH-ed.<br /> +The basic idea is running additional rounds of re-balance on the uneven partitions. Note that CRUSH-ed guarantees stability (deterministic) and is fault zone aware.</p> +<p>According to our experiments, CRUSH-ed results in a much more uniform distribution and very few extra partition movements (when nodes are changed unexpectedly) compared with the original CRUSH. The cost is additional run time for the re-assigning calculation.<br /> +We think CRUSH-ed can to achieve better partition assignment in most of the cases.</p> +<h1>Design</h1> +<p>In general, we have 2 goals:</p> +<ol style="list-style-type: decimal"> + +<li>Even distribution.</li> +<li>Minimize partition movements when nodes are changed.</li> +</ol> +<p>CRUSH has very small movement count, but the distribution is not optimal.</p> +<p>As for MultiRound-CRUSH, it is designed for even distribution. The idea is running CRUSH multiple times. Each time the CRUSH will only be applied to the spiking part. So the distribution would eventually converge to even distribution.<br /> +However, the number of iterations that are required is not guaranteed. And there will be more partition movements when the topology of the nodes is changed. As a result, it more or less fails both goals. Still not ideal.</p> +<p>Since we already have a good base, we built CRUSH-ed based on CRUSH. It changes the uneven partition assignment generated by CRUSH as following.<br /> +Note that blue part should keep unchanged before and after.</p> +<p>Before (CRUSH)</p> +<p><img src="images/design/crushed/214844314.png" alt="Before (CRUSH)" /></p> +<p>After (new strategy)</p> +<p><img src="images/design/crushed/214844313.png" alt="After (new strategy)" /></p> +<p>Since the problem is NP-hard. We are not expecting the best assignment. A greedy algorithm works good enough.<br /> +After we tried different designs, we find it's hard to achieve both goals (even distribution and fewer movements) using a single strategy. So we decided to apply a hybrid algorithm that finishes the work step by step.</p> +<p><b>Step 1, run CRUSH to get a base assignment.</b><br /> +The base assignment usually contains a certain number of uneven partitions, so we need the following steps to re-distribute them.</p> +<p><b>Step 2, run a card dealing algorithm on the uneven parts.</b><br /> +And assign them to idle nodes. This algorithm is conceptually simple. The result ensures that all partitions are assigned to instances with minimum difference. Note that when fault zone joins the game, our greedy algorithm may not be able to calculate possible results because the candidate assignment may have fault zone conflict. So we add the buffer to tolerate small uneven assignment.</p> +<p>Example of assignments after step 2,</p> +<p><img src="images/design/crushed/214844288.png" alt="Example" /></p> +<p><b>Step 3, Shuffle partitions' preference lists.</b><br /> +Since replica states are assigned according to node order in these lists, if the lists are randomly ordered, the states will also be random. This may cause uneven states distribution.<br /> +To resolve this issue, before the final step 4, CRUSH-ed executes a simpler version card dealing algorithm on replica's order to shuffle them in each preference list. This operation results in a much evener state distribution.</p> +<p>Example of master distribution before step 3,</p> +<p><img src="images/design/crushed/214844287.png" alt="Example" /></p> +<p>Example of master distribution after step 3,</p> +<p><img src="images/design/crushed/214844286.png" alt="Example" /></p> +<p><b>Step 4, re-calculate the assignment for the partitions on temporarily disabled nodes using a consistent hashing algorithm.</b><br /> +Consistent hashing can ensure minimize partition movement.<br /> +Note that the first 3 steps are using full node list, regardless of disabled or offline nodes. So the assignment will be stable even the algorithm contains random factors such hashCode. Then step 4 ensures all the disabled nodes are handled correctly without causing huge partition movements.</p> +<p>Assume the first node is down, after step 4, the final assignment and master distribution are as following. Note that there is no assignment on the first node anymore.</p> +<p><img src="images/design/crushed/214844285.png" alt="Example" /></p> +<p>One potential issue of using intuitive algorithm is not converging. In this case, CRUSH-ed falls back to CRUSH.<br /> +Pseudocode is listed below.</p> +<p><b>Pseudo Code</b></p> + +<div class="source"><pre class="prettyprint"><code>// Round 1: Calculate mapping using the base strategy. +// Note to use all nodes for minimizing the influence of live node changes. +origPartitionMap = getBaseRebalanceStrategy().computePartitionAssignment(allNodes, clusterData); + +// Transform current assignment to instance->partitions map, and get total partitions +nodeToPartitionMap = convertMap(origPartitionMap); + +// Round 2: Rebalance mapping using card dealing algorithm. +Topology allNodeTopo = new Topology(allNodes, clusterData); +cardDealer.computeMapping(allNodeTopo, nodeToPartitionMap); + +// Since states are assigned according to preference list order, shuffle preference list for even states distribution. +shufflePreferenceList(nodeToPartitionMap); + +// Round 3: Re-mapping the partitions on non-live nodes using consistent hashing for reducing movement. +// Consistent hashing ensures minimum movements when nodes are disabled unexpectedly. +if (!liveNodes.containsAll(allNodes)) { + Topology liveNodeTopo = new Topology(liveNodes, clusterData); + hashPlacement.computeMapping(liveNodeTopo, nodeToPartitionMap); +} + +if (!nodeToPartitionMap.isEmpty()) { + // Round 2 and 3 is done successfully + return convertMap(nodeToPartitionMap); +} else { + return getBaseRebalanceStrategy().computePartitionAssignment(liveNodes, clusterData); +} +</code></pre></div> +<p>The re-assigning logic is generic and can be applied to any algorithms. So we created an abstract class for common logic. And then create strategy classes for Helix users. The Java class diagram looks like following.</p> +<p><img src="images/design/crushed/214844307.png" alt="Example" /></p><section><section> +<h3><a name="Cap_of.C2.A0difference_participant_load_using_CRUSH-ed"></a>Cap of difference participant load using CRUSH-ed</h3> +<p>For each resource, CRUSH-ed tries the best to ensure the partition count difference is in maximum ONE (as long as the assignment is possible considering fault zone). In this case, the worst assignment happens when all these differences are located on one node. So N resources contribute their additional partitions to one node, the node will have N additional partitions in total. Theoretically, the maximum difference between the heavy loaded node and other nodes is <b>the number of resources</b> in a cluster.</p></section><section> +<h3><a name="Not_a_global_uniform_distribution_solution_that_considers_node_capacity"></a>Not a global uniform distribution solution that considers node capacity</h3> +<p>Global distribution algorithm requires the algorithm takes other resources' partition distribution as part of the input. Since this related the distribution of all resources, the distribution would be very unstable. Any change in any resource will change it. As a result, the rebalance process may lead to a large number of partition movement.<br /> +Therefore, the solution in this document cannot be used to calculate global uniform distribution, neither node capacity aware throttling.<br /> +This requirement would be investigated in another initiative. Please refer to future works.</p> +<h1>Experiment</h1> +<p>We tested CRUSH-ed by simulating rebalancing using real product clusters topology data. And we tested multiple scenarios:</p> +<ul> + +<li>Distribution based on cluster topology.</li> +<li>Disabling hosts to simulate hosts down.</li> +<li>Adding hosts to simulate expansion.</li> +<li>Rolling upgrade.</li> +</ul> +<p>All results show that CRUSH-ed generates more uniform global distribution compared with CRUSH.<br /> +Moreover, partition movements in most scenarios are minimized. Except for hosts expansion case. Which can be controlled by configuring state transition throttling.</p></section></section><section> +<h2><a name="Partition_Distribution"></a>Partition Distribution</h2> +<p>Following charts demonstrate the worst cases (min load vs. max load) and STDEVs of partition/master distributions of several Espresso PROD clusters.<br /> +If we measure the improvement by STDEV, CRUSH-ed reduces the partition distribution unevenness by 87% on average compared with CRUSH. And for master replica distribution, the evenness improvement is 68% on average and 81% in maximum.</p> +<p><img src="images/design/crushed/214844302.png" alt="Example" /><img src="images/design/crushed/214844301.png" alt="Example" /></p> +<p><img src="images/design/crushed/214844298.png" alt="Example" /><img src="images/design/crushed/214844297.png" alt="Example" /></p> +<p>In addition, even for the clusters that already have good distribution using CRUSH, their real resource usage might be uneven because of partitions size variance.<br /> +For example, Venice-0 has more than 50k replicas. So even with CRUSH, partition distribution is already good, STDEV is 32.<br /> +However, because the partitions are quite different in storage usage, nodes' storage usage STDEV is 222. Max disk usage is about 2956GB.</p> +<p>Then, if using CRUSH-ed, the partition distribution is improved a little bit, STDEV is 6.<br /> +Moreover, since all the resources have a uniform distribution, total storage usage is much evener compared with CRUSH. STDEV is 30. And max disk usage is about 2644GB.</p> +<p>Given the host resource is reserved according to max usage, CRUSH-ed can save (2956 - 2644) / 2956 = 10.5% cost.</p> +<p><img src="images/design/crushed/214844300.png" alt="Example" /><img src="images/design/crushed/214844299.png" alt="Example" /></p></section><section> +<h2><a name="Disabling_Nodes"></a>Disabling Nodes</h2> +<p>When nodes are offline or disabled, CRUSH-ed will re-assigned the partitions to other live nodes. The algorithm ensures only moving the necessary partitions.<br /> +We simulated disabling a certain number of nodes, and check the changes regarding partition movement and master changes. We also used the expected movement (the partitions/masters count on the disabled nodes) as a baseline to measure if any extra movements happen.</p> +<p>The results show that movement is highly correlated to the portion of disabled nodes. And extra movements are minor (in most cases 0 movements).</p> +<p>Note that <b>Rate</b> in this document is <b>the changed number / total partition or master count</b>.</p> +<p><img src="images/design/crushed/214844296.png" alt="Example" /><img src="images/design/crushed/214844295.png" alt="Example" /></p></section><section> +<h2><a name="Rolling_upgrade"></a>Rolling upgrade</h2> +<p>Rolling upgrade is different from disabling nodes. Since nodes are reset one by one, in this test we assume the difference could be 2 nodes in maximum (for example, upgrading Node A then upgrading Node B).<br /> +In this case, movements are still minimized.</p> +<p>Note that in real PROD clusters, since admin can set delayed scheduler, there won't be movement at all.<br /> +Following chart shows the maximum recorded movements or master changes during the whole rolling upgrade process. Even with the worst case, extra partition movements and master changes are close to 0%.<br /> +<img src="images/design/crushed/214844289.png" alt="Example" /></p></section><section> +<h2><a name="Adding_Nodes"></a>Adding Nodes</h2> +<p>Adding nodes (or cluster expansion) changes topology. CRUSH-ed leverages card dealing for evenness. So when topology changes, the movements would be much more than CRUSH.<br /> +As we evaluated, the moved partitions are majority uneven ones.</p> +<p>According to our experiment using all Espresso clusters, the extra partition movement rate could be as much as 23%. And master changes could be as much as 58%.<br /> +The percentage is still compared with total partition count or total master count.</p> +<p>Note the extra change rate is not correlated with the number of additional nodes. So our recommendation is finishing expansion in one operation so as to do only one partition shuffling.</p> +<p><img src="images/design/crushed/214844294.png" alt="Example" /></p></section><section> +<h2><a name="Algorithm_Performance"></a>Algorithm Performance</h2> +<p>We compared CRUSH-ed with CRUSH algorithms using different instance numbers. The tests are executed multiple times and we recorded middle numbers.<br /> +The results are shown as follows.</p> +<p>Note that CRUSH-ed does not cost much additional calculating time for regular rebalancing. In maximum, 30% more runtime compared with CRUSH. And it will be less than MultiRoundCRUSH.</p> +<p><img src="images/design/crushed/214844283.png" alt="Example" /><img src="images/design/crushed/214844284.png" alt="Example" /></p> +<p>However, when there are nodes down since CRUSH-ed needs to run an additional consistent hashing based re-distribution, the calculating time will be much longer. More than 3 times compared with CRUSH.</p> +<p><img src="images/design/crushed/214844282.png" alt="Example" /></p> +<p>With some <b>performance improvements</b>, such as using cache to avoid duplicate calculation, we achieved to greatly reduce CRUSH-ed's running time. According to our experiment, it is now close to MultiRound CRUSH.</p> +<p><img src="images/design/crushed/214844281.png" alt="Example" /></p></section><section> +<h2><a name="Additional_Tests_for_Corner_Cases"></a>Additional Tests for Corner Cases</h2> +<p>These tests prove that the algorithm still works in the extreme cases.</p> +<p><b>ESPRESSO_IDENTITY</b></p> +<p>This cluster has only one resource. We get the distribution details for all 4 fabrics.</p> +<p><img src="images/design/crushed/214844292.png" alt="Example" /><img src="images/design/crushed/214844293.png" alt="Example" /><img src="images/design/crushed/214844291.png" alt="Example" /><img src="images/design/crushed/214844290.png" alt="Example" /></p><section><section> +<h4><a name="a100_nodes.2C_100_resources_and_101_partitions_for_each_resource"></a>100 nodes, 100 resources and 101 partitions for each resource</h4> +<p>The result is good. STDEV is 0.11. And the max loaded node has 104 partitions, while the min loaded node has 100 partitions.</p></section><section> +<h4><a name="a100_nodes.2C_1_resource_with_10100_partitions"></a>100 nodes, 1 resource with 10100 partitions</h4> +<p>The result is strictly evenness. STDEV of the distribution is all 0.</p></section></section><section> +<h3><a name="Test_without_Fault_Zone"></a>Test without Fault Zone</h3><section> +<h4><a name="Distribution_with_CRUSH-ed"></a>Distribution with CRUSH-ed</h4> +<table border="0" class="table table-striped"> +<thead> + +<tr class="a"> +<th>Total Repilca</th> +<th>Min Replica</th> +<th>Max Replica</th> +<th>Partition STDDEV</th> +<th>Min Master</th> +<th>Max Master</th> +<th>Master STDDEV</th></tr> +</thead><tbody> + +<tr class="b"> +<td align="left">30720</td> +<td>520</td> +<td>523</td> +<td>0.09886598</td> +<td>170</td> +<td>177</td> +<td>0.21981398</td></tr> +</tbody> +</table></section><section> +<h4><a name="Partition_Change_on_Node_down_.28disabled.29"></a>Partition Change on Node down (disabled)</h4> +<table border="0" class="table table-striped"> +<thead> + +<tr class="a"> +<th>Number Of Node Change</th> +<th>Total Partition Movements</th> +<th>Moved Partition / Total Partition (%)</th> +<th>Extra Movements (not one the changed nodes)</th> +<th>Total Master Changes</th> +<th>Moved Masters / Total Masters (%)</th> +<th>Extra Changes (not one the changed nodes)</th></tr> +</thead><tbody> + +<tr class="b"> +<td align="left">-7</td> +<td>3642</td> +<td>11.8554688</td> +<td>0</td> +<td>1212</td> +<td>11.8359375</td> +<td>0.09765625</td></tr> +</tbody> +</table></section></section><section> +<h3><a name="Test_with_5_Fault_Zones_.2811_participants_in_one_zone_and_12_participants_in_the_rest.29"></a>Test with 5 Fault Zones (11 participants in one zone and 12 participants in the rest)</h3><section> +<h4><a name="Distribution_with_CRUSH-ed"></a>Distribution with CRUSH-ed</h4> +<table border="0" class="table table-striped"> +<thead> + +<tr class="a"> +<th>Total Repilca</th> +<th>Min Replica</th> +<th>Max Replica</th> +<th>Partition STDDEV</th> +<th>Min Master</th> +<th>Max Master</th> +<th>Master STDDEV</th></tr> +</thead><tbody> + +<tr class="b"> +<td align="left">30720</td> +<td>518</td> +<td>524</td> +<td>0.12537857</td> +<td>169</td> +<td>177</td> +<td>0.20728582</td></tr> +</tbody> +</table></section><section> +<h4><a name="Partition_Change_on_Random_Node_down_.28disabled.29"></a>Partition Change on Random Node down (disabled)</h4> +<table border="0" class="table table-striped"> +<thead> + +<tr class="a"> +<th>Number Of Node Change</th> +<th>Total Partition Movements</th> +<th>Moved Partition / Total Partition (%)</th> +<th>Extra Movements (not one the changed nodes)</th> +<th>Total Master Changes</th> +<th>Moved Masters / Total Masters (%)</th> +<th>Extra Changes (not one the changed nodes)</th></tr> +</thead><tbody> + +<tr class="b"> +<td align="left">-7</td> +<td>3646</td> +<td>11.8684896</td> +<td>0</td> +<td>1206</td> +<td>11.7773438</td> +<td>0</td></tr> +</tbody> +</table></section><section> +<h4><a name="Partition_Change_on_a_Whole_Fault_Zone_down_.28disabled.29"></a>Partition Change on a Whole Fault Zone down (disabled)</h4> +<table border="0" class="table table-striped"> +<thead> + +<tr class="a"> +<th>Number Of Node Change</th> +<th>Total Partition Movements</th> +<th>Moved Partition / Total Partition (%)</th> +<th>Extra Movements (not one the changed nodes)</th> +<th>Total Master Changes</th> +<th>Moved Masters / Total Masters (%)</th> +<th>Extra Changes (not one the changed nodes)</th></tr> +</thead><tbody> + +<tr class="b"> +<td align="left">-12</td> +<td>6248</td> +<td>20.3385417</td> +<td>0</td> +<td>2079</td> +<td>20.3027344</td> +<td>0</td></tr> +</tbody> +</table></section><section> +<h4><a name="Partition_Change_on_half_of_the_disabled_Fault_Zone_back"></a>Partition Change on half of the disabled Fault Zone back</h4> +<table border="0" class="table table-striped"> +<thead> + +<tr class="a"> +<th>Number Of Node Change</th> +<th>Total Partition Movements</th> +<th>Moved Partition / Total Partition (%)</th> +<th>Extra Movements (not one the changed nodes)</th> +<th>Total Master Changes</th> +<th>Moved Masters / Total Masters (%)</th> +<th>Extra Changes (not one the changed nodes)</th></tr> +</thead><tbody> + +<tr class="b"> +<td align="left">+6</td> +<td>3767</td> +<td>12.2623698</td> +<td>0</td> +<td>1043</td> +<td>10.1855469</td> +<td>0</td></tr> +</tbody> +</table></section></section><section> +<h3><a name="Additional_Tests_for_Validate_Strategy_Stability"></a>Additional Tests for Validate Strategy Stability</h3> +<ul> + +<li>Disable and Re-enabled participants. The distribution is recovered.</li> +<li>Keep bouncing the cluster by resetting random participants. The distribution is still good.</li> +</ul> +<h1>Conclusion</h1> +<p>CRUSH-ed achieves more uniform distribution compared with CRUSH in the cost of longer running time and more partition movement when the cluster is changed.</p> +<p>Moreover, according to our experiments, MultiRoundCRUSH-FEAP can achieve the best uniform distribution. But the gain does not match its additional cost. So it is not recommended.</p> +<h1>User Guide</h1> +<ol style="list-style-type: decimal"> + +<li>Ensure the resouce is using FULL_AUTO mode.</li> +<li>Set rebalance strategy to be “org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy”.</li> +<li>Expect more partition movement on topology changes when using the new strategy.</li> +</ol> +<p><b>IdeaState SimpleFields Example</b></p> + +<div class="source"><pre class="prettyprint"><code>HELIX_ENABLED : "true" +IDEAL\_STATE\_MODE : "AUTO_REBALANCE" +REBALANCE\_MODE : "FULL\_AUTO" +REBALANCE_STRATEGY : "org.apache.helix.controller.rebalancer.strategy.CrushRebalanceStrategy" +MIN\_ACTIVE\_REPLICAS : "0" +NUM_PARTITIONS : "64" +REBALANCER\_CLASS\_NAME : "org.apache.helix.controller.rebalancer.DelayedAutoRebalancer" +REPLICAS : "1" +STATE\_MODEL\_DEF_REF : "LeaderStandby" +</code></pre></div> +<h1>Future Works</h1></section></section><section> +<h2><a name="Instance_Level_Capacity_Limitation"></a>Instance Level Capacity Limitation</h2> +<p>Currently, all resources are assigned separately.<br /> +The pros of this design are that resources change won't cause existing partitions to be re-assigned.<br /> +The cons are:</p> +<ol style="list-style-type: decimal"> + +<li>It's hard to ensure strict global uniform distribution.</li> +<li>Instance level capacity control is not possible given the algorithm doesn't have a global view of partition assignment.</li> +</ol></section><section> +<h2><a name="Rebalance_Algorithm_Takes_Partition_Weight_into_Consideration"></a>Rebalance Algorithm Takes Partition Weight into Consideration</h2> +<p>This algorithm still considers all partitions to be equally weighted. But in reality, different partitions may have different resource requirements.<br /> +Application admins need to configure partition weight and Helix should assignment them accordingly.</p> +<p>Note this feature only makes sense when it is applied to a global assignment algorithm since each partition in the same resource are weighted the same.</p></section> + </main> + </div> + </div> + <hr/> + <footer> + <div class="container-fluid"> + <div class="row-fluid"> +<div class="row span16"><div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation. + All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div> + <a href="http://helix.apache.org/privacy-policy.html">Privacy Policy</a> + </div> + </div> + </div> + </footer> +<script> + if(anchors) { + anchors.add(); + } +</script> + </body> </html> \ No newline at end of file
