http://git-wip-us.apache.org/repos/asf/zookeeper/blob/cf24deb2/docs/zookeeperOver.html ---------------------------------------------------------------------- diff --git a/docs/zookeeperOver.html b/docs/zookeeperOver.html deleted file mode 100644 index 0968d3a..0000000 --- a/docs/zookeeperOver.html +++ /dev/null @@ -1,704 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> -<html> -<head> -<META http-equiv="Content-Type" content="text/html; charset=UTF-8"> -<meta content="Apache Forrest" name="Generator"> -<meta name="Forrest-version" content="0.9"> -<meta name="Forrest-skin-name" content="pelt"> -<title>ZooKeeper</title> -<link type="text/css" href="skin/basic.css" rel="stylesheet"> -<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet"> -<link media="print" type="text/css" href="skin/print.css" rel="stylesheet"> -<link type="text/css" href="skin/profile.css" rel="stylesheet"> -<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script> -<link rel="shortcut icon" href="images/favicon.ico"> -</head> -<body onload="init()"> -<script type="text/javascript">ndeSetTextSize();</script> -<div id="top"> -<!--+ - |breadtrail - +--> -<div class="breadtrail"> -<a href="http://www.apache.org/">Apache</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> -</div> -<!--+ - |header - +--> -<div class="header"> -<!--+ - |start group logo - +--> -<div class="grouplogo"> -<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a> -</div> -<!--+ - |end group logo - +--> -<!--+ - |start Project Logo 
- +--> -<div class="projectlogo"> -<a href="http://zookeeper.apache.org/"><img class="logoImage" alt="ZooKeeper" src="images/zookeeper_small.gif" title="ZooKeeper: distributed coordination"></a> -</div> -<!--+ - |end Project Logo - +--> -<!--+ - |start Search - +--> -<div class="searchbox"> -<form action="http://www.google.com/search" method="get" class="roundtopsmall"> -<input value="zookeeper.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> - <input name="Search" value="Search" type="submit"> -</form> -</div> -<!--+ - |end search - +--> -<!--+ - |start Tabs - +--> -<ul id="tabs"> -<li> -<a class="unselected" href="http://zookeeper.apache.org/">Project</a> -</li> -<li> -<a class="unselected" href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/">Wiki</a> -</li> -<li class="current"> -<a class="selected" href="index.html">ZooKeeper 3.4 Documentation</a> -</li> -</ul> -<!--+ - |end Tabs - +--> -</div> -</div> -<div id="main"> -<div id="publishedStrip"> -<!--+ - |start Subtabs - +--> -<div id="level2tabs"></div> -<!--+ - |end Endtabs - +--> -<script type="text/javascript"><!-- -document.write("Last Published: " + document.lastModified); -// --></script> -</div> -<!--+ - |breadtrail - +--> -<div class="breadtrail"> - - - </div> -<!--+ - |start Menu, mainarea - +--> -<!--+ - |start Menu - +--> -<div id="menu"> -<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Overview</div> -<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;"> -<div class="menuitem"> -<a href="index.html">Welcome</a> -</div> -<div class="menupage"> -<div class="menupagetitle">Overview</div> -</div> -<div class="menuitem"> -<a href="zookeeperStarted.html">Getting Started</a> -</div> -<div class="menuitem"> -<a 
href="releasenotes.html">Release Notes</a> -</div> -</div> -<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Developer</div> -<div id="menu_1.2" class="menuitemgroup"> -<div class="menuitem"> -<a href="api/index.html">API Docs</a> -</div> -<div class="menuitem"> -<a href="zookeeperProgrammers.html">Programmer's Guide</a> -</div> -<div class="menuitem"> -<a href="javaExample.html">Java Example</a> -</div> -<div class="menuitem"> -<a href="zookeeperTutorial.html">Barrier and Queue Tutorial</a> -</div> -<div class="menuitem"> -<a href="recipes.html">Recipes</a> -</div> -</div> -<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">BookKeeper</div> -<div id="menu_1.3" class="menuitemgroup"> -<div class="menuitem"> -<a href="bookkeeperStarted.html">Getting started</a> -</div> -<div class="menuitem"> -<a href="bookkeeperOverview.html">Overview</a> -</div> -<div class="menuitem"> -<a href="bookkeeperConfig.html">Setup guide</a> -</div> -<div class="menuitem"> -<a href="bookkeeperProgrammer.html">Programmer's guide</a> -</div> -</div> -<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Admin & Ops</div> -<div id="menu_1.4" class="menuitemgroup"> -<div class="menuitem"> -<a href="zookeeperAdmin.html">Administrator's Guide</a> -</div> -<div class="menuitem"> -<a href="zookeeperQuotas.html">Quota Guide</a> -</div> -<div class="menuitem"> -<a href="zookeeperJMX.html">JMX</a> -</div> -<div class="menuitem"> -<a href="zookeeperObservers.html">Observers Guide</a> -</div> -</div> -<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Contributor</div> -<div id="menu_1.5" class="menuitemgroup"> -<div class="menuitem"> -<a href="zookeeperInternals.html">ZooKeeper Internals</a> -</div> -</div> -<div onclick="SwitchMenu('menu_1.6', 'skin/')" id="menu_1.6Title" class="menutitle">Miscellaneous</div> -<div id="menu_1.6" class="menuitemgroup"> -<div 
class="menuitem"> -<a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER">Wiki</a> -</div> -<div class="menuitem"> -<a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ">FAQ</a> -</div> -<div class="menuitem"> -<a href="http://zookeeper.apache.org/mailing_lists.html">Mailing Lists</a> -</div> -</div> -<div id="credit"></div> -<div id="roundbottom"> -<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> -<!--+ - |alternative credits - +--> -<div id="credit2"></div> -</div> -<!--+ - |end Menu - +--> -<!--+ - |start content - +--> -<div id="content"> -<div title="Portable Document Format" class="pdflink"> -<a class="dida" href="zookeeperOver.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> - PDF</a> -</div> -<h1>ZooKeeper</h1> -<div id="front-matter"> -<div id="minitoc-area"> -<ul class="minitoc"> -<li> -<a href="#ch_DesignOverview">ZooKeeper: A Distributed Coordination Service for Distributed - Applications</a> -<ul class="minitoc"> -<li> -<a href="#sc_designGoals">Design Goals</a> -</li> -<li> -<a href="#sc_dataModelNameSpace">Data model and the hierarchical namespace</a> -</li> -<li> -<a href="#Nodes+and+ephemeral+nodes">Nodes and ephemeral nodes</a> -</li> -<li> -<a href="#Conditional+updates+and+watches">Conditional updates and watches</a> -</li> -<li> -<a href="#Guarantees">Guarantees</a> -</li> -<li> -<a href="#Simple+API">Simple API</a> -</li> -<li> -<a href="#Implementation">Implementation</a> -</li> -<li> -<a href="#Uses">Uses</a> -</li> -<li> -<a href="#Performance">Performance</a> -</li> -<li> -<a href="#Reliability">Reliability</a> -</li> -<li> -<a href="#The+ZooKeeper+Project">The ZooKeeper Project</a> -</li> -</ul> -</li> -</ul> -</div> -</div> - - - - - -<a name="ch_DesignOverview"></a> -<h2 class="h3">ZooKeeper: A Distributed Coordination Service for Distributed - Applications</h2> -<div class="section"> -<p>ZooKeeper is a 
distributed, open-source coordination service for - distributed applications. It exposes a simple set of primitives that - distributed applications can build upon to implement higher level services - for synchronization, configuration maintenance, and groups and naming. It - is designed to be easy to program to, and uses a data model styled after - the familiar directory tree structure of file systems. It runs in Java and - has bindings for both Java and C.</p> -<p>Coordination services are notoriously hard to get right. They are - especially prone to errors such as race conditions and deadlock. The - motivation behind ZooKeeper is to relieve distributed applications of the - responsibility of implementing coordination services from scratch.</p> -<a name="sc_designGoals"></a> -<h3 class="h4">Design Goals</h3> -<p> -<strong>ZooKeeper is simple.</strong> ZooKeeper - allows distributed processes to coordinate with each other through a - shared hierarchical namespace which is organized similarly to a standard - file system. The name space consists of data registers - called znodes, - in ZooKeeper parlance - and these are similar to files and directories. - Unlike a typical file system, which is designed for storage, ZooKeeper - data is kept in-memory, which means ZooKeeper can achieve high - throughput and low latency numbers.</p> -<p>The ZooKeeper implementation puts a premium on high performance, - highly available, strictly ordered access. The performance aspects of - ZooKeeper mean it can be used in large, distributed systems. The - reliability aspects keep it from being a single point of failure. 
The - strict ordering means that sophisticated synchronization primitives can - be implemented at the client.</p> -<p> -<strong>ZooKeeper is replicated.</strong> Like the - distributed processes it coordinates, ZooKeeper itself is intended to be - replicated over a set of hosts called an ensemble.</p> -<table class="ForrestTable" cellspacing="1" cellpadding="4"> -<tr> -<td>ZooKeeper Service</td> -</tr> -<tr> -<td> - - <img alt="" src="images/zkservice.jpg"> - - </td> -</tr> -</table> -<p>The servers that make up the ZooKeeper service must all know about - each other. They maintain an in-memory image of state, along with - transaction logs and snapshots in a persistent store. As long as a - majority of the servers are available, the ZooKeeper service will be - available.</p> -<p>Clients connect to a single ZooKeeper server. The client maintains - a TCP connection through which it sends requests, gets responses, gets - watch events, and sends heartbeats. If the TCP connection to the server - breaks, the client will connect to a different server.</p> -<p> -<strong>ZooKeeper is ordered.</strong> ZooKeeper - stamps each update with a number that reflects the order of all - ZooKeeper transactions. Subsequent operations can use the order to - implement higher-level abstractions, such as synchronization - primitives.</p> -<p> -<strong>ZooKeeper is fast.</strong> It is - especially fast in "read-dominant" workloads. ZooKeeper applications run - on thousands of machines, and it performs best where reads are more - common than writes, at ratios of around 10:1.</p> -<a name="sc_dataModelNameSpace"></a> -<h3 class="h4">Data model and the hierarchical namespace</h3> -<p>The name space provided by ZooKeeper is much like that of a - standard file system. A name is a sequence of path elements separated by - a slash (/). 
Every node in ZooKeeper's name space is identified by a - path.</p> -<table class="ForrestTable" cellspacing="1" cellpadding="4"> -<tr> -<td>ZooKeeper's Hierarchical Namespace</td> -</tr> -<tr> -<td> - - <img alt="" src="images/zknamespace.jpg"> - - </td> -</tr> -</table> -<a name="Nodes+and+ephemeral+nodes"></a> -<h3 class="h4">Nodes and ephemeral nodes</h3> -<p>Unlike in standard file systems, each node in a ZooKeeper - namespace can have data associated with it as well as children. It is - like having a file-system that allows a file to also be a directory. - (ZooKeeper was designed to store coordination data: status information, - configuration, location information, etc., so the data stored at each - node is usually small, in the byte to kilobyte range.) We use the term - <em>znode</em> to make it clear that we are talking about - ZooKeeper data nodes.</p> -<p>Znodes maintain a stat structure that includes version numbers for - data changes, ACL changes, and timestamps, to allow cache validations - and coordinated updates. Each time a znode's data changes, the version - number increases. For instance, whenever a client retrieves data it also - receives the version of the data.</p> -<p>The data stored at each znode in a namespace is read and written - atomically. Reads get all the data bytes associated with a znode and a - write replaces all the data. Each node has an Access Control List (ACL) - that restricts who can do what.</p> -<p>ZooKeeper also has the notion of ephemeral nodes. These znodes - exist as long as the session that created the znode is active. When the - session ends the znode is deleted. Ephemeral nodes are useful when you - want to implement <em>[tbd]</em>.</p> -<a name="Conditional+updates+and+watches"></a> -<h3 class="h4">Conditional updates and watches</h3> -<p>ZooKeeper supports the concept of <em>watches</em>. - Clients can set a watch on a znode. A watch will be triggered and - removed when the znode changes. 
When a watch is triggered the client - receives a packet saying that the znode has changed. And if the - connection between the client and one of the ZooKeeper servers is - broken, the client will receive a local notification. These can be used - to <em>[tbd]</em>.</p> -<a name="Guarantees"></a> -<h3 class="h4">Guarantees</h3> -<p>ZooKeeper is very fast and very simple. Since its goal, though, is - to be a basis for the construction of more complicated services, such as - synchronization, it provides a set of guarantees. These are:</p> -<ul> - -<li> - -<p>Sequential Consistency - Updates from a client will be applied - in the order that they were sent.</p> - -</li> - - -<li> - -<p>Atomicity - Updates either succeed or fail. No partial - results.</p> - -</li> - - -<li> - -<p>Single System Image - A client will see the same view of the - service regardless of the server that it connects to.</p> - -</li> - -</ul> -<ul> - -<li> - -<p>Reliability - Once an update has been applied, it will persist - from that time forward until a client overwrites the update.</p> - -</li> - -</ul> -<ul> - -<li> - -<p>Timeliness - The client's view of the system is guaranteed to - be up-to-date within a certain time bound.</p> - -</li> - -</ul> -<p>For more information on these, and how they can be used, see - <em>[tbd]</em> -</p> -<a name="Simple+API"></a> -<h3 class="h4">Simple API</h3> -<p>One of the design goals of ZooKeeper is to provide a very simple - programming interface. 
As a result, it supports only these - operations:</p> -<dl> - -<dt> -<term>create</term> -</dt> -<dd> -<p>creates a node at a location in the tree</p> -</dd> - - -<dt> -<term>delete</term> -</dt> -<dd> -<p>deletes a node</p> -</dd> - - -<dt> -<term>exists</term> -</dt> -<dd> -<p>tests if a node exists at a location</p> -</dd> - - -<dt> -<term>get data</term> -</dt> -<dd> -<p>reads the data from a node</p> -</dd> - - -<dt> -<term>set data</term> -</dt> -<dd> -<p>writes data to a node</p> -</dd> - - -<dt> -<term>get children</term> -</dt> -<dd> -<p>retrieves a list of children of a node</p> -</dd> - - -<dt> -<term>sync</term> -</dt> -<dd> -<p>waits for data to be propagated</p> -</dd> - -</dl> -<p>For a more in-depth discussion on these, and how they can be used - to implement higher level operations, please refer to - <em>[tbd]</em> -</p> -<a name="Implementation"></a> -<h3 class="h4">Implementation</h3> -<p> -<a href="#fg_zkComponents">ZooKeeper Components</a> shows the high-level components - of the ZooKeeper service. With the exception of the request processor, - each of - the servers that make up the ZooKeeper service replicates its own copy - of each of the components.</p> -<table class="ForrestTable" cellspacing="1" cellpadding="4"> -<tr> -<td>ZooKeeper Components</td> -</tr> -<tr> -<td> - - <img alt="" src="images/zkcomponents.jpg"> - - </td> -</tr> -</table> -<p>The replicated database is an in-memory database containing the - entire data tree. Updates are logged to disk for recoverability, and - writes are serialized to disk before they are applied to the in-memory - database.</p> -<p>Every ZooKeeper server services clients. Clients connect to - exactly one server to submit requests. Read requests are serviced from - the local replica of each server database. 
Requests that change the - state of the service, write requests, are processed by an agreement - protocol.</p> -<p>As part of the agreement protocol all write requests from clients - are forwarded to a single server, called the - <em>leader</em>. The rest of the ZooKeeper servers, called - <em>followers</em>, receive message proposals from the - leader and agree upon message delivery. The messaging layer takes care - of replacing leaders on failures and syncing followers with - leaders.</p> -<p>ZooKeeper uses a custom atomic messaging protocol. Since the - messaging layer is atomic, ZooKeeper can guarantee that the local - replicas never diverge. When the leader receives a write request, it - calculates what the state of the system is when the write is to be - applied and transforms this into a transaction that captures this new - state.</p> -<a name="Uses"></a> -<h3 class="h4">Uses</h3> -<p>The programming interface to ZooKeeper is deliberately simple. - With it, however, you can implement higher order operations, such as - synchronization primitives, group membership, ownership, etc. Some - distributed applications have used it to: <em>[tbd: add uses from - white paper and video presentation.]</em> For more information, see - <em>[tbd]</em> -</p> -<a name="Performance"></a> -<h3 class="h4">Performance</h3> -<p>ZooKeeper is designed to be highly performant. But is it? The - results of ZooKeeper's development team at Yahoo! Research indicate - that it is. (See <a href="#fg_zkPerfRW">ZooKeeper Throughput as the Read-Write Ratio Varies</a>.) It is especially high - performance in applications where reads outnumber writes, since writes - involve synchronizing the state of all servers. 
(Reads outnumbering - writes is typically the case for a coordination service.)</p> -<table class="ForrestTable" cellspacing="1" cellpadding="4"> -<tr> -<td>ZooKeeper Throughput as the Read-Write Ratio Varies</td> -</tr> -<tr> -<td> - - <img alt="" src="images/zkperfRW-3.2.jpg"> - - </td> -</tr> -</table> -<p>The figure <a href="#fg_zkPerfRW">ZooKeeper Throughput as the Read-Write Ratio Varies</a> is a throughput - graph of ZooKeeper release 3.2 running on servers with dual 2GHz - Xeon and two SATA 15K RPM drives. One drive was used as a - dedicated ZooKeeper log device. The snapshots were written to - the OS drive. Write requests were 1K writes and the reads were - 1K reads. "Servers" indicates the size of the ZooKeeper - ensemble, the number of servers that make up the - service. Approximately 30 other servers were used to simulate - the clients. The ZooKeeper ensemble was configured such that - leaders do not allow connections from clients.</p> -<div class="note"> -<div class="label">Note</div> -<div class="content"> -<p>In version 3.2 r/w performance improved by ~2x - compared to the <a href="http://zookeeper.apache.org/docs/r3.1.1/zookeeperOver.html#Performance">previous - 3.1 release</a>.</p> -</div> -</div> -<p>Benchmarks also indicate that it is reliable. <a href="#fg_zkPerfReliability">Reliability in the Presence of Errors</a> shows how a deployment responds to - various failures. The events marked in the figure are the - following:</p> -<ol> - -<li> - -<p>Failure and recovery of a follower</p> - -</li> - - -<li> - -<p>Failure and recovery of a different follower</p> - -</li> - - -<li> - -<p>Failure of the leader</p> - -</li> - - -<li> - -<p>Failure and recovery of two followers</p> - -</li> - - -<li> - -<p>Failure of another leader</p> - -</li> - -</ol> -<a name="Reliability"></a> -<h3 class="h4">Reliability</h3> -<p>To show the behavior of the system over time as - failures are injected we ran a ZooKeeper service made up of - 7 machines. 
We ran the same saturation benchmark as before, - but this time we kept the write percentage at a constant - 30%, which is a conservative ratio of our expected - workloads. - </p> -<table class="ForrestTable" cellspacing="1" cellpadding="4"> -<tr> -<td>Reliability in the Presence of Errors</td> -</tr> -<tr> -<td> - - <img alt="" src="images/zkperfreliability.jpg"> - - </td> -</tr> -</table> -<p>There are a few important observations from this graph. First, if - followers fail and recover quickly, then ZooKeeper is able to sustain a - high throughput despite the failure. Second, and maybe more importantly, the - leader election algorithm allows for the system to recover fast enough - to prevent throughput from dropping substantially. In our observations, - ZooKeeper takes less than 200ms to elect a new leader. Third, as - followers recover, ZooKeeper is able to raise throughput again once they - start processing requests.</p> -<a name="The+ZooKeeper+Project"></a> -<h3 class="h4">The ZooKeeper Project</h3> -<p>ZooKeeper has been - <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/PoweredBy"> - successfully used - </a> - in many industrial applications. It is used at Yahoo! as the - coordination and failure recovery service for Yahoo! Message - Broker, which is a highly scalable publish-subscribe system - managing thousands of topics for replication and data - delivery. It is used by the Fetching Service for Yahoo! - crawler, where it also manages failure recovery. A number of - Yahoo! advertising systems also use ZooKeeper to implement - reliable services. - </p> -<p>All users and developers are encouraged to join the - community and contribute their expertise. See the - <a href="http://zookeeper.apache.org/"> - ZooKeeper Project on Apache - </a> - for more information. 
- </p> -</div> - -<p align="right"> -<font size="-2"></font> -</p> -</div> -<!--+ - |end content - +--> -<div class="clearboth"> </div> -</div> -<div id="footer"> -<!--+ - |start bottomstrip - +--> -<div class="lastmodified"> -<script type="text/javascript"><!-- -document.write("Last Published: " + document.lastModified); -// --></script> -</div> -<div class="copyright"> - Copyright © - <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> -</div> -<!--+ - |end bottomstrip - +--> -</div> -</body> -</html>
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/cf24deb2/docs/zookeeperOver.pdf ---------------------------------------------------------------------- diff --git a/docs/zookeeperOver.pdf b/docs/zookeeperOver.pdf deleted file mode 100644 index 7ce6705..0000000 Binary files a/docs/zookeeperOver.pdf and /dev/null differ
