BIGTOP-2777: make hbase charm more robust Closes #216
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/5f0c2205 Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/5f0c2205 Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/5f0c2205 Branch: refs/heads/branch-1.2 Commit: 5f0c220580c7662f078341f1c0d6dd5627d5c8f2 Parents: 2022c14 Author: Kevin W Monroe <[email protected]> Authored: Mon May 22 17:00:18 2017 +0000 Committer: Evans Ye <[email protected]> Committed: Sat Jul 1 12:52:56 2017 +0000 ---------------------------------------------------------------------- .../src/charm/hbase/layer-hbase/README.md | 96 +++++++++-- .../src/charm/hbase/layer-hbase/config.yaml | 6 + .../src/charm/hbase/layer-hbase/layer.yaml | 17 +- .../lib/charms/layer/bigtop_hbase.py | 75 ++++++-- .../charm/hbase/layer-hbase/reactive/hbase.py | 172 +++++++++++++++++-- 5 files changed, 303 insertions(+), 63 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/5f0c2205/bigtop-packages/src/charm/hbase/layer-hbase/README.md ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/charm/hbase/layer-hbase/README.md b/bigtop-packages/src/charm/hbase/layer-hbase/README.md index a9be1ac..fab19b3 100644 --- a/bigtop-packages/src/charm/hbase/layer-hbase/README.md +++ b/bigtop-packages/src/charm/hbase/layer-hbase/README.md @@ -24,7 +24,7 @@ This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware. Learn more at [hbase.apache.org][]. -This charm deploys version 1.1.9 of the HBase master and regionserver +This charm deploys version 1.1.9 of the HBase master, regionserver, and thrift components from [Apache Bigtop][]. [hbase.apache.org]: http://hbase.apache.org/ @@ -36,7 +36,7 @@ components from [Apache Bigtop][]. This charm requires Juju 2.0 or greater. 
If Juju is not yet set up, please follow the [getting-started][] instructions prior to deploying this charm. -An HBase deployment consists of HBase masters and HBase RegionServers. +An HBase deployment consists of HBase Masters and HBase RegionServers. In a distributed HBase environment, one master and one regionserver are deployed on each unit. HBase ensures that only one master is active with the rest in standby mode in case the active master fails. @@ -46,9 +46,9 @@ of the `hadoop-hbase` bundle: juju deploy hadoop-hbase -This will deploy an Apache Bigtop Hadoop cluster with 3 HBase units. More -information about this deployment can be found in the -[bundle readme](https://jujucharms.com/hadoop-hbase/). +This will deploy an Apache Bigtop Hadoop cluster with 3 HBase units colocated +on 3 Hadoop DataNodes. More information about this deployment can be found in +the [bundle readme](https://jujucharms.com/hadoop-hbase/). This charm also supports the Thrift client API for HBase. Thrift is both cross-platform and more lightweight than REST for many operations. @@ -95,30 +95,36 @@ more information about a specific smoke test with: juju show-action-output <action-id> -## HBase web UI -HBase provides a web console that can be used to verify information about -the cluster. To access it, find the `PUBLIC-ADDRESS` of any hbase unit and -expose the application: +## HBase web interfaces +The HBase Master service provides a web console that can be used to verify +information about the cluster. 
+The default heap size for the HBase master JVM is 1024MB.
Set a different +value (in MB) with the following: + + juju config hbase heap=4096 + + +# Benchmarking + +This charm provides a `perf-test` action to gauge the performance of the HBase +cluster: + + $ juju run-action hbase/0 perf-test + Action queued with id: 339cec1f-e903-4ee7-85ca-876fb0c3d28e + + $ juju show-action-output 339cec1f-e903-4ee7-85ca-876fb0c3d28e + results: + meta: + composite: + direction: asc + units: secs + value: "90" + raw: /opt/hbase-perf-results/1495562300.log + start: 2017-05-23T17:58:20Z + stop: 2017-05-23T17:59:50Z + outcome: success + status: completed + timing: + completed: 2017-05-23 17:59:51 +0000 UTC + enqueued: 2017-05-23 17:58:16 +0000 UTC + started: 2017-05-23 17:58:20 +0000 UTC + # Limitations -Restarting an HBase deployment is potentially disruptive. Be aware that the -following events will cause a restart: +Restarting an HBase cluster is potentially disruptive. Be aware that the +following events will cause a restart of all HBase services: -- Zookeeper units joining or departing the quorum. -- Upgrading the hbase charm. +- Adding or removing HBase units +- Adding or removing Zookeeper units +- Changing charm configuration with `juju config` +- Upgrading this charm # Issues http://git-wip-us.apache.org/repos/asf/bigtop/blob/5f0c2205/bigtop-packages/src/charm/hbase/layer-hbase/config.yaml ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/charm/hbase/layer-hbase/config.yaml b/bigtop-packages/src/charm/hbase/layer-hbase/config.yaml new file mode 100644 index 0000000..f04d9fd --- /dev/null +++ b/bigtop-packages/src/charm/hbase/layer-hbase/config.yaml @@ -0,0 +1,6 @@ +options: + heap: + type: int + default: 1024 + description: | + The maximum heap size (in MB) used by the HBase master JVM. 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5f0c2205/bigtop-packages/src/charm/hbase/layer-hbase/layer.yaml ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/charm/hbase/layer-hbase/layer.yaml b/bigtop-packages/src/charm/hbase/layer-hbase/layer.yaml index a35e252..26c63e1 100644 --- a/bigtop-packages/src/charm/hbase/layer-hbase/layer.yaml +++ b/bigtop-packages/src/charm/hbase/layer-hbase/layer.yaml @@ -2,10 +2,11 @@ repo: https://github.com/apache/bigtop/tree/master/bigtop-packages/src/charm/hba includes: - 'layer:apache-bigtop-base' - 'layer:hadoop-client' - - 'interface:zookeeper' + - 'layer:leadership' - 'interface:benchmark' - 'interface:hbase' - 'interface:hbase-quorum' + - 'interface:zookeeper' options: apache-bigtop-base: ports: @@ -14,21 +15,21 @@ options: # resourcemanager). Communication among units within the cluster does # not need ports to be explicitly opened. hbase-master: - port: 60000 - exposed_on: 'hbase' + port: 16000 hbase-master-web: - port: 60010 + port: 16010 exposed_on: 'hbase' hbase-region: - port: 60020 - exposed_on: 'hbase' + port: 16020 hbase-region-web: - port: 60030 + port: 16030 exposed_on: 'hbase' hbase-thrift: port: 9090 + hbase-thrift-web: + port: 9095 exposed_on: 'hbase' # Not yet on bigtop #hbase-rest: - # port: 8080 + # port: 8085 # exposed_on: 'hbase' http://git-wip-us.apache.org/repos/asf/bigtop/blob/5f0c2205/bigtop-packages/src/charm/hbase/layer-hbase/lib/charms/layer/bigtop_hbase.py ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/charm/hbase/layer-hbase/lib/charms/layer/bigtop_hbase.py b/bigtop-packages/src/charm/hbase/layer-hbase/lib/charms/layer/bigtop_hbase.py index cd7c14c..40fe78a 100755 --- a/bigtop-packages/src/charm/hbase/layer-hbase/lib/charms/layer/bigtop_hbase.py +++ b/bigtop-packages/src/charm/hbase/layer-hbase/lib/charms/layer/bigtop_hbase.py @@ -13,52 +13,93 @@ # See the License for the specific 
language governing permissions and # limitations under the License. -from charmhelpers.core import hookenv, host -from jujubigdata import utils -from charms.layer.apache_bigtop_base import Bigtop +from charmhelpers.core import hookenv, host, unitdata from charms import layer +from charms.layer.apache_bigtop_base import Bigtop +from jujubigdata import utils +from path import Path class HBase(object): - """ - This class manages HBase. - """ + '''This class manages HBase.''' def __init__(self): self.dist_config = utils.DistConfig( data=layer.options('apache-bigtop-base')) def configure(self, hosts, zk_units): - zks = [] - for unit in zk_units: - ip = utils.resolve_private_address(unit['host']) - zks.append(ip) - zks.sort() - zk_connect = ",".join(zks) - + zk_connect = self.get_zk_connect(zk_units) roles = ['hbase-server', 'hbase-master', 'hbase-client'] - override = { + 'bigtop::hbase_thrift_port': self.dist_config.port('hbase-thrift'), + 'hadoop_hbase::client::thrift': True, + 'hadoop_hbase::common_config::heap_size': hookenv.config()['heap'], 'hadoop_hbase::common_config::zookeeper_quorum': zk_connect, - 'hadoop_hbase::deploy::auxiliary': False + 'hadoop_hbase::deploy::auxiliary': False, } bigtop = Bigtop() bigtop.render_site_yaml(hosts, roles, override) bigtop.trigger_puppet() + def get_zk_connect(self, zk_units): + zks = [] + for unit in zk_units: + ip = utils.resolve_private_address(unit['host']) + zks.append(ip) + zks.sort() + return ",".join(zks) + + def update_regionservers(self, addrs, remove=False): + ''' + Each HBase unit in the cluster runs a RegionServer process. Ensure + all unit IP addresses are listed in the regionservers file. 
+ + @param: addrs List of IP addresses + @param: remove Bool to add (False) or remove (True) unit IPs + ''' + unit_kv = unitdata.kv() + kv_ips = unit_kv.get('regionservers', default=[]) + + # add/remove IPs from our list + if remove: + kv_ips = [ip for ip in kv_ips if ip not in addrs] + else: + kv_ips.extend(addrs) + + # write regionservers file using a sorted, unique set of addrs + new_kv = sorted(set(kv_ips)) + rs_file = Path('/etc/hbase/conf/regionservers') + rs_file.write_lines( + [ + '# DO NOT EDIT', + '# This file is automatically managed by Juju', + ] + [ip for ip in new_kv], + append=False + ) + + # save the new kv IPs + unit_kv.set('regionservers', new_kv) + unit_kv.flush(True) + def restart(self): self.stop() self.start() def start(self): + # order is important; master must start first. + hookenv.log('Starting HBase services') host.service_start('hbase-master') host.service_start('hbase-regionserver') host.service_start('hbase-thrift') + hookenv.log('HBase services have been started') def stop(self): - host.service_stop('hbase-master') - host.service_stop('hbase-regionserver') + # order is important; master must stop last. 
+ hookenv.log('Stopping HBase services') host.service_stop('hbase-thrift') + host.service_stop('hbase-regionserver') + host.service_stop('hbase-master') + hookenv.log('HBase services have been stopped') def open_ports(self): for port in self.dist_config.exposed_ports('hbase'): http://git-wip-us.apache.org/repos/asf/bigtop/blob/5f0c2205/bigtop-packages/src/charm/hbase/layer-hbase/reactive/hbase.py ---------------------------------------------------------------------- diff --git a/bigtop-packages/src/charm/hbase/layer-hbase/reactive/hbase.py b/bigtop-packages/src/charm/hbase/layer-hbase/reactive/hbase.py index 26751b5..212a16b 100644 --- a/bigtop-packages/src/charm/hbase/layer-hbase/reactive/hbase.py +++ b/bigtop-packages/src/charm/hbase/layer-hbase/reactive/hbase.py @@ -13,11 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from charms.reactive import when, when_not_all, is_state, set_state, remove_state -from charms.layer.bigtop_hbase import HBase from charmhelpers.core import hookenv -from charms.reactive.helpers import data_changed from charms.layer.apache_bigtop_base import get_layer_opts, get_package_version +from charms.layer.bigtop_hbase import HBase +from charms.reactive import ( + RelationBase, + is_state, + remove_state, + set_state, + when, + when_any, + when_not, + when_not_all +) +from charms.reactive.helpers import any_file_changed, data_changed @when('bigtop.available') @@ -39,37 +48,78 @@ def report_status(): elif not zk_ready: hookenv.status_set('waiting', 'waiting for zookeeper to become ready') - elif hbase_installed: + elif not hbase_installed: + hookenv.status_set('waiting', + 'waiting to install hbase') + else: hookenv.status_set('active', 'ready') -@when('bigtop.available', 'zookeeper.ready', 'hadoop.hdfs.ready') -def install_hbase(zk, hdfs): +@when('bigtop.available', 'hadoop.hdfs.ready', 'zookeeper.ready') +def install_hbase(hdfs, zk): + ''' + Anytime our dependencies are 
available, check to see if we have a valid + reason to (re)install. These include: + - initial install + - config change + - Zookeeper unit has joined/departed + ''' zks = zk.zookeepers() - if (is_state('hbase.installed') and - (not data_changed('zks', zks))): - return + deployment_matrix = { + 'zookeepers': zks, + } + + # Handle nuances when installing versus re-installing + if not is_state('hbase.installed'): + prefix = "installing" - msg = "configuring hbase" if is_state('hbase.installed') else "installing hbase" - hookenv.status_set('maintenance', msg) + # On initial install, prime our kv with the current deployment matrix. + # Subsequent calls will use this to determine if a reinstall is needed. + data_changed('deployment_matrix', deployment_matrix) + else: + prefix = "configuring" + # We do not need to reinstall when peers come and go; that is covered + # by other handlers below. + if is_state('hbpeer.departed') or is_state('hbpeer.joined'): + return + + # Return if neither config nor our matrix has changed + if not (is_state('config.changed') or + data_changed('deployment_matrix', deployment_matrix)): + return + + hookenv.status_set('maintenance', '{} hbase'.format(prefix)) + hookenv.log("{} hbase with: {}".format(prefix, deployment_matrix)) hbase = HBase() hosts = {} - nns = hdfs.namenodes() - hosts['namenode'] = nns[0] + hosts['namenode'] = hdfs.namenodes()[0] hbase.configure(hosts, zks) - hbase.open_ports() - set_state('hbase.installed') - report_status() + + # Ensure our IP is in the regionservers list; restart if the rs conf + # file has changed. 
+ hbase.update_regionservers([hookenv.unit_private_ip()]) + if any_file_changed(['/etc/hbase/conf/regionservers']): + hbase.restart() + # set app version string for juju status output hbase_version = get_package_version('hbase-master') or 'unknown' hookenv.application_version_set(hbase_version) + hbase.open_ports() + report_status() + set_state('hbase.installed') + @when('hbase.installed') @when_not_all('hadoop.hdfs.ready', 'zookeeper.ready') def stop_hbase(): + ''' + HBase depends on HDFS and Zookeeper. If we are installed and either of + these dependencies go away, shut down HBase services and remove our + installed state. + ''' hbase = HBase() hbase.close_ports() hbase.stop() @@ -77,10 +127,94 @@ def stop_hbase(): report_status() -@when('hbase.installed', 'hbclient.joined') -def serve_client(client): +@when('hbase.installed') +@when_any('hbpeer.departed', 'hbpeer.joined') +def handle_peers(): + ''' + We use HBase peers to keep track of the RegionServer IP addresses in a + cluster. Use get_nodes() from the appropriate peer relation to retrieve + a list of peer tuples, e.g.: + [('hbase/0', '172.31.5.161'), ('hbase/2', '172.31.5.11')] + + Depending on the state, this handler will add or remove peer IP addresses + from the regionservers config file. 
+ ''' + if is_state('hbpeer.departed'): + hbpeer = RelationBase.from_state('hbpeer.departed') + is_departing = True + message = 'removing hbase peer(s)' + else: + hbpeer = RelationBase.from_state('hbpeer.joined') + is_departing = False + message = 'adding hbase peer(s)' + + # Make sure we have a valid relation object + if hbpeer: + nodes = hbpeer.get_nodes() + else: + hookenv.log('Ignoring unknown HBase peer state') + return + + hookenv.status_set('maintenance', message) + hbase = HBase() + ip_addrs = [node[1] for node in nodes] + hookenv.log('{}: {}'.format(message, ip_addrs)) + hbase.update_regionservers(ip_addrs, remove=is_departing) + + # NB: the rs conf file will always change when handling peer updates, but + # we still include this condition to keep the files_changed kv current. + if any_file_changed(['/etc/hbase/conf/regionservers']): + hbase.restart() + + # Dismiss appropriate state now that we've handled the peer + if is_departing: + hbpeer.dismiss_departed() + else: + hbpeer.dismiss_joined() + report_status() + + +@when('hbase.installed', 'leadership.is_leader') +@when('zookeeper.ready', 'hbclient.joined') +def serve_client(zk, client): + ''' + We may have multiple HBase peers, but we only need to send 1 set of + connection data. Leverage Juju leadership to only send the leader + info (even if it's not the actual HBase master). + + Zookeeper will ensure that any HBase peer routes requests to the + appropriate master. 
+ ''' + hbase = HBase() + + # Get hbase config and zk info config = get_layer_opts() + host = hookenv.unit_private_ip() master_port = config.port('hbase-master') regionserver_port = config.port('hbase-region') thrift_port = config.port('hbase-thrift') - client.send_port(master_port, regionserver_port, thrift_port) + zk_connect = hbase.get_zk_connect(zk.zookeepers()) + + # Send data to our connected client + client.send_connection(master_port=master_port, + regionserver_port=regionserver_port, + thrift_port=thrift_port, + host=host, + zk_connect=zk_connect) + + hookenv.log('Serving HBase client with master {}:{}, regionserver ' + 'port {}, thrift port {}, and zk connect {}'.format( + host, master_port, + regionserver_port, + thrift_port, + zk_connect)) + + +@when('leadership.is_leader', 'hbclient.joined') +@when_not('hbase.installed') +def stop_serving_client(client): + ''' + If HDFS or ZK goes away, the 'installed' state will be removed. If we have + connected clients, inform them that hbase is no longer ready. + ''' + client.clear_hbase_started()
