Repository: ambari Updated Branches: refs/heads/trunk 00d2f1193 -> 4140cc78a
AMBARI-18590 - RegionServer Registration Checks Fail During Upgrade If rDNS is Not Enabled (jonathanhurley) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4140cc78 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4140cc78 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4140cc78 Branch: refs/heads/trunk Commit: 4140cc78a0153799938c2dbe3f80c11ab3be2e30 Parents: 00d2f11 Author: Jonathan Hurley <jhur...@hortonworks.com> Authored: Thu Oct 13 11:16:15 2016 -0400 Committer: Jonathan Hurley <jhur...@hortonworks.com> Committed: Thu Oct 13 15:44:58 2016 -0400 ---------------------------------------------------------------------- .../HBASE/0.96.0.2.0/package/scripts/upgrade.py | 48 ++++++++++++++++++-- 1 file changed, 43 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/4140cc78/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py index f1fa80c..703fe26 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py @@ -19,7 +19,10 @@ limitations under the License. 
""" import re +import socket + from resource_management.core import shell +from resource_management.core.exceptions import ComponentIsNotRunning from resource_management.core.exceptions import Fail from resource_management.core.logger import Logger from resource_management.libraries.functions import conf_select, stack_select @@ -44,7 +47,7 @@ def post_regionserver(env): check_cmd = "echo 'status \"simple\"' | {0} shell".format(params.hbase_cmd) exec_cmd = "{0} {1}".format(params.kinit_cmd, check_cmd) - call_and_match(exec_cmd, params.hbase_user, params.hostname + ":", re.IGNORECASE) + is_regionserver_registered(exec_cmd, params.hbase_user, params.hostname, re.IGNORECASE) def is_region_server_process_running(): @@ -55,14 +58,49 @@ def is_region_server_process_running(): except ComponentIsNotRunning: return False -@retry(times=30, sleep_time=30, err_class=Fail) # keep trying for 15 mins -def call_and_match(cmd, user, regex, regex_search_flags): +@retry(times=30, sleep_time=30, err_class=Fail) +def is_regionserver_registered(cmd, user, hostname, regex_search_flags): + """ + Queries HBase through the HBase shell to see which servers have successfully registered. This is + useful in cases, such as upgrades, where we must ensure that a RegionServer has not only started, + but also completed its registration handshake before moving into upgrading the next RegionServer. + + The hbase shell is used along with the "status 'simple'" command in order to determine if the + specified host has registered. 
+ :param cmd: + :param user: + :param hostname: + :param regex_search_flags: + :return: + """ if not is_region_server_process_running(): Logger.info("RegionServer process is not running") raise Fail("RegionServer process is not running") + # use hbase shell with "status 'simple'" command code, out = shell.call(cmd, user=user) - if not (out and re.search(regex, out, regex_search_flags)): - raise Fail("Could not verify RS available") + # if we don't have output, then we can't check + if not out: + raise Fail("Unable to retrieve status information from the HBase shell") + + # try matching the hostname with a colon (which indicates a bound port) + bound_hostname_to_match = hostname + ":" + match = re.search(bound_hostname_to_match, out, regex_search_flags) + + # if there's no match, try again with the IP address + if not match: + try: + ip_address = socket.gethostbyname(hostname) + bound_ip_address_to_match = ip_address + ":" + match = re.search(bound_ip_address_to_match, out, regex_search_flags) + except socket.error: + # this is merely a backup, so just log that it failed + Logger.warning("Unable to lookup the IP address of {0}, reverse DNS lookup may not be working.".format(hostname)) + pass + + # failed with both a hostname and an IP address, so raise the Fail and let the function auto retry + if not match: + raise Fail( + "The RegionServer named {0} has not yet registered with the HBase Master".format(hostname))