Repository: aurora Updated Branches: refs/heads/master 4745c8cc2 -> a9f4e26a2
Fix host maintenance commands to properly initialize the api client. Bugs closed: AURORA-1777 Reviewed at https://reviews.apache.org/r/52087/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/a9f4e26a Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/a9f4e26a Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/a9f4e26a Branch: refs/heads/master Commit: a9f4e26a24e63aeb6a215c0f5d5bb54b7c936abb Parents: 4745c8c Author: Joshua Cohen <[email protected]> Authored: Tue Sep 20 14:26:49 2016 -0500 Committer: Joshua Cohen <[email protected]> Committed: Tue Sep 20 14:26:49 2016 -0500 ---------------------------------------------------------------------- src/main/python/apache/aurora/admin/admin.py | 49 +++++++++----------- .../python/apache/aurora/admin/admin_util.py | 29 +++++++++++- .../apache/aurora/admin/host_maintenance.py | 10 ++-- .../python/apache/aurora/admin/maintenance.py | 37 ++++++++++----- .../python/apache/aurora/admin/test_admin.py | 2 + .../apache/aurora/admin/test_admin_sla.py | 2 + .../apache/aurora/admin/test_admin_util.py | 23 ++++++++- src/test/python/apache/aurora/admin/util.py | 1 + .../sh/org/apache/aurora/e2e/test_end_to_end.sh | 6 ++- 9 files changed, 114 insertions(+), 45 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/admin.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/admin/admin.py b/src/main/python/apache/aurora/admin/admin.py index 9fc89a2..070c348 100644 --- a/src/main/python/apache/aurora/admin/admin.py +++ b/src/main/python/apache/aurora/admin/admin.py @@ -22,10 +22,9 @@ from twitter.common import app, log from twitter.common.quantity import Data, Time from twitter.common.quantity.parse_simple import parse_data, parse_time -from apache.aurora.client.api import AuroraClientAPI +from apache.aurora.admin.admin_util import make_admin_client from apache.aurora.client.api.sla import JobUpTimeLimit from apache.aurora.client.base import ( - AURORA_ADMIN_USER_AGENT_NAME, GROUPING_OPTION, check_and_log_response, combine_messages, @@ -63,17 +62,12 @@ MIN_SLA_INSTANCE_COUNT = optparse.Option( ) -def make_admin_client(cluster): - if cluster not in CLUSTERS: - die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys()))) - +def make_admin_client_with_options(cluster): options = app.get_options() - verbose = getattr(options, 'verbosity', 'normal') == 'verbose' - return AuroraClientAPI( - CLUSTERS[cluster], - AURORA_ADMIN_USER_AGENT_NAME, - verbose=verbose, + return make_admin_client( + cluster=cluster, + verbose=getattr(options, 'verbosity', 'normal') == 'verbose', bypass_leader_redirect=options.bypass_leader_redirect) @@ -158,7 +152,7 @@ def query(args, options): if not (states <= ACTIVE_STATES) and not options.force: die('--force is required for expensive queries (states outside ACTIVE states') - api = make_admin_client(cluster) + api = make_admin_client_with_options(cluster) query_info = api.query(TaskQuery(role=role, jobName=job, instanceIds=instances, statuses=states)) if query_info.responseCode != ResponseCode.OK: @@ -198,7 +192,7 @@ def set_quota(cluster, role, cpu_str, ram, disk): except ValueError as e: die(str(e)) - resp = make_admin_client(cluster).set_quota(role, cpu, ram_mb, disk_mb) + resp = make_admin_client_with_options(cluster).set_quota(role, cpu, ram_mb, disk_mb) check_and_log_response(resp) @@ -213,7 +207,7 @@ def increase_quota(cluster, role, cpu_str, ram_str, disk_str): ram = parse_data(ram_str).as_(Data.MB) disk = parse_data(disk_str).as_(Data.MB) - client = make_admin_client(cluster) + client = make_admin_client_with_options(cluster) resp = client.get_quota(role) quota = resp.result.getQuotaResult.quota resource_details = ResourceManager.resource_details_from_quota(quota) @@ -245,7 +239,7 @@ def scheduler_backup_now(cluster): Immediately initiates a full storage backup. """ - check_and_log_response(make_admin_client(cluster).perform_backup()) + check_and_log_response(make_admin_client_with_options(cluster).perform_backup()) @app.command @@ -255,7 +249,7 @@ def scheduler_list_backups(cluster): Lists backups available for recovery. """ - resp = make_admin_client(cluster).list_backups() + resp = make_admin_client_with_options(cluster).list_backups() check_and_log_response(resp) backups = resp.result.listBackupsResult.backups print('%s available backups:' % len(backups)) @@ -270,7 +264,7 @@ def scheduler_stage_recovery(cluster, backup_id): Stages a backup for recovery. """ - check_and_log_response(make_admin_client(cluster).stage_recovery(backup_id)) + check_and_log_response(make_admin_client_with_options(cluster).stage_recovery(backup_id)) @app.command @@ -280,7 +274,7 @@ def scheduler_print_recovery_tasks(cluster): Prints all active tasks in a staged recovery. """ - resp = make_admin_client(cluster).query_recovery( + resp = make_admin_client_with_options(cluster).query_recovery( TaskQuery(statuses=ACTIVE_STATES)) check_and_log_response(resp) log.info('Role\tJob\tShard\tStatus\tTask ID') @@ -302,7 +296,8 @@ def scheduler_delete_recovery_tasks(cluster, task_ids): Deletes a comma-separated list of task IDs from a staged recovery. """ ids = set(task_ids.split(',')) - check_and_log_response(make_admin_client(cluster).delete_recovery_tasks(TaskQuery(taskIds=ids))) + check_and_log_response(make_admin_client_with_options(cluster).delete_recovery_tasks( + TaskQuery(taskIds=ids))) @app.command @@ -312,7 +307,7 @@ def scheduler_commit_recovery(cluster): Commits a staged recovery. """ - check_and_log_response(make_admin_client(cluster).commit_recovery()) + check_and_log_response(make_admin_client_with_options(cluster).commit_recovery()) @app.command @@ -322,7 +317,7 @@ def scheduler_unload_recovery(cluster): Unloads a staged recovery. """ - check_and_log_response(make_admin_client(cluster).unload_recovery()) + check_and_log_response(make_admin_client_with_options(cluster).unload_recovery()) @app.command @@ -332,7 +327,7 @@ def scheduler_snapshot(cluster): Request that the scheduler perform a storage snapshot and block until complete. """ - check_and_log_response(make_admin_client(cluster).snapshot()) + check_and_log_response(make_admin_client_with_options(cluster).snapshot()) @app.command @@ -352,7 +347,7 @@ def reconcile_tasks(cluster): scheduler configuration option. """ options = app.get_options() - client = make_admin_client(cluster) + client = make_admin_client_with_options(cluster) if options.type == 'implicit': resp = client.reconcile_implicit() elif options.type == 'explicit': @@ -445,7 +440,7 @@ def sla_list_safe_domain(cluster, percentage, duration): override_jobs = parse_jobs_file(options.override_filename) if options.override_filename else {} get_grouping_or_die(options.grouping) - vector = make_admin_client(cluster).sla_get_safe_domain_vector( + vector = make_admin_client_with_options(cluster).sla_get_safe_domain_vector( options.min_instance_count, include_hosts) groups = vector.get_safe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), @@ -502,7 +497,9 @@ def sla_probe_hosts(cluster, percentage, duration): hosts = parse_hostnames(options.filename, options.hosts) get_grouping_or_die(options.grouping) - vector = make_admin_client(cluster).sla_get_safe_domain_vector(options.min_instance_count, hosts) + vector = make_admin_client_with_options(cluster).sla_get_safe_domain_vector( + options.min_instance_count, + hosts) groups = vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), options.grouping) output, _ = format_sla_results(groups) @@ -539,4 +536,4 @@ def get_scheduler(cluster): Dumps the leading scheduler endpoint URL. """ print("Found leading scheduler at: %s" % - make_admin_client(cluster).scheduler_proxy.scheduler_client().raw_url) + make_admin_client_with_options(cluster).scheduler_proxy.scheduler_client().raw_url) http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/admin_util.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/admin/admin_util.py b/src/main/python/apache/aurora/admin/admin_util.py index 394deb5..8240e80 100644 --- a/src/main/python/apache/aurora/admin/admin_util.py +++ b/src/main/python/apache/aurora/admin/admin_util.py @@ -22,7 +22,10 @@ from uuid import uuid1 from twitter.common.quantity import Amount, Time from twitter.common.quantity.parse_simple import parse_time -from apache.aurora.client.base import die +from apache.aurora.client.api import AuroraClientAPI +from apache.aurora.client.base import AURORA_ADMIN_USER_AGENT_NAME, die +from apache.aurora.common.cluster import Cluster +from apache.aurora.common.clusters import CLUSTERS """Admin client utility functions shared between admin and maintenance modules.""" @@ -264,3 +267,27 @@ def format_sla_results(host_groups, unsafe_only=False): results.append(host_details) hostnames.add(host) return results, hostnames + + +def make_admin_client(cluster, verbose=False, bypass_leader_redirect=False): + """Creates an API client with the specified options for use in admin commands. + + :param cluster: The cluster to connect with. + :type cluster: Either a string cluster name or a Cluster object. + :param verbose: Should the client emit verbose output. + :type verbose: bool + :type bypass_leader_redirect: Should the client bypass the scheduler's leader redirect filter. + :type bypass_leader_redirect: bool + :rtype: an AuroraClientAPI instance. + """ + + is_cluster_object = isinstance(cluster, Cluster) + + if not is_cluster_object and cluster not in CLUSTERS: + die('Unknown cluster: %s. Known clusters: %s' % (cluster, ", ".join(CLUSTERS.keys()))) + + return AuroraClientAPI( + cluster if is_cluster_object else CLUSTERS[cluster], + AURORA_ADMIN_USER_AGENT_NAME, + verbose=verbose, + bypass_leader_redirect=bypass_leader_redirect) http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/host_maintenance.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/admin/host_maintenance.py b/src/main/python/apache/aurora/admin/host_maintenance.py index 677f870..83fc2b6 100644 --- a/src/main/python/apache/aurora/admin/host_maintenance.py +++ b/src/main/python/apache/aurora/admin/host_maintenance.py @@ -16,8 +16,7 @@ from threading import Event from twitter.common import log from twitter.common.quantity import Amount, Time -from apache.aurora.admin.admin_util import format_sla_results, print_results -from apache.aurora.client.api import AuroraClientAPI +from apache.aurora.admin.admin_util import format_sla_results, make_admin_client, print_results from apache.aurora.client.base import DEFAULT_GROUPING, check_and_log_response, group_hosts from gen.apache.aurora.api.ttypes import Hosts, MaintenanceMode @@ -47,8 +46,11 @@ class HostMaintenance(object): for group in groups: yield Hosts(group[1]) - def __init__(self, cluster, verbosity, wait_event=None): - self._client = AuroraClientAPI(cluster, verbosity == 'verbose') + def __init__(self, cluster, verbosity, wait_event=None, bypass_leader_redirect=False): + self._client = make_admin_client( + cluster=cluster, + verbose=verbosity == 'verbose', + bypass_leader_redirect=bypass_leader_redirect) self._wait_event = wait_event or Event() def _drain_hosts(self, drainable_hosts): http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/main/python/apache/aurora/admin/maintenance.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/admin/maintenance.py b/src/main/python/apache/aurora/admin/maintenance.py index bf44651..942a237 100644 --- a/src/main/python/apache/aurora/admin/maintenance.py +++ b/src/main/python/apache/aurora/admin/maintenance.py @@ -51,8 +51,11 @@ def host_deactivate(cluster): future hosts that will be drained shortly in subsequent batches. """ options = app.get_options() - HostMaintenance(CLUSTERS[cluster], options.verbosity).start_maintenance( - parse_hostnames(options.filename, options.hosts)) + HostMaintenance( + cluster=CLUSTERS[cluster], + verbosity=options.verbosity, + bypass_leader_redirect=options.bypass_leader_redirect).start_maintenance( + parse_hostnames(options.filename, options.hosts)) @app.command @@ -69,8 +72,11 @@ def host_activate(cluster): allow normal scheduling to resume on the given list of hosts. """ options = app.get_options() - HostMaintenance(CLUSTERS[cluster], options.verbosity).end_maintenance( - parse_hostnames(options.filename, options.hosts)) + HostMaintenance( + cluster=CLUSTERS[cluster], + verbosity=options.verbosity, + bypass_leader_redirect=options.bypass_leader_redirect).end_maintenance( + parse_hostnames(options.filename, options.hosts)) @app.command @@ -113,13 +119,16 @@ def host_drain(cluster): post_drain_callback = parse_script(options.post_drain_script) - HostMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance( - drainable_hosts, - grouping_function=options.grouping, - percentage=override_percentage, - duration=override_duration, - output_file=options.unsafe_hosts_filename, - callback=post_drain_callback) + HostMaintenance( + cluster=CLUSTERS[cluster], + verbosity=options.verbosity, + bypass_leader_redirect=options.bypass_leader_redirect).perform_maintenance( + drainable_hosts, + grouping_function=options.grouping, + percentage=override_percentage, + duration=override_duration, + output_file=options.unsafe_hosts_filename, + callback=post_drain_callback) @app.command @@ -134,6 +143,10 @@ def host_status(cluster): """ options = app.get_options() checkable_hosts = parse_hostnames(options.filename, options.hosts) - statuses = HostMaintenance(CLUSTERS[cluster], options.verbosity).check_status(checkable_hosts) + statuses = HostMaintenance( + cluster=CLUSTERS[cluster], + verbosity=options.verbosity, + bypass_leader_redirect=options.bypass_leader_redirect).check_status(checkable_hosts) + for pair in statuses: log.info("%s is in state: %s" % pair) http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin.py ---------------------------------------------------------------------- diff --git a/src/test/python/apache/aurora/admin/test_admin.py b/src/test/python/apache/aurora/admin/test_admin.py index f720742..66abade 100644 --- a/src/test/python/apache/aurora/admin/test_admin.py +++ b/src/test/python/apache/aurora/admin/test_admin.py @@ -58,6 +58,7 @@ class TestQueryCommand(AuroraClientCommandTest): mock_options.states = states mock_options.listformat = listformat or '%role%/%name%/%instanceId% %status%' mock_options.verbosity = False + mock_options.bypass_leader_redirect = False return mock_options @classmethod @@ -228,6 +229,7 @@ class TestReconcileTaskCommand(AuroraClientCommandTest): mock_options = create_autospec(spec=['type', 'batch_size'], instance=True) mock_options.type = reconcile_type mock_options.batch_size = batch_size + mock_options.bypass_leader_redirect = False return mock_options def test_reconcile_implicit(self): http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin_sla.py ---------------------------------------------------------------------- diff --git a/src/test/python/apache/aurora/admin/test_admin_sla.py b/src/test/python/apache/aurora/admin/test_admin_sla.py index 54b5a82..6d7c74d 100644 --- a/src/test/python/apache/aurora/admin/test_admin_sla.py +++ b/src/test/python/apache/aurora/admin/test_admin_sla.py @@ -48,6 +48,7 @@ class TestAdminSlaListSafeDomainCommand(AuroraClientCommandTest): mock_options.disable_all_hooks = False mock_options.min_instance_count = MIN_INSTANCE_COUNT mock_options.grouping = grouping or DEFAULT_GROUPING + mock_options.bypass_leader_redirect = False return mock_options @classmethod @@ -308,6 +309,7 @@ class TestAdminSlaProbeHostsCommand(AuroraClientCommandTest): mock_options.verbosity = False mock_options.grouping = grouping or DEFAULT_GROUPING mock_options.min_instance_count = 1 + mock_options.bypass_leader_redirect = False return mock_options @classmethod http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/test_admin_util.py ---------------------------------------------------------------------- diff --git a/src/test/python/apache/aurora/admin/test_admin_util.py b/src/test/python/apache/aurora/admin/test_admin_util.py index f5c8c69..4714bfa 100644 --- a/src/test/python/apache/aurora/admin/test_admin_util.py +++ b/src/test/python/apache/aurora/admin/test_admin_util.py @@ -18,10 +18,19 @@ import unittest import mock from twitter.common.contextutil import temporary_file -from apache.aurora.admin.admin_util import parse_script +from apache.aurora.admin.admin_util import make_admin_client, parse_script +from apache.aurora.common.cluster import Cluster +from apache.aurora.common.clusters import Clusters class TestAdminUtil(unittest.TestCase): + TEST_CLUSTER_NAME = 'west' + TEST_CLUSTER = Cluster( + name=TEST_CLUSTER_NAME, + zk='zookeeper.example.com', + scheduler_zk_path='/foo/bar', + auth_mechanism='UNAUTHENTICATED') + TEST_CLUSTERS = Clusters([TEST_CLUSTER]) @mock.patch("apache.aurora.admin.admin_util.subprocess", spec=subprocess) def test_parse_script(self, mock_subprocess): @@ -34,3 +43,15 @@ class TestAdminUtil(unittest.TestCase): def test_parse_script_invalid_filename(self): self.assertRaises(SystemExit, parse_script, "invalid filename") + + def test_make_admin_client_cluster_string(self): + with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS): + self.assertIsNotNone(make_admin_client(self.TEST_CLUSTER_NAME)) + + def test_make_admin_client_cluster_object(self): + with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS): + self.assertIsNotNone(make_admin_client(self.TEST_CLUSTER)) + + def test_make_admin_client_cluster_unknown(self): + with mock.patch('apache.aurora.admin.admin_util.CLUSTERS', new=self.TEST_CLUSTERS): + self.assertRaises(SystemExit, make_admin_client, 'east') http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/python/apache/aurora/admin/util.py ---------------------------------------------------------------------- diff --git a/src/test/python/apache/aurora/admin/util.py b/src/test/python/apache/aurora/admin/util.py index d0a915c..355734d 100644 --- a/src/test/python/apache/aurora/admin/util.py +++ b/src/test/python/apache/aurora/admin/util.py @@ -81,4 +81,5 @@ class AuroraClientCommandTest(unittest.TestCase): def setup_mock_options(cls): mock_options = create_autospec(spec=['verbosity'], instance=True) mock_options.verbosity = 'verbose' + mock_options.bypass_leader_redirect = False return mock_options http://git-wip-us.apache.org/repos/asf/aurora/blob/a9f4e26a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh ---------------------------------------------------------------------- diff --git a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh index e36726e..c93be9b 100755 --- a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh +++ b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh @@ -26,7 +26,7 @@ fi set -u -e -x set -o pipefail -readonly TEST_SCHEDULER_IP=192.168.33.7 +readonly TEST_SLAVE_IP=192.168.33.7 _curl() { curl --silent --fail --retry 4 --retry-delay 10 "$@" ; } @@ -371,6 +371,10 @@ test_admin() { echo '== Testing admin commands' echo '== Getting leading scheduler' aurora_admin get_scheduler $_cluster | grep ":8081" + + # host maintenance commands currently have a separate entry point and use their own api client. + # Until we address that, at least verify that the command group still works. + aurora_admin host_status --hosts=$TEST_SLAVE_IP $_cluster } test_ephemeral_daemon_with_final() {
